Actual source code: ex66.c
1: static char help[] = "Tests MATH2OPUS\n\n";
3: #include <petscmat.h>
4: #include <petscsf.h>
6: static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
7: {
8: PetscInt d;
9: PetscReal clength = sdim == 3 ? 0.2 : 0.1;
10: PetscReal dist, diff = 0.0;
12: for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
13: dist = PetscSqrtReal(diff);
14: return PetscExpReal(-dist / clength);
15: }
17: static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
18: {
19: PetscInt d;
20: PetscReal clength = sdim == 3 ? 0.2 : 0.1;
21: PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;
23: for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; }
24: for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; }
25: for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
26: dist = PetscSqrtReal(diff);
27: return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
28: }
30: int main(int argc,char **argv)
31: {
32: Mat A,B,C,D;
33: Vec v,x,y,Ax,Ay,Bx,By;
34: PetscRandom r;
35: PetscLayout map;
36: PetscScalar *Adata = NULL, *Cdata = NULL, scale = 1.0;
37: PetscReal *coords,nA,nD,nB,err,nX,norms[3];
38: PetscInt N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, nt, ntrials = 2;
39: PetscMPIInt size,rank;
40: PetscBool testlayout = PETSC_FALSE,flg,symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
41: PetscBool checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE, flgglob;
42: PetscBool testtrans, testnorm, randommat = PETSC_TRUE, testorthog, testcompress;
43: void (*approxnormfunc)(void);
44: void (*Anormfunc)(void);
47: #if defined(PETSC_HAVE_MPI_INIT_THREAD)
48: PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
49: #endif
50: PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
51: PetscOptionsGetInt(NULL,NULL,"-ng",&N,&flgglob);
52: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
53: PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
54: PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);
55: PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);
56: PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);
57: PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);
58: PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);
59: if (!flgglob) { PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL); }
60: PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);
61: PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);
62: PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);
63: PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);
64: PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);
65: PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);
66: PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);
67: PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);
68: if (!Asymm) symm = PETSC_FALSE;
70: MPI_Comm_size(PETSC_COMM_WORLD,&size);
71: /* MatMultTranspose for nonsymmetric matrices in parallel not implemented */
72: testtrans = (PetscBool)(size == 1 || symm);
73: testnorm = (PetscBool)(size == 1 || symm);
74: testorthog = (PetscBool)(size == 1 || symm);
75: testcompress = (PetscBool)(size == 1 || symm);
77: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
78: PetscLayoutCreate(PETSC_COMM_WORLD,&map);
79: if (testlayout) {
80: if (rank%2) n = PetscMax(2*n-5*rank,0);
81: else n = 2*n+rank;
82: }
83: if (!flgglob) {
84: PetscLayoutSetLocalSize(map,n);
85: PetscLayoutSetUp(map);
86: PetscLayoutGetSize(map,&N);
87: } else {
88: PetscLayoutSetSize(map,N);
89: PetscLayoutSetUp(map);
90: PetscLayoutGetLocalSize(map,&n);
91: }
92: PetscLayoutDestroy(&map);
94: if (lda) {
95: PetscMalloc1(N*(n+lda),&Adata);
96: }
97: MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);
98: MatDenseSetLDA(A,n+lda);
100: /* Create random points; these are replicated in order to populate a dense matrix and to compare sequential and dense runs
101: The constructor for MATH2OPUS can take as input the distributed coordinates and replicates them internally in case
102: a kernel construction is requested */
103: PetscRandomCreate(PETSC_COMM_WORLD,&r);
104: PetscRandomSetFromOptions(r);
105: PetscRandomSetSeed(r,123456);
106: PetscRandomSeed(r);
107: PetscMalloc1(N*dim,&coords);
108: PetscRandomGetValuesReal(r,N*dim,coords);
109: PetscRandomDestroy(&r);
111: if (kernel || !randommat) {
112: MatH2OpusKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
113: PetscInt ist,ien;
115: MatGetOwnershipRange(A,&ist,&ien);
116: for (i = ist; i < ien; i++) {
117: for (j = 0; j < N; j++) {
118: MatSetValue(A,i,j,(*k)(dim,coords + i*dim,coords + j*dim,NULL),INSERT_VALUES);
119: }
120: }
121: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
122: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
123: if (kernel) {
124: MatCreateH2OpusFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords + ist*dim,PETSC_TRUE,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
125: } else {
126: MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
127: }
128: } else {
129: PetscInt ist;
131: MatGetOwnershipRange(A,&ist,NULL);
132: MatSetRandom(A,NULL);
133: if (Asymm) {
134: MatTranspose(A,MAT_INITIAL_MATRIX,&B);
135: MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);
136: MatDestroy(&B);
137: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
138: }
139: MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);
140: }
141: PetscFree(coords);
142: if (agpu) {
143: MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);
144: }
145: MatViewFromOptions(A,NULL,"-A_view");
147: MatSetOption(B,MAT_SYMMETRIC,symm);
149: /* assemble the H-matrix */
150: MatBindToCPU(B,(PetscBool)!bgpu);
151: MatSetFromOptions(B);
152: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
153: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
154: MatViewFromOptions(B,NULL,"-B_view");
156: /* Test MatScale */
157: MatScale(A,scale);
158: MatScale(B,scale);
160: /* Test MatMult */
161: MatCreateVecs(A,&Ax,&Ay);
162: MatCreateVecs(B,&Bx,&By);
163: VecSetRandom(Ax,NULL);
164: VecCopy(Ax,Bx);
165: MatMult(A,Ax,Ay);
166: MatMult(B,Bx,By);
167: VecViewFromOptions(Ay,NULL,"-mult_vec_view");
168: VecViewFromOptions(By,NULL,"-mult_vec_view");
169: VecNorm(Ay,NORM_INFINITY,&nX);
170: VecAXPY(Ay,-1.0,By);
171: VecViewFromOptions(Ay,NULL,"-mult_vec_view");
172: VecNorm(Ay,NORM_INFINITY,&err);
173: PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);
174: VecScale(By,-1.0);
175: MatMultAdd(B,Bx,By,By);
176: VecNorm(By,NORM_INFINITY,&err);
177: VecViewFromOptions(By,NULL,"-mult_vec_view");
178: if (err > PETSC_SMALL) {
179: PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);
180: }
182: /* Test MatNorm */
183: MatNorm(A,NORM_INFINITY,&norms[0]);
184: MatNorm(A,NORM_1,&norms[1]);
185: norms[2] = -1.; /* NORM_2 not supported */
186: PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
187: MatGetOperation(A,MATOP_NORM,&Anormfunc);
188: MatGetOperation(B,MATOP_NORM,&approxnormfunc);
189: MatSetOperation(A,MATOP_NORM,approxnormfunc);
190: MatNorm(A,NORM_INFINITY,&norms[0]);
191: MatNorm(A,NORM_1,&norms[1]);
192: MatNorm(A,NORM_2,&norms[2]);
193: PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
194: if (testnorm) {
195: MatNorm(B,NORM_INFINITY,&norms[0]);
196: MatNorm(B,NORM_1,&norms[1]);
197: MatNorm(B,NORM_2,&norms[2]);
198: } else {
199: norms[0] = -1.;
200: norms[1] = -1.;
201: norms[2] = -1.;
202: }
203: PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);
204: MatSetOperation(A,MATOP_NORM,Anormfunc);
206: /* Test MatDuplicate */
207: MatDuplicate(B,MAT_COPY_VALUES,&D);
208: MatSetOption(D,MAT_SYMMETRIC,symm);
209: MatMultEqual(B,D,10,&flg);
210: if (!flg) {
211: PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");
212: }
213: if (testtrans) {
214: MatMultTransposeEqual(B,D,10,&flg);
215: if (!flg) {
216: PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose error after MatDuplicate\n");
217: }
218: }
219: MatDestroy(&D);
221: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
222: VecSetRandom(Ay,NULL);
223: VecCopy(Ay,By);
224: MatMultTranspose(A,Ay,Ax);
225: MatMultTranspose(B,By,Bx);
226: VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
227: VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");
228: VecNorm(Ax,NORM_INFINITY,&nX);
229: VecAXPY(Ax,-1.0,Bx);
230: VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");
231: VecNorm(Ax,NORM_INFINITY,&err);
232: PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);
233: VecScale(Bx,-1.0);
234: MatMultTransposeAdd(B,By,Bx,Bx);
235: VecNorm(Bx,NORM_INFINITY,&err);
236: if (err > PETSC_SMALL) {
237: PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);
238: }
239: }
240: VecDestroy(&Ax);
241: VecDestroy(&Ay);
242: VecDestroy(&Bx);
243: VecDestroy(&By);
245: /* Test MatMatMult */
246: if (ldc) {
247: PetscMalloc1(nrhs*(n+ldc),&Cdata);
248: }
249: MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);
250: MatDenseSetLDA(C,n+ldc);
251: MatSetRandom(C,NULL);
252: if (cgpu) {
253: MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);
254: }
255: for (nt = 0; nt < ntrials; nt++) {
256: MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
257: MatViewFromOptions(D,NULL,"-bc_view");
258: PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
259: if (flg) {
260: MatCreateVecs(B,&x,&y);
261: MatCreateVecs(D,NULL,&v);
262: for (i = 0; i < nrhs; i++) {
263: MatGetColumnVector(D,v,i);
264: MatGetColumnVector(C,x,i);
265: MatMult(B,x,y);
266: VecAXPY(y,-1.0,v);
267: VecNorm(y,NORM_INFINITY,&err);
268: if (err > PETSC_SMALL) { PetscPrintf(PETSC_COMM_WORLD,"MatMat err %D %g\n",i,err); }
269: }
270: VecDestroy(&y);
271: VecDestroy(&x);
272: VecDestroy(&v);
273: }
274: }
275: MatDestroy(&D);
277: /* Test MatTransposeMatMult */
278: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
279: for (nt = 0; nt < ntrials; nt++) {
280: MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);
281: MatViewFromOptions(D,NULL,"-btc_view");
282: PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");
283: if (flg) {
284: MatCreateVecs(B,&y,&x);
285: MatCreateVecs(D,NULL,&v);
286: for (i = 0; i < nrhs; i++) {
287: MatGetColumnVector(D,v,i);
288: MatGetColumnVector(C,x,i);
289: MatMultTranspose(B,x,y);
290: VecAXPY(y,-1.0,v);
291: VecNorm(y,NORM_INFINITY,&err);
292: if (err > PETSC_SMALL) { PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %D %g\n",i,err); }
293: }
294: VecDestroy(&y);
295: VecDestroy(&x);
296: VecDestroy(&v);
297: }
298: }
299: MatDestroy(&D);
300: }
302: if (testorthog) {
303: MatDuplicate(B,MAT_COPY_VALUES,&D);
304: MatSetOption(D,MAT_SYMMETRIC,symm);
305: MatH2OpusOrthogonalize(D);
306: MatMultEqual(B,D,10,&flg);
307: if (!flg) {
308: PetscPrintf(PETSC_COMM_WORLD,"MatMult error after basis ortogonalization\n");
309: }
310: MatDestroy(&D);
311: }
313: if (testcompress) {
314: MatDuplicate(B,MAT_COPY_VALUES,&D);
315: MatSetOption(D,MAT_SYMMETRIC,symm);
316: MatH2OpusCompress(D,PETSC_SMALL);
317: MatDestroy(&D);
318: }
320: /* check explicit operator */
321: if (checkexpl) {
322: Mat Be, Bet;
324: MatComputeOperator(B,MATDENSE,&D);
325: MatDuplicate(D,MAT_COPY_VALUES,&Be);
326: MatNorm(D,NORM_FROBENIUS,&nB);
327: MatViewFromOptions(D,NULL,"-expl_view");
328: MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
329: MatViewFromOptions(D,NULL,"-diff_view");
330: MatNorm(D,NORM_FROBENIUS,&nD);
331: MatNorm(A,NORM_FROBENIUS,&nA);
332: PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
333: MatDestroy(&D);
335: if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
336: MatTranspose(A,MAT_INPLACE_MATRIX,&A);
337: MatComputeOperatorTranspose(B,MATDENSE,&D);
338: MatDuplicate(D,MAT_COPY_VALUES,&Bet);
339: MatNorm(D,NORM_FROBENIUS,&nB);
340: MatViewFromOptions(D,NULL,"-expl_trans_view");
341: MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);
342: MatViewFromOptions(D,NULL,"-diff_trans_view");
343: MatNorm(D,NORM_FROBENIUS,&nD);
344: MatNorm(A,NORM_FROBENIUS,&nA);
345: PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);
346: MatDestroy(&D);
348: MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);
349: MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);
350: MatViewFromOptions(Be,NULL,"-diff_expl_view");
351: MatNorm(Be,NORM_FROBENIUS,&nB);
352: PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);
353: MatDestroy(&Be);
354: MatDestroy(&Bet);
355: }
356: }
357: MatDestroy(&A);
358: MatDestroy(&B);
359: MatDestroy(&C);
360: PetscFree(Cdata);
361: PetscFree(Adata);
362: PetscFinalize();
363: return ierr;
364: }
366: /*TEST
368: build:
369: requires: h2opus
371: #tests from kernel
372: test:
373: requires: h2opus
374: nsize: 1
375: suffix: 1
376: args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0
378: test:
379: requires: h2opus
380: nsize: 1
381: suffix: 1_ld
382: output_file: output/ex66_1.out
383: args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 0
385: test:
386: requires: h2opus cuda
387: nsize: 1
388: suffix: 1_cuda
389: output_file: output/ex66_1.out
390: args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1
392: test:
393: requires: h2opus cuda
394: nsize: 1
395: suffix: 1_cuda_ld
396: output_file: output/ex66_1.out
397: args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 1
399: test:
400: requires: h2opus
401: nsize: {{2 3}}
402: suffix: 1_par
403: args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0
405: test:
406: requires: h2opus cuda
407: nsize: {{2 3}}
408: suffix: 1_par_cuda
409: args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
410: output_file: output/ex66_1_par.out
412: #tests from matrix sampling (parallel or unsymmetric not supported)
413: test:
414: requires: h2opus
415: nsize: 1
416: suffix: 2
417: args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0
419: test:
420: requires: h2opus cuda
421: nsize: 1
422: suffix: 2_cuda
423: output_file: output/ex66_2.out
424: args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}
426: #tests view operation
427: test:
428: requires: h2opus !cuda
429: filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
430: nsize: {{1 2 3}}
431: suffix: view
432: args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
434: test:
435: requires: h2opus cuda
436: filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
437: nsize: {{1 2 3}}
438: suffix: view_cuda
439: args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -bgpu -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
441: TEST*/