Actual source code: ex18.c
static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";
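
/*
   The example assembles a bs-component, 5-point-stencil operator on an m x n
   structured grid (n = nlocal*size, so each rank contributes nlocal grid
   columns), imposes boundary values with MatZeroRowsColumns(), and checks the
   right-hand side that call returns.

   Run-time options used below:
     -bs <n>         block size (number of components per grid node)
     -nonlocal_bc    let ranks register boundary nodes they do not necessarily own
     -diag <v>       value placed on the diagonal of the zeroed rows/columns
     -convname <t>   convert the assembled matrix to type <t> before testing
     -zerorhs <b>    zero the right-hand side before the test (default: true)
*/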
#include <petscmat.h>

int main(int argc,char **args)
{
  Mat            A;
  Vec            x, rhs, y;
  PetscInt       i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J;
  PetscInt       *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt    rank,size;
  PetscScalar    v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0;
  PetscReal      norm;
  char           convname[64];
  PetscBool      upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  PetscInitialize(&argc,&args,(char*)0,help);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n = nlocal*size;

  PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL);
  PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL);
  PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL);
  PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert);
  PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL);

  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs);
  MatSetFromOptions(A);
  MatSetUp(A);

  MatCreateVecs(A, NULL, &rhs);
  VecSetFromOptions(rhs);
  VecSetUp(rhs);

  rhsval = 0.0;
  for (i=0; i<m; i++) {
    for (j=nlocal*rank; j<nlocal*(rank+1); j++) {
      a = a0;
      for (b=0; b<bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v = -1.0; Ii = (j + n*i)*bs + b;
        if (i>0)   {J = Ii - n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (i<m-1) {J = Ii + n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j>0)   {J = Ii - 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j<n-1) {J = Ii + 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);
        if (upwind) {
          /* now add a 2nd order upwind advection term to add a little asymmetry */
          if (j>2) {
            J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a;
            MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES);
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0*a; v0 = 1.0*a;
          }
          if (j>1) {J = Ii-1*bs; MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES);}
          MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES);
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b>0)    {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
          if (b<bs-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        }
        /* make up some rhs */
        VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);
        rhsval += 1.0;
      }
    }
  }
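
  /* Assemble: entries added above with ADD_VALUES to rows owned by other ranks
     are communicated to their owners during MatAssemblyBegin/End */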
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  if (convert) { /* Test different Mat implementations */
    Mat B;

    MatConvert(A,convname,MAT_INITIAL_MATRIX,&B);
    MatDestroy(&A);
    A = B;
  }

  VecAssemblyBegin(rhs);
  VecAssemblyEnd(rhs);
  /* set rhs to zero to simplify */
  if (zerorhs) {
    VecZeroEntries(rhs);
  }
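
  /* Build the list of boundary nodes: the first grid row (i == 0) and the first
     grid column (j == 0).  The two branches below only differ in which rank
     registers each node: with -nonlocal_bc ranks may list nodes they do not own,
     otherwise every boundary node is listed by its owner only. */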
  if (nonlocalBC) {
    /* version where boundary conditions are set by processes that don't necessarily own the nodes */
    if (rank == 0) {
      nboundary_nodes = size>m ? nlocal : m-size+nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
      for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;}
    } else if (rank < m) {
      nboundary_nodes = nlocal+1;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      boundary_nodes[0] = rank*n;
      k = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
    }
    for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;}
  } else {
    /* version where boundary conditions are set by the node owners only */
    PetscMalloc1(m*n,&boundary_nodes);
    k = 0;
    for (j=0; j<n; j++) {
      Ii = j;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    for (i=1; i<m; i++) {
      Ii = n*i;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }

  VecDuplicate(rhs, &x);
  VecZeroEntries(x);
  PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values);
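  /* Prescribe a boundary value for every boundary row: the global node number for
     the first component, incremented by 0.1 for each further component.  These
     values go into x, which is passed to MatZeroRowsColumns() further below. */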
  for (k=0; k<nboundary_nodes; k++) {
    Ii = boundary_nodes[k]*bs;
    v  = 1.0*boundary_nodes[k];
    for (b=0; b<bs; b++, Ii++) {
      boundary_indices[k*bs+b] = Ii;
      boundary_values[k*bs+b]  = v;
      PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v));
      v += 0.1;
    }
  }
  PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);
  VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(x);
  VecAssemblyEnd(x);

  /* We can check the rhs returned by MatZeroRowsColumns() by computing y = rhs - A*x and overwriting the boundary entries with the boundary values */
  VecDuplicate(x, &y);
  MatMult(A, x, y);
  VecAYPX(y, -1.0, rhs);                                           /* y = rhs - A*x */
  for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag; /* boundary entries of the returned rhs should equal diag*x */
  VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(y);
  VecAssemblyEnd(y);

  PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");
  MatView(A, PETSC_VIEWER_STDOUT_WORLD);
  VecView(x,PETSC_VIEWER_STDOUT_WORLD);
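
  /* Zero the listed rows and columns of A and put 'diag' on their diagonal; because
     x and rhs are also passed in, the boundary entries of rhs are set to diag*x and
     the contribution of the eliminated columns (A*x) is removed from the remaining
     rhs entries -- exactly what the vector y computed above reproduces by hand. */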
  MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs);
  PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");
  VecView(rhs,PETSC_VIEWER_STDOUT_WORLD);
  VecAXPY(y, -1.0, rhs);
  VecNorm(y, NORM_INFINITY, &norm);
  if (norm > 1.0e-10) {
    PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);
    VecView(y,PETSC_VIEWER_STDOUT_WORLD);
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscFree(boundary_nodes);
  PetscFree2(boundary_indices,boundary_values);
  VecDestroy(&x);
  VecDestroy(&y);
  VecDestroy(&rhs);
  MatDestroy(&A);

  PetscFinalize();
  return 0;
}

/*TEST

   test:
      diff_args: -j
      suffix: 0

   test:
      diff_args: -j
      suffix: 1
      nsize: 2

   test:
      diff_args: -j
      suffix: 10
      nsize: 2
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 11
      nsize: 7
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 12
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 13
      nsize: 2
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 14
      nsize: 7
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 2
      nsize: 7

   test:
      diff_args: -j
      suffix: 3
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 4
      nsize: 2
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 5
      nsize: 7
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 6
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 7
      nsize: 2
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 8
      nsize: 7
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 9
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 15
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 16
      nsize: 2
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 17
      args: -bs 2 -nonlocal_bc -convname dense

   testset:
      diff_args: -j
      suffix: full
      nsize: {{1 3}separate output}
      args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_1
      nsize: 1
      args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_3
      nsize: 3
      args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/