Actual source code: bddcprivate.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: PetscScalar *uwork,*data,*U, ds = 0.;
18: PetscReal *sing;
19: PetscBLASInt bM,bN,lwork,lierr,di = 1;
20: PetscInt ulw,i,nr,nc,n;
21: #if defined(PETSC_USE_COMPLEX)
22: PetscReal *rwork2;
23: #endif
25: MatGetSize(A,&nr,&nc);
26: if (!nr || !nc) return 0;
28: /* workspace */
29: if (!work) {
30: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
31: PetscMalloc1(ulw,&uwork);
32: } else {
33: ulw = lw;
34: uwork = work;
35: }
36: n = PetscMin(nr,nc);
37: if (!rwork) {
38: PetscMalloc1(n,&sing);
39: } else {
40: sing = rwork;
41: }
43: /* SVD */
44: PetscMalloc1(nr*nr,&U);
45: PetscBLASIntCast(nr,&bM);
46: PetscBLASIntCast(nc,&bN);
47: PetscBLASIntCast(ulw,&lwork);
48: MatDenseGetArray(A,&data);
49: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
50: #if !defined(PETSC_USE_COMPLEX)
51: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
52: #else
53: PetscMalloc1(5*n,&rwork2);
54: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr));
55: PetscFree(rwork2);
56: #endif
57: PetscFPTrapPop();
59: MatDenseRestoreArray(A,&data);
60: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
61: if (!rwork) {
62: PetscFree(sing);
63: }
64: if (!work) {
65: PetscFree(uwork);
66: }
67: /* create B */
68: if (!range) {
69: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
70: MatDenseGetArray(*B,&data);
71: PetscArraycpy(data,U+nr*i,(nr-i)*nr);
72: } else {
73: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
74: MatDenseGetArray(*B,&data);
75: PetscArraycpy(data,U,i*nr);
76: }
77: MatDenseRestoreArray(*B,&data);
78: PetscFree(U);
79: return 0;
80: }
82: /* TODO REMOVE */
83: #if defined(PRINT_GDET)
84: static int inc = 0;
85: static int lev = 0;
86: #endif
88: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
89: {
90: Mat GE,GEd;
91: PetscInt rsize,csize,esize;
92: PetscScalar *ptr;
94: ISGetSize(edge,&esize);
95: if (!esize) return 0;
96: ISGetSize(extrow,&rsize);
97: ISGetSize(extcol,&csize);
99: /* gradients */
100: ptr = work + 5*esize;
101: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
102: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
103: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
104: MatDestroy(&GE);
106: /* constants */
107: ptr += rsize*csize;
108: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
109: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
110: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
111: MatDestroy(&GE);
112: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
113: MatDestroy(&GEd);
115: if (corners) {
116: Mat GEc;
117: const PetscScalar *vals;
118: PetscScalar v;
120: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
121: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
122: MatDenseGetArrayRead(GEd,&vals);
123: /* v = PetscAbsScalar(vals[0]) */;
124: v = 1.;
125: cvals[0] = vals[0]/v;
126: cvals[1] = vals[1]/v;
127: MatDenseRestoreArrayRead(GEd,&vals);
128: MatScale(*GKins,1./v);
129: #if defined(PRINT_GDET)
130: {
131: PetscViewer viewer;
132: char filename[256];
133: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
134: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
135: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
136: PetscObjectSetName((PetscObject)GEc,"GEc");
137: MatView(GEc,viewer);
138: PetscObjectSetName((PetscObject)(*GKins),"GK");
139: MatView(*GKins,viewer);
140: PetscObjectSetName((PetscObject)GEd,"Gproj");
141: MatView(GEd,viewer);
142: PetscViewerDestroy(&viewer);
143: }
144: #endif
145: MatDestroy(&GEd);
146: MatDestroy(&GEc);
147: }
149: return 0;
150: }
152: PetscErrorCode PCBDDCNedelecSupport(PC pc)
153: {
154: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
155: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
156: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
157: Vec tvec;
158: PetscSF sfv;
159: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
160: MPI_Comm comm;
161: IS lned,primals,allprimals,nedfieldlocal;
162: IS *eedges,*extrows,*extcols,*alleedges;
163: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
164: PetscScalar *vals,*work;
165: PetscReal *rwork;
166: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
167: PetscInt ne,nv,Lv,order,n,field;
168: PetscInt n_neigh,*neigh,*n_shared,**shared;
169: PetscInt i,j,extmem,cum,maxsize,nee;
170: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
171: PetscInt *sfvleaves,*sfvroots;
172: PetscInt *corners,*cedges;
173: PetscInt *ecount,**eneighs,*vcount,**vneighs;
174: PetscInt *emarks;
175: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
176: PetscErrorCode ierr;
178: /* If the discrete gradient is defined for a subset of dofs and global is true,
179: it assumes G is given in global ordering for all the dofs.
180: Otherwise, the ordering is global for the Nedelec field */
181: order = pcbddc->nedorder;
182: conforming = pcbddc->conforming;
183: field = pcbddc->nedfield;
184: global = pcbddc->nedglobal;
185: setprimal = PETSC_FALSE;
186: print = PETSC_FALSE;
187: singular = PETSC_FALSE;
189: /* Command line customization */
190: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
191: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
192: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
193: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
194: /* print debug info TODO: to be removed */
195: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
196: PetscOptionsEnd();
198: /* Return if there are no edges in the decomposition and the problem is not singular */
199: MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
200: ISLocalToGlobalMappingGetSize(al2g,&n);
201: PetscObjectGetComm((PetscObject)pc,&comm);
202: if (!singular) {
203: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
204: lrc[0] = PETSC_FALSE;
205: for (i=0;i<n;i++) {
206: if (PetscRealPart(vals[i]) > 2.) {
207: lrc[0] = PETSC_TRUE;
208: break;
209: }
210: }
211: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
212: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
213: if (!lrc[1]) return 0;
214: }
216: /* Get Nedelec field */
218: if (pcbddc->n_ISForDofsLocal && field >= 0) {
219: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
220: nedfieldlocal = pcbddc->ISForDofsLocal[field];
221: ISGetLocalSize(nedfieldlocal,&ne);
222: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
223: ne = n;
224: nedfieldlocal = NULL;
225: global = PETSC_TRUE;
226: } else if (field == PETSC_DECIDE) {
227: PetscInt rst,ren,*idx;
229: PetscArrayzero(matis->sf_leafdata,n);
230: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
231: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
232: for (i=rst;i<ren;i++) {
233: PetscInt nc;
235: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
236: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
237: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
238: }
239: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
240: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
241: PetscMalloc1(n,&idx);
242: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
243: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
244: } else {
245: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
246: }
248: /* Sanity checks */
253: /* Just set primal dofs and return */
254: if (setprimal) {
255: IS enedfieldlocal;
256: PetscInt *eidxs;
258: PetscMalloc1(ne,&eidxs);
259: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
260: if (nedfieldlocal) {
261: ISGetIndices(nedfieldlocal,&idxs);
262: for (i=0,cum=0;i<ne;i++) {
263: if (PetscRealPart(vals[idxs[i]]) > 2.) {
264: eidxs[cum++] = idxs[i];
265: }
266: }
267: ISRestoreIndices(nedfieldlocal,&idxs);
268: } else {
269: for (i=0,cum=0;i<ne;i++) {
270: if (PetscRealPart(vals[i]) > 2.) {
271: eidxs[cum++] = i;
272: }
273: }
274: }
275: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
276: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
277: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
278: PetscFree(eidxs);
279: ISDestroy(&nedfieldlocal);
280: ISDestroy(&enedfieldlocal);
281: return 0;
282: }
284: /* Compute some l2g maps */
285: if (nedfieldlocal) {
286: IS is;
288: /* need to map from the local Nedelec field to local numbering */
289: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
290: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
291: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
292: ISLocalToGlobalMappingCreateIS(is,&al2g);
293: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
294: if (global) {
295: PetscObjectReference((PetscObject)al2g);
296: el2g = al2g;
297: } else {
298: IS gis;
300: ISRenumber(is,NULL,NULL,&gis);
301: ISLocalToGlobalMappingCreateIS(gis,&el2g);
302: ISDestroy(&gis);
303: }
304: ISDestroy(&is);
305: } else {
306: /* restore default */
307: pcbddc->nedfield = -1;
308: /* one ref for the destruction of al2g, one for el2g */
309: PetscObjectReference((PetscObject)al2g);
310: PetscObjectReference((PetscObject)al2g);
311: el2g = al2g;
312: fl2g = NULL;
313: }
315: /* Start communication to drop connections for interior edges (for cc analysis only) */
316: PetscArrayzero(matis->sf_leafdata,n);
317: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
318: if (nedfieldlocal) {
319: ISGetIndices(nedfieldlocal,&idxs);
320: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
321: ISRestoreIndices(nedfieldlocal,&idxs);
322: } else {
323: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
324: }
325: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
326: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
328: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
329: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
330: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
331: if (global) {
332: PetscInt rst;
334: MatGetOwnershipRange(G,&rst,NULL);
335: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
336: if (matis->sf_rootdata[i] < 2) {
337: matis->sf_rootdata[cum++] = i + rst;
338: }
339: }
340: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
341: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
342: } else {
343: PetscInt *tbz;
345: PetscMalloc1(ne,&tbz);
346: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
347: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
348: ISGetIndices(nedfieldlocal,&idxs);
349: for (i=0,cum=0;i<ne;i++)
350: if (matis->sf_leafdata[idxs[i]] == 1)
351: tbz[cum++] = i;
352: ISRestoreIndices(nedfieldlocal,&idxs);
353: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
354: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
355: PetscFree(tbz);
356: }
357: } else { /* we need the entire G to infer the nullspace */
358: PetscObjectReference((PetscObject)pcbddc->discretegradient);
359: G = pcbddc->discretegradient;
360: }
362: /* Extract subdomain relevant rows of G */
363: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
364: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
365: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
366: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
367: ISDestroy(&lned);
368: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
369: MatDestroy(&lGall);
370: MatISGetLocalMat(lGis,&lG);
372: /* SF for nodal dofs communications */
373: MatGetLocalSize(G,NULL,&Lv);
374: MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g);
375: PetscObjectReference((PetscObject)vl2g);
376: ISLocalToGlobalMappingGetSize(vl2g,&nv);
377: PetscSFCreate(comm,&sfv);
378: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
379: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
380: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
381: i = singular ? 2 : 1;
382: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
384: /* Destroy temporary G created in MATIS format and modified G */
385: PetscObjectReference((PetscObject)lG);
386: MatDestroy(&lGis);
387: MatDestroy(&G);
389: if (print) {
390: PetscObjectSetName((PetscObject)lG,"initial_lG");
391: MatView(lG,NULL);
392: }
394: /* Save lG for values insertion in change of basis */
395: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
397: /* Analyze the edge-nodes connections (duplicate lG) */
398: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
399: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
400: PetscBTCreate(nv,&btv);
401: PetscBTCreate(ne,&bte);
402: PetscBTCreate(ne,&btb);
403: PetscBTCreate(ne,&btbd);
404: PetscBTCreate(nv,&btvcand);
405: /* need to import the boundary specification to ensure the
406: proper detection of coarse edges' endpoints */
407: if (pcbddc->DirichletBoundariesLocal) {
408: IS is;
410: if (fl2g) {
411: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
412: } else {
413: is = pcbddc->DirichletBoundariesLocal;
414: }
415: ISGetLocalSize(is,&cum);
416: ISGetIndices(is,&idxs);
417: for (i=0;i<cum;i++) {
418: if (idxs[i] >= 0) {
419: PetscBTSet(btb,idxs[i]);
420: PetscBTSet(btbd,idxs[i]);
421: }
422: }
423: ISRestoreIndices(is,&idxs);
424: if (fl2g) {
425: ISDestroy(&is);
426: }
427: }
428: if (pcbddc->NeumannBoundariesLocal) {
429: IS is;
431: if (fl2g) {
432: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
433: } else {
434: is = pcbddc->NeumannBoundariesLocal;
435: }
436: ISGetLocalSize(is,&cum);
437: ISGetIndices(is,&idxs);
438: for (i=0;i<cum;i++) {
439: if (idxs[i] >= 0) {
440: PetscBTSet(btb,idxs[i]);
441: }
442: }
443: ISRestoreIndices(is,&idxs);
444: if (fl2g) {
445: ISDestroy(&is);
446: }
447: }
449: /* Count neighs per dof */
450: ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
451: ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);
453: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
454: for proper detection of coarse edges' endpoints */
455: PetscBTCreate(ne,&btee);
456: for (i=0;i<ne;i++) {
457: if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
458: PetscBTSet(btee,i);
459: }
460: }
461: PetscMalloc1(ne,&marks);
462: if (!conforming) {
463: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
464: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
465: }
466: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
467: MatSeqAIJGetArray(lGe,&vals);
468: cum = 0;
469: for (i=0;i<ne;i++) {
470: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
471: if (!PetscBTLookup(btee,i)) {
472: marks[cum++] = i;
473: continue;
474: }
475: /* set badly connected edge dofs as primal */
476: if (!conforming) {
477: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
478: marks[cum++] = i;
479: PetscBTSet(bte,i);
480: for (j=ii[i];j<ii[i+1];j++) {
481: PetscBTSet(btv,jj[j]);
482: }
483: } else {
484: /* every edge dofs should be connected trough a certain number of nodal dofs
485: to other edge dofs belonging to coarse edges
486: - at most 2 endpoints
487: - order-1 interior nodal dofs
488: - no undefined nodal dofs (nconn < order)
489: */
490: PetscInt ends = 0,ints = 0, undef = 0;
491: for (j=ii[i];j<ii[i+1];j++) {
492: PetscInt v = jj[j],k;
493: PetscInt nconn = iit[v+1]-iit[v];
494: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
495: if (nconn > order) ends++;
496: else if (nconn == order) ints++;
497: else undef++;
498: }
499: if (undef || ends > 2 || ints != order -1) {
500: marks[cum++] = i;
501: PetscBTSet(bte,i);
502: for (j=ii[i];j<ii[i+1];j++) {
503: PetscBTSet(btv,jj[j]);
504: }
505: }
506: }
507: }
508: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
509: if (!order && ii[i+1] != ii[i]) {
510: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
511: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
512: }
513: }
514: PetscBTDestroy(&btee);
515: MatSeqAIJRestoreArray(lGe,&vals);
516: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
517: if (!conforming) {
518: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
519: MatDestroy(&lGt);
520: }
521: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
523: /* identify splitpoints and corner candidates */
524: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
525: if (print) {
526: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
527: MatView(lGe,NULL);
528: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
529: MatView(lGt,NULL);
530: }
531: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
532: MatSeqAIJGetArray(lGt,&vals);
533: for (i=0;i<nv;i++) {
534: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
535: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
536: if (!order) { /* variable order */
537: PetscReal vorder = 0.;
539: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
540: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
542: ord = 1;
543: }
544: PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord);
545: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
546: if (PetscBTLookup(btbd,jj[j])) {
547: bdir = PETSC_TRUE;
548: break;
549: }
550: if (vc != ecount[jj[j]]) {
551: sneighs = PETSC_FALSE;
552: } else {
553: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
554: for (k=0;k<vc;k++) {
555: if (vn[k] != en[k]) {
556: sneighs = PETSC_FALSE;
557: break;
558: }
559: }
560: }
561: }
562: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
563: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
564: PetscBTSet(btv,i);
565: } else if (test == ord) {
566: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
567: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
568: PetscBTSet(btv,i);
569: } else {
570: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
571: PetscBTSet(btvcand,i);
572: }
573: }
574: }
575: ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
576: ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
577: PetscBTDestroy(&btbd);
579: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
580: if (order != 1) {
581: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
582: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
583: for (i=0;i<nv;i++) {
584: if (PetscBTLookup(btvcand,i)) {
585: PetscBool found = PETSC_FALSE;
586: for (j=ii[i];j<ii[i+1] && !found;j++) {
587: PetscInt k,e = jj[j];
588: if (PetscBTLookup(bte,e)) continue;
589: for (k=iit[e];k<iit[e+1];k++) {
590: PetscInt v = jjt[k];
591: if (v != i && PetscBTLookup(btvcand,v)) {
592: found = PETSC_TRUE;
593: break;
594: }
595: }
596: }
597: if (!found) {
598: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i);
599: PetscBTClear(btvcand,i);
600: } else {
601: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i);
602: }
603: }
604: }
605: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
606: }
607: MatSeqAIJRestoreArray(lGt,&vals);
608: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
609: MatDestroy(&lGe);
611: /* Get the local G^T explicitly */
612: MatDestroy(&lGt);
613: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
614: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
616: /* Mark interior nodal dofs */
617: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
618: PetscBTCreate(nv,&btvi);
619: for (i=1;i<n_neigh;i++) {
620: for (j=0;j<n_shared[i];j++) {
621: PetscBTSet(btvi,shared[i][j]);
622: }
623: }
624: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
626: /* communicate corners and splitpoints */
627: PetscMalloc1(nv,&vmarks);
628: PetscArrayzero(sfvleaves,nv);
629: PetscArrayzero(sfvroots,Lv);
630: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
632: if (print) {
633: IS tbz;
635: cum = 0;
636: for (i=0;i<nv;i++)
637: if (sfvleaves[i])
638: vmarks[cum++] = i;
640: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
641: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
642: ISView(tbz,NULL);
643: ISDestroy(&tbz);
644: }
646: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
647: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
648: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);
649: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);
651: /* Zero rows of lGt corresponding to identified corners
652: and interior nodal dofs */
653: cum = 0;
654: for (i=0;i<nv;i++) {
655: if (sfvleaves[i]) {
656: vmarks[cum++] = i;
657: PetscBTSet(btv,i);
658: }
659: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
660: }
661: PetscBTDestroy(&btvi);
662: if (print) {
663: IS tbz;
665: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
666: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
667: ISView(tbz,NULL);
668: ISDestroy(&tbz);
669: }
670: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
671: PetscFree(vmarks);
672: PetscSFDestroy(&sfv);
673: PetscFree2(sfvleaves,sfvroots);
675: /* Recompute G */
676: MatDestroy(&lG);
677: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
678: if (print) {
679: PetscObjectSetName((PetscObject)lG,"used_lG");
680: MatView(lG,NULL);
681: PetscObjectSetName((PetscObject)lGt,"used_lGt");
682: MatView(lGt,NULL);
683: }
685: /* Get primal dofs (if any) */
686: cum = 0;
687: for (i=0;i<ne;i++) {
688: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
689: }
690: if (fl2g) {
691: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
692: }
693: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
694: if (print) {
695: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
696: ISView(primals,NULL);
697: }
698: PetscBTDestroy(&bte);
699: /* TODO: what if the user passed in some of them ? */
700: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
701: ISDestroy(&primals);
703: /* Compute edge connectivity */
704: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
706: /* Symbolic conn = lG*lGt */
707: MatProductCreate(lG,lGt,NULL,&conn);
708: MatProductSetType(conn,MATPRODUCT_AB);
709: MatProductSetAlgorithm(conn,"default");
710: MatProductSetFill(conn,PETSC_DEFAULT);
711: PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");
712: MatProductSetFromOptions(conn);
713: MatProductSymbolic(conn);
715: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
716: if (fl2g) {
717: PetscBT btf;
718: PetscInt *iia,*jja,*iiu,*jju;
719: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
721: /* create CSR for all local dofs */
722: PetscMalloc1(n+1,&iia);
723: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
725: iiu = pcbddc->mat_graph->xadj;
726: jju = pcbddc->mat_graph->adjncy;
727: } else if (pcbddc->use_local_adj) {
728: rest = PETSC_TRUE;
729: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
730: } else {
731: free = PETSC_TRUE;
732: PetscMalloc2(n+1,&iiu,n,&jju);
733: iiu[0] = 0;
734: for (i=0;i<n;i++) {
735: iiu[i+1] = i+1;
736: jju[i] = -1;
737: }
738: }
740: /* import sizes of CSR */
741: iia[0] = 0;
742: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
744: /* overwrite entries corresponding to the Nedelec field */
745: PetscBTCreate(n,&btf);
746: ISGetIndices(nedfieldlocal,&idxs);
747: for (i=0;i<ne;i++) {
748: PetscBTSet(btf,idxs[i]);
749: iia[idxs[i]+1] = ii[i+1]-ii[i];
750: }
752: /* iia in CSR */
753: for (i=0;i<n;i++) iia[i+1] += iia[i];
755: /* jja in CSR */
756: PetscMalloc1(iia[n],&jja);
757: for (i=0;i<n;i++)
758: if (!PetscBTLookup(btf,i))
759: for (j=0;j<iiu[i+1]-iiu[i];j++)
760: jja[iia[i]+j] = jju[iiu[i]+j];
762: /* map edge dofs connectivity */
763: if (jj) {
764: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
765: for (i=0;i<ne;i++) {
766: PetscInt e = idxs[i];
767: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
768: }
769: }
770: ISRestoreIndices(nedfieldlocal,&idxs);
771: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
772: if (rest) {
773: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
774: }
775: if (free) {
776: PetscFree2(iiu,jju);
777: }
778: PetscBTDestroy(&btf);
779: } else {
780: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
781: }
783: /* Analyze interface for edge dofs */
784: PCBDDCAnalyzeInterface(pc);
785: pcbddc->mat_graph->twodim = PETSC_FALSE;
787: /* Get coarse edges in the edge space */
788: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
789: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
791: if (fl2g) {
792: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
793: PetscMalloc1(nee,&eedges);
794: for (i=0;i<nee;i++) {
795: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
796: }
797: } else {
798: eedges = alleedges;
799: primals = allprimals;
800: }
802: /* Mark fine edge dofs with their coarse edge id */
803: PetscArrayzero(marks,ne);
804: ISGetLocalSize(primals,&cum);
805: ISGetIndices(primals,&idxs);
806: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
807: ISRestoreIndices(primals,&idxs);
808: if (print) {
809: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
810: ISView(primals,NULL);
811: }
813: maxsize = 0;
814: for (i=0;i<nee;i++) {
815: PetscInt size,mark = i+1;
817: ISGetLocalSize(eedges[i],&size);
818: ISGetIndices(eedges[i],&idxs);
819: for (j=0;j<size;j++) marks[idxs[j]] = mark;
820: ISRestoreIndices(eedges[i],&idxs);
821: maxsize = PetscMax(maxsize,size);
822: }
824: /* Find coarse edge endpoints */
825: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
826: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
827: for (i=0;i<nee;i++) {
828: PetscInt mark = i+1,size;
830: ISGetLocalSize(eedges[i],&size);
831: if (!size && nedfieldlocal) continue;
833: ISGetIndices(eedges[i],&idxs);
834: if (print) {
835: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
836: ISView(eedges[i],NULL);
837: }
838: for (j=0;j<size;j++) {
839: PetscInt k, ee = idxs[j];
840: if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee);
841: for (k=ii[ee];k<ii[ee+1];k++) {
842: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]);
843: if (PetscBTLookup(btv,jj[k])) {
844: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]);
845: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
846: PetscInt k2;
847: PetscBool corner = PETSC_FALSE;
848: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
849: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
850: /* it's a corner if either is connected with an edge dof belonging to a different cc or
851: if the edge dof lie on the natural part of the boundary */
852: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
853: corner = PETSC_TRUE;
854: break;
855: }
856: }
857: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
858: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]);
859: PetscBTSet(btv,jj[k]);
860: } else {
861: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
862: }
863: }
864: }
865: }
866: ISRestoreIndices(eedges[i],&idxs);
867: }
868: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
869: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
870: PetscBTDestroy(&btb);
872: /* Reset marked primal dofs */
873: ISGetLocalSize(primals,&cum);
874: ISGetIndices(primals,&idxs);
875: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
876: ISRestoreIndices(primals,&idxs);
878: /* Now use the initial lG */
879: MatDestroy(&lG);
880: MatDestroy(&lGt);
881: lG = lGinit;
882: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
884: /* Compute extended cols indices */
885: PetscBTCreate(nv,&btvc);
886: PetscBTCreate(nee,&bter);
887: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
888: MatSeqAIJGetMaxRowNonzeros(lG,&i);
889: i *= maxsize;
890: PetscCalloc1(nee,&extcols);
891: PetscMalloc2(i,&extrow,i,&gidxs);
892: eerr = PETSC_FALSE;
893: for (i=0;i<nee;i++) {
894: PetscInt size,found = 0;
896: cum = 0;
897: ISGetLocalSize(eedges[i],&size);
898: if (!size && nedfieldlocal) continue;
900: ISGetIndices(eedges[i],&idxs);
901: PetscBTMemzero(nv,btvc);
902: for (j=0;j<size;j++) {
903: PetscInt k,ee = idxs[j];
904: for (k=ii[ee];k<ii[ee+1];k++) {
905: PetscInt vv = jj[k];
906: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
907: else if (!PetscBTLookupSet(btvc,vv)) found++;
908: }
909: }
910: ISRestoreIndices(eedges[i],&idxs);
911: PetscSortRemoveDupsInt(&cum,extrow);
912: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
913: PetscSortIntWithArray(cum,gidxs,extrow);
914: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
915: /* it may happen that endpoints are not defined at this point
916: if it is the case, mark this edge for a second pass */
917: if (cum != size -1 || found != 2) {
918: PetscBTSet(bter,i);
919: if (print) {
920: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
921: ISView(eedges[i],NULL);
922: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
923: ISView(extcols[i],NULL);
924: }
925: eerr = PETSC_TRUE;
926: }
927: }
929: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
930: if (done) {
931: PetscInt *newprimals;
933: PetscMalloc1(ne,&newprimals);
934: ISGetLocalSize(primals,&cum);
935: ISGetIndices(primals,&idxs);
936: PetscArraycpy(newprimals,idxs,cum);
937: ISRestoreIndices(primals,&idxs);
938: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
939: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
940: for (i=0;i<nee;i++) {
941: PetscBool has_candidates = PETSC_FALSE;
942: if (PetscBTLookup(bter,i)) {
943: PetscInt size,mark = i+1;
945: ISGetLocalSize(eedges[i],&size);
946: ISGetIndices(eedges[i],&idxs);
947: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
948: for (j=0;j<size;j++) {
949: PetscInt k,ee = idxs[j];
950: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
951: for (k=ii[ee];k<ii[ee+1];k++) {
952: /* set all candidates located on the edge as corners */
953: if (PetscBTLookup(btvcand,jj[k])) {
954: PetscInt k2,vv = jj[k];
955: has_candidates = PETSC_TRUE;
956: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv);
957: PetscBTSet(btv,vv);
958: /* set all edge dofs connected to candidate as primals */
959: for (k2=iit[vv];k2<iit[vv+1];k2++) {
960: if (marks[jjt[k2]] == mark) {
961: PetscInt k3,ee2 = jjt[k2];
962: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2);
963: newprimals[cum++] = ee2;
964: /* finally set the new corners */
965: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
966: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]);
967: PetscBTSet(btv,jj[k3]);
968: }
969: }
970: }
971: } else {
972: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]);
973: }
974: }
975: }
976: if (!has_candidates) { /* circular edge */
977: PetscInt k, ee = idxs[0],*tmarks;
979: PetscCalloc1(ne,&tmarks);
980: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i);
981: for (k=ii[ee];k<ii[ee+1];k++) {
982: PetscInt k2;
983: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]);
984: PetscBTSet(btv,jj[k]);
985: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
986: }
987: for (j=0;j<size;j++) {
988: if (tmarks[idxs[j]] > 1) {
989: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]);
990: newprimals[cum++] = idxs[j];
991: }
992: }
993: PetscFree(tmarks);
994: }
995: ISRestoreIndices(eedges[i],&idxs);
996: }
997: ISDestroy(&extcols[i]);
998: }
999: PetscFree(extcols);
1000: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1001: PetscSortRemoveDupsInt(&cum,newprimals);
1002: if (fl2g) {
1003: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1004: ISDestroy(&primals);
1005: for (i=0;i<nee;i++) {
1006: ISDestroy(&eedges[i]);
1007: }
1008: PetscFree(eedges);
1009: }
1010: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1011: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1012: PetscFree(newprimals);
1013: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1014: ISDestroy(&primals);
1015: PCBDDCAnalyzeInterface(pc);
1016: pcbddc->mat_graph->twodim = PETSC_FALSE;
1017: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1018: if (fl2g) {
1019: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1020: PetscMalloc1(nee,&eedges);
1021: for (i=0;i<nee;i++) {
1022: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1023: }
1024: } else {
1025: eedges = alleedges;
1026: primals = allprimals;
1027: }
1028: PetscCalloc1(nee,&extcols);
1030: /* Mark again */
1031: PetscArrayzero(marks,ne);
1032: for (i=0;i<nee;i++) {
1033: PetscInt size,mark = i+1;
1035: ISGetLocalSize(eedges[i],&size);
1036: ISGetIndices(eedges[i],&idxs);
1037: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1038: ISRestoreIndices(eedges[i],&idxs);
1039: }
1040: if (print) {
1041: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1042: ISView(primals,NULL);
1043: }
1045: /* Recompute extended cols */
1046: eerr = PETSC_FALSE;
1047: for (i=0;i<nee;i++) {
1048: PetscInt size;
1050: cum = 0;
1051: ISGetLocalSize(eedges[i],&size);
1052: if (!size && nedfieldlocal) continue;
1054: ISGetIndices(eedges[i],&idxs);
1055: for (j=0;j<size;j++) {
1056: PetscInt k,ee = idxs[j];
1057: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1058: }
1059: ISRestoreIndices(eedges[i],&idxs);
1060: PetscSortRemoveDupsInt(&cum,extrow);
1061: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1062: PetscSortIntWithArray(cum,gidxs,extrow);
1063: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1064: if (cum != size -1) {
1065: if (print) {
1066: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1067: ISView(eedges[i],NULL);
1068: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1069: ISView(extcols[i],NULL);
1070: }
1071: eerr = PETSC_TRUE;
1072: }
1073: }
1074: }
1075: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1076: PetscFree2(extrow,gidxs);
1077: PetscBTDestroy(&bter);
1078: if (print) PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);
1079: /* an error should not occur at this point */
1082: /* Check the number of endpoints */
1083: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1084: PetscMalloc1(2*nee,&corners);
1085: PetscMalloc1(nee,&cedges);
1086: for (i=0;i<nee;i++) {
1087: PetscInt size, found = 0, gc[2];
1089: /* init with defaults */
1090: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1091: ISGetLocalSize(eedges[i],&size);
1092: if (!size && nedfieldlocal) continue;
1094: ISGetIndices(eedges[i],&idxs);
1095: PetscBTMemzero(nv,btvc);
1096: for (j=0;j<size;j++) {
1097: PetscInt k,ee = idxs[j];
1098: for (k=ii[ee];k<ii[ee+1];k++) {
1099: PetscInt vv = jj[k];
1100: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1102: corners[i*2+found++] = vv;
1103: }
1104: }
1105: }
1106: if (found != 2) {
1107: PetscInt e;
1108: if (fl2g) {
1109: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1110: } else {
1111: e = idxs[0];
1112: }
1113: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1114: }
1116: /* get primal dof index on this coarse edge */
1117: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1118: if (gc[0] > gc[1]) {
1119: PetscInt swap = corners[2*i];
1120: corners[2*i] = corners[2*i+1];
1121: corners[2*i+1] = swap;
1122: }
1123: cedges[i] = idxs[size-1];
1124: ISRestoreIndices(eedges[i],&idxs);
1125: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1126: }
1127: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1128: PetscBTDestroy(&btvc);
1130: if (PetscDefined(USE_DEBUG)) {
1131: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1132: not interfere with neighbouring coarse edges */
1133: PetscMalloc1(nee+1,&emarks);
1134: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1135: for (i=0;i<nv;i++) {
1136: PetscInt emax = 0,eemax = 0;
1138: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1139: PetscArrayzero(emarks,nee+1);
1140: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1141: for (j=1;j<nee+1;j++) {
1142: if (emax < emarks[j]) {
1143: emax = emarks[j];
1144: eemax = j;
1145: }
1146: }
1147: /* not relevant for edges */
1148: if (!eemax) continue;
1150: for (j=ii[i];j<ii[i+1];j++) {
1151: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1152: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1153: }
1154: }
1155: }
1156: PetscFree(emarks);
1157: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1158: }
1160: /* Compute extended rows indices for edge blocks of the change of basis */
1161: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1162: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1163: extmem *= maxsize;
1164: PetscMalloc1(extmem*nee,&extrow);
1165: PetscMalloc1(nee,&extrows);
1166: PetscCalloc1(nee,&extrowcum);
1167: for (i=0;i<nv;i++) {
1168: PetscInt mark = 0,size,start;
1170: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1171: for (j=ii[i];j<ii[i+1];j++)
1172: if (marks[jj[j]] && !mark)
1173: mark = marks[jj[j]];
1175: /* not relevant */
1176: if (!mark) continue;
1178: /* import extended row */
1179: mark--;
1180: start = mark*extmem+extrowcum[mark];
1181: size = ii[i+1]-ii[i];
1183: PetscArraycpy(extrow+start,jj+ii[i],size);
1184: extrowcum[mark] += size;
1185: }
1186: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1187: MatDestroy(&lGt);
1188: PetscFree(marks);
1190: /* Compress extrows */
1191: cum = 0;
1192: for (i=0;i<nee;i++) {
1193: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1194: PetscSortRemoveDupsInt(&size,start);
1195: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1196: cum = PetscMax(cum,size);
1197: }
1198: PetscFree(extrowcum);
1199: PetscBTDestroy(&btv);
1200: PetscBTDestroy(&btvcand);
1202: /* Workspace for lapack inner calls and VecSetValues */
1203: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1205: /* Create change of basis matrix (preallocation can be improved) */
1206: MatCreate(comm,&T);
1207: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1208: pc->pmat->rmap->N,pc->pmat->rmap->N);
1209: MatSetType(T,MATAIJ);
1210: MatSeqAIJSetPreallocation(T,10,NULL);
1211: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1212: MatSetLocalToGlobalMapping(T,al2g,al2g);
1213: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1214: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1215: ISLocalToGlobalMappingDestroy(&al2g);
1217: /* Defaults to identity */
1218: MatCreateVecs(pc->pmat,&tvec,NULL);
1219: VecSet(tvec,1.0);
1220: MatDiagonalSet(T,tvec,INSERT_VALUES);
1221: VecDestroy(&tvec);
1223: /* Create discrete gradient for the coarser level if needed */
1224: MatDestroy(&pcbddc->nedcG);
1225: ISDestroy(&pcbddc->nedclocal);
1226: if (pcbddc->current_level < pcbddc->max_levels) {
1227: ISLocalToGlobalMapping cel2g,cvl2g;
1228: IS wis,gwis;
1229: PetscInt cnv,cne;
1231: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1232: if (fl2g) {
1233: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1234: } else {
1235: PetscObjectReference((PetscObject)wis);
1236: pcbddc->nedclocal = wis;
1237: }
1238: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1239: ISDestroy(&wis);
1240: ISRenumber(gwis,NULL,&cne,&wis);
1241: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1242: ISDestroy(&wis);
1243: ISDestroy(&gwis);
1245: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1246: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1247: ISDestroy(&wis);
1248: ISRenumber(gwis,NULL,&cnv,&wis);
1249: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1250: ISDestroy(&wis);
1251: ISDestroy(&gwis);
1253: MatCreate(comm,&pcbddc->nedcG);
1254: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1255: MatSetType(pcbddc->nedcG,MATAIJ);
1256: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1257: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1258: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1259: ISLocalToGlobalMappingDestroy(&cel2g);
1260: ISLocalToGlobalMappingDestroy(&cvl2g);
1261: }
1262: ISLocalToGlobalMappingDestroy(&vl2g);
1264: #if defined(PRINT_GDET)
1265: inc = 0;
1266: lev = pcbddc->current_level;
1267: #endif
1269: /* Insert values in the change of basis matrix */
1270: for (i=0;i<nee;i++) {
1271: Mat Gins = NULL, GKins = NULL;
1272: IS cornersis = NULL;
1273: PetscScalar cvals[2];
1275: if (pcbddc->nedcG) {
1276: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1277: }
1278: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1279: if (Gins && GKins) {
1280: const PetscScalar *data;
1281: const PetscInt *rows,*cols;
1282: PetscInt nrh,nch,nrc,ncc;
1284: ISGetIndices(eedges[i],&cols);
1285: /* H1 */
1286: ISGetIndices(extrows[i],&rows);
1287: MatGetSize(Gins,&nrh,&nch);
1288: MatDenseGetArrayRead(Gins,&data);
1289: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1290: MatDenseRestoreArrayRead(Gins,&data);
1291: ISRestoreIndices(extrows[i],&rows);
1292: /* complement */
1293: MatGetSize(GKins,&nrc,&ncc);
1297: MatDenseGetArrayRead(GKins,&data);
1298: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1299: MatDenseRestoreArrayRead(GKins,&data);
1301: /* coarse discrete gradient */
1302: if (pcbddc->nedcG) {
1303: PetscInt cols[2];
1305: cols[0] = 2*i;
1306: cols[1] = 2*i+1;
1307: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1308: }
1309: ISRestoreIndices(eedges[i],&cols);
1310: }
1311: ISDestroy(&extrows[i]);
1312: ISDestroy(&extcols[i]);
1313: ISDestroy(&cornersis);
1314: MatDestroy(&Gins);
1315: MatDestroy(&GKins);
1316: }
1317: ISLocalToGlobalMappingDestroy(&el2g);
1319: /* Start assembling */
1320: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1321: if (pcbddc->nedcG) {
1322: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1323: }
1325: /* Free */
1326: if (fl2g) {
1327: ISDestroy(&primals);
1328: for (i=0;i<nee;i++) {
1329: ISDestroy(&eedges[i]);
1330: }
1331: PetscFree(eedges);
1332: }
1334: /* hack mat_graph with primal dofs on the coarse edges */
1335: {
1336: PCBDDCGraph graph = pcbddc->mat_graph;
1337: PetscInt *oqueue = graph->queue;
1338: PetscInt *ocptr = graph->cptr;
1339: PetscInt ncc,*idxs;
1341: /* find first primal edge */
1342: if (pcbddc->nedclocal) {
1343: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1344: } else {
1345: if (fl2g) {
1346: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1347: }
1348: idxs = cedges;
1349: }
1350: cum = 0;
1351: while (cum < nee && cedges[cum] < 0) cum++;
1353: /* adapt connected components */
1354: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1355: graph->cptr[0] = 0;
1356: for (i=0,ncc=0;i<graph->ncc;i++) {
1357: PetscInt lc = ocptr[i+1]-ocptr[i];
1358: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1359: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1360: graph->queue[graph->cptr[ncc]] = cedges[cum];
1361: ncc++;
1362: lc--;
1363: cum++;
1364: while (cum < nee && cedges[cum] < 0) cum++;
1365: }
1366: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1367: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1368: ncc++;
1369: }
1370: graph->ncc = ncc;
1371: if (pcbddc->nedclocal) {
1372: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1373: }
1374: PetscFree2(ocptr,oqueue);
1375: }
1376: ISLocalToGlobalMappingDestroy(&fl2g);
1377: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1378: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1379: MatDestroy(&conn);
1381: ISDestroy(&nedfieldlocal);
1382: PetscFree(extrow);
1383: PetscFree2(work,rwork);
1384: PetscFree(corners);
1385: PetscFree(cedges);
1386: PetscFree(extrows);
1387: PetscFree(extcols);
1388: MatDestroy(&lG);
1390: /* Complete assembling */
1391: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1392: if (pcbddc->nedcG) {
1393: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1394: #if 0
1395: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1396: MatView(pcbddc->nedcG,NULL);
1397: #endif
1398: }
1400: /* set change of basis */
1401: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1402: MatDestroy(&T);
1404: return 0;
1405: }
1407: /* the near-null space of BDDC carries information on quadrature weights,
1408: and these can be collinear -> so cheat with MatNullSpaceCreate
1409: and create a suitable set of basis vectors first */
1410: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1411: {
1412: PetscInt i;
1414: for (i=0;i<nvecs;i++) {
1415: PetscInt first,last;
1417: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1419: if (i>=first && i < last) {
1420: PetscScalar *data;
1421: VecGetArray(quad_vecs[i],&data);
1422: if (!has_const) {
1423: data[i-first] = 1.;
1424: } else {
1425: data[2*i-first] = 1./PetscSqrtReal(2.);
1426: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1427: }
1428: VecRestoreArray(quad_vecs[i],&data);
1429: }
1430: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1431: }
1432: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1433: for (i=0;i<nvecs;i++) { /* reset vectors */
1434: PetscInt first,last;
1435: VecLockReadPop(quad_vecs[i]);
1436: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1437: if (i>=first && i < last) {
1438: PetscScalar *data;
1439: VecGetArray(quad_vecs[i],&data);
1440: if (!has_const) {
1441: data[i-first] = 0.;
1442: } else {
1443: data[2*i-first] = 0.;
1444: data[2*i-first+1] = 0.;
1445: }
1446: VecRestoreArray(quad_vecs[i],&data);
1447: }
1448: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1449: VecLockReadPush(quad_vecs[i]);
1450: }
1451: return 0;
1452: }
1454: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1455: {
1456: Mat loc_divudotp;
1457: Vec p,v,vins,quad_vec,*quad_vecs;
1458: ISLocalToGlobalMapping map;
1459: PetscScalar *vals;
1460: const PetscScalar *array;
1461: PetscInt i,maxneighs = 0,maxsize,*gidxs;
1462: PetscInt n_neigh,*neigh,*n_shared,**shared;
1463: PetscMPIInt rank;
1465: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1466: for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs);
1467: MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1468: if (!maxneighs) {
1469: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1470: *nnsp = NULL;
1471: return 0;
1472: }
1473: maxsize = 0;
1474: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1475: PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1476: /* create vectors to hold quadrature weights */
1477: MatCreateVecs(A,&quad_vec,NULL);
1478: if (!transpose) {
1479: MatISGetLocalToGlobalMapping(A,&map,NULL);
1480: } else {
1481: MatISGetLocalToGlobalMapping(A,NULL,&map);
1482: }
1483: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1484: VecDestroy(&quad_vec);
1485: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1486: for (i=0;i<maxneighs;i++) {
1487: VecLockReadPop(quad_vecs[i]);
1488: }
1490: /* compute local quad vec */
1491: MatISGetLocalMat(divudotp,&loc_divudotp);
1492: if (!transpose) {
1493: MatCreateVecs(loc_divudotp,&v,&p);
1494: } else {
1495: MatCreateVecs(loc_divudotp,&p,&v);
1496: }
1497: VecSet(p,1.);
1498: if (!transpose) {
1499: MatMultTranspose(loc_divudotp,p,v);
1500: } else {
1501: MatMult(loc_divudotp,p,v);
1502: }
1503: if (vl2l) {
1504: Mat lA;
1505: VecScatter sc;
1507: MatISGetLocalMat(A,&lA);
1508: MatCreateVecs(lA,&vins,NULL);
1509: VecScatterCreate(v,NULL,vins,vl2l,&sc);
1510: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1511: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1512: VecScatterDestroy(&sc);
1513: } else {
1514: vins = v;
1515: }
1516: VecGetArrayRead(vins,&array);
1517: VecDestroy(&p);
1519: /* insert in global quadrature vecs */
1520: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1521: for (i=1;i<n_neigh;i++) {
1522: const PetscInt *idxs;
1523: PetscInt idx,nn,j;
1525: idxs = shared[i];
1526: nn = n_shared[i];
1527: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1528: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1529: idx = -(idx+1);
1531: ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1532: VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1533: }
1534: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1535: VecRestoreArrayRead(vins,&array);
1536: if (vl2l) {
1537: VecDestroy(&vins);
1538: }
1539: VecDestroy(&v);
1540: PetscFree2(gidxs,vals);
1542: /* assemble near null space */
1543: for (i=0;i<maxneighs;i++) {
1544: VecAssemblyBegin(quad_vecs[i]);
1545: }
1546: for (i=0;i<maxneighs;i++) {
1547: VecAssemblyEnd(quad_vecs[i]);
1548: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1549: VecLockReadPush(quad_vecs[i]);
1550: }
1551: VecDestroyVecs(maxneighs,&quad_vecs);
1552: return 0;
1553: }
1555: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1556: {
1557: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1559: if (primalv) {
1560: if (pcbddc->user_primal_vertices_local) {
1561: IS list[2], newp;
1563: list[0] = primalv;
1564: list[1] = pcbddc->user_primal_vertices_local;
1565: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1566: ISSortRemoveDups(newp);
1567: ISDestroy(&list[1]);
1568: pcbddc->user_primal_vertices_local = newp;
1569: } else {
1570: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1571: }
1572: }
1573: return 0;
1574: }
1576: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1577: {
1578: PetscInt f, *comp = (PetscInt *)ctx;
1580: for (f=0;f<Nf;f++) out[f] = X[*comp];
1581: return 0;
1582: }
1584: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1585: {
1587: Vec local,global;
1588: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1589: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1590: PetscBool monolithic = PETSC_FALSE;
1592: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1593: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1594: PetscOptionsEnd();
1595: /* need to convert from global to local topology information and remove references to information in global ordering */
1596: MatCreateVecs(pc->pmat,&global,NULL);
1597: MatCreateVecs(matis->A,&local,NULL);
1598: VecBindToCPU(global,PETSC_TRUE);
1599: VecBindToCPU(local,PETSC_TRUE);
1600: if (monolithic) { /* just get block size to properly compute vertices */
1601: if (pcbddc->vertex_size == 1) {
1602: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1603: }
1604: goto boundary;
1605: }
1607: if (pcbddc->user_provided_isfordofs) {
1608: if (pcbddc->n_ISForDofs) {
1609: PetscInt i;
1611: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1612: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1613: PetscInt bs;
1615: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1616: ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1617: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1618: ISDestroy(&pcbddc->ISForDofs[i]);
1619: }
1620: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1621: pcbddc->n_ISForDofs = 0;
1622: PetscFree(pcbddc->ISForDofs);
1623: }
1624: } else {
1625: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1626: DM dm;
1628: MatGetDM(pc->pmat, &dm);
1629: if (!dm) {
1630: PCGetDM(pc, &dm);
1631: }
1632: if (dm) {
1633: IS *fields;
1634: PetscInt nf,i;
1636: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1637: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1638: for (i=0;i<nf;i++) {
1639: PetscInt bs;
1641: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1642: ISGetBlockSize(fields[i],&bs);
1643: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1644: ISDestroy(&fields[i]);
1645: }
1646: PetscFree(fields);
1647: pcbddc->n_ISForDofsLocal = nf;
1648: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1649: PetscContainer c;
1651: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1652: if (c) {
1653: MatISLocalFields lf;
1654: PetscContainerGetPointer(c,(void**)&lf);
1655: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1656: } else { /* fallback, create the default fields if bs > 1 */
1657: PetscInt i, n = matis->A->rmap->n;
1658: MatGetBlockSize(pc->pmat,&i);
1659: if (i > 1) {
1660: pcbddc->n_ISForDofsLocal = i;
1661: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1662: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1663: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1664: }
1665: }
1666: }
1667: }
1668: } else {
1669: PetscInt i;
1670: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1671: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1672: }
1673: }
1674: }
1676: boundary:
1677: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1678: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1679: } else if (pcbddc->DirichletBoundariesLocal) {
1680: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1681: }
1682: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1683: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1684: } else if (pcbddc->NeumannBoundariesLocal) {
1685: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1686: }
1687: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1688: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1689: }
1690: VecDestroy(&global);
1691: VecDestroy(&local);
1692: /* detect local disconnected subdomains if requested (use matis->A) */
1693: if (pcbddc->detect_disconnected) {
1694: IS primalv = NULL;
1695: PetscInt i;
1696: PetscBool filter = pcbddc->detect_disconnected_filter;
1698: for (i=0;i<pcbddc->n_local_subs;i++) {
1699: ISDestroy(&pcbddc->local_subs[i]);
1700: }
1701: PetscFree(pcbddc->local_subs);
1702: PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1703: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1704: ISDestroy(&primalv);
1705: }
1706: /* early stage corner detection */
1707: {
1708: DM dm;
1710: MatGetDM(pc->pmat,&dm);
1711: if (!dm) {
1712: PCGetDM(pc,&dm);
1713: }
1714: if (dm) {
1715: PetscBool isda;
1717: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1718: if (isda) {
1719: ISLocalToGlobalMapping l2l;
1720: IS corners;
1721: Mat lA;
1722: PetscBool gl,lo;
1724: {
1725: Vec cvec;
1726: const PetscScalar *coords;
1727: PetscInt dof,n,cdim;
1728: PetscBool memc = PETSC_TRUE;
1730: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1731: DMGetCoordinates(dm,&cvec);
1732: VecGetLocalSize(cvec,&n);
1733: VecGetBlockSize(cvec,&cdim);
1734: n /= cdim;
1735: PetscFree(pcbddc->mat_graph->coords);
1736: PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1737: VecGetArrayRead(cvec,&coords);
1738: #if defined(PETSC_USE_COMPLEX)
1739: memc = PETSC_FALSE;
1740: #endif
1741: if (dof != 1) memc = PETSC_FALSE;
1742: if (memc) {
1743: PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);
1744: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1745: PetscReal *bcoords = pcbddc->mat_graph->coords;
1746: PetscInt i, b, d;
1748: for (i=0;i<n;i++) {
1749: for (b=0;b<dof;b++) {
1750: for (d=0;d<cdim;d++) {
1751: bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1752: }
1753: }
1754: }
1755: }
1756: VecRestoreArrayRead(cvec,&coords);
1757: pcbddc->mat_graph->cdim = cdim;
1758: pcbddc->mat_graph->cnloc = dof*n;
1759: pcbddc->mat_graph->cloc = PETSC_FALSE;
1760: }
1761: DMDAGetSubdomainCornersIS(dm,&corners);
1762: MatISGetLocalMat(pc->pmat,&lA);
1763: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1764: MatISRestoreLocalMat(pc->pmat,&lA);
1765: lo = (PetscBool)(l2l && corners);
1766: MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1767: if (gl) { /* From PETSc's DMDA */
1768: const PetscInt *idx;
1769: PetscInt dof,bs,*idxout,n;
1771: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1772: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1773: ISGetLocalSize(corners,&n);
1774: ISGetIndices(corners,&idx);
1775: if (bs == dof) {
1776: PetscMalloc1(n,&idxout);
1777: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1778: } else { /* the original DMDA local-to-local map have been modified */
1779: PetscInt i,d;
1781: PetscMalloc1(dof*n,&idxout);
1782: for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1783: ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);
1785: bs = 1;
1786: n *= dof;
1787: }
1788: ISRestoreIndices(corners,&idx);
1789: DMDARestoreSubdomainCornersIS(dm,&corners);
1790: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1791: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1792: ISDestroy(&corners);
1793: pcbddc->corner_selected = PETSC_TRUE;
1794: pcbddc->corner_selection = PETSC_TRUE;
1795: }
1796: if (corners) {
1797: DMDARestoreSubdomainCornersIS(dm,&corners);
1798: }
1799: }
1800: }
1801: }
1802: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1803: DM dm;
1805: MatGetDM(pc->pmat,&dm);
1806: if (!dm) {
1807: PCGetDM(pc,&dm);
1808: }
1809: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1810: Vec vcoords;
1811: PetscSection section;
1812: PetscReal *coords;
1813: PetscInt d,cdim,nl,nf,**ctxs;
1814: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1815: /* debug coordinates */
1816: PetscViewer viewer;
1817: PetscBool flg;
1818: PetscViewerFormat format;
1819: const char *prefix;
1821: DMGetCoordinateDim(dm,&cdim);
1822: DMGetLocalSection(dm,§ion);
1823: PetscSectionGetNumFields(section,&nf);
1824: DMCreateGlobalVector(dm,&vcoords);
1825: VecGetLocalSize(vcoords,&nl);
1826: PetscMalloc1(nl*cdim,&coords);
1827: PetscMalloc2(nf,&funcs,nf,&ctxs);
1828: PetscMalloc1(nf,&ctxs[0]);
1829: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1830: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1832: /* debug coordinates */
1833: PCGetOptionsPrefix(pc,&prefix);
1834: PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg);
1835: if (flg) PetscViewerPushFormat(viewer,format);
1836: for (d=0;d<cdim;d++) {
1837: PetscInt i;
1838: const PetscScalar *v;
1839: char name[16];
1841: for (i=0;i<nf;i++) ctxs[i][0] = d;
1842: PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d);
1843: PetscObjectSetName((PetscObject)vcoords,name);
1844: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1845: if (flg) VecView(vcoords,viewer);
1846: VecGetArrayRead(vcoords,&v);
1847: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1848: VecRestoreArrayRead(vcoords,&v);
1849: }
1850: VecDestroy(&vcoords);
1851: PCSetCoordinates(pc,cdim,nl,coords);
1852: PetscFree(coords);
1853: PetscFree(ctxs[0]);
1854: PetscFree2(funcs,ctxs);
1855: if (flg) {
1856: PetscViewerPopFormat(viewer);
1857: PetscViewerDestroy(&viewer);
1858: }
1859: }
1860: }
1861: return 0;
1862: }
1864: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1865: {
1866: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1867: IS nis;
1868: const PetscInt *idxs;
1869: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1872: if (mop == MPI_LAND) {
1873: /* init rootdata with true */
1874: for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1;
1875: } else {
1876: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
1877: }
1878: PetscArrayzero(matis->sf_leafdata,n);
1879: ISGetLocalSize(*is,&nd);
1880: ISGetIndices(*is,&idxs);
1881: for (i=0;i<nd;i++)
1882: if (-1 < idxs[i] && idxs[i] < n)
1883: matis->sf_leafdata[idxs[i]] = 1;
1884: ISRestoreIndices(*is,&idxs);
1885: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);
1886: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);
1887: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
1888: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
1889: if (mop == MPI_LAND) {
1890: PetscMalloc1(nd,&nidxs);
1891: } else {
1892: PetscMalloc1(n,&nidxs);
1893: }
1894: for (i=0,nnd=0;i<n;i++)
1895: if (matis->sf_leafdata[i])
1896: nidxs[nnd++] = i;
1897: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1898: ISDestroy(is);
1899: *is = nis;
1900: return 0;
1901: }
1903: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1904: {
1905: PC_IS *pcis = (PC_IS*)(pc->data);
1906: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1908: if (!pcbddc->benign_have_null) {
1909: return 0;
1910: }
1911: if (pcbddc->ChangeOfBasisMatrix) {
1912: Vec swap;
1914: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1915: swap = pcbddc->work_change;
1916: pcbddc->work_change = r;
1917: r = swap;
1918: }
1919: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1920: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1921: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1922: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1923: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1924: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1925: VecSet(z,0.);
1926: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1927: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1928: if (pcbddc->ChangeOfBasisMatrix) {
1929: pcbddc->work_change = r;
1930: VecCopy(z,pcbddc->work_change);
1931: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1932: }
1933: return 0;
1934: }
1936: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1937: {
1938: PCBDDCBenignMatMult_ctx ctx;
1939: PetscBool apply_right,apply_left,reset_x;
1941: MatShellGetContext(A,&ctx);
1942: if (transpose) {
1943: apply_right = ctx->apply_left;
1944: apply_left = ctx->apply_right;
1945: } else {
1946: apply_right = ctx->apply_right;
1947: apply_left = ctx->apply_left;
1948: }
1949: reset_x = PETSC_FALSE;
1950: if (apply_right) {
1951: const PetscScalar *ax;
1952: PetscInt nl,i;
1954: VecGetLocalSize(x,&nl);
1955: VecGetArrayRead(x,&ax);
1956: PetscArraycpy(ctx->work,ax,nl);
1957: VecRestoreArrayRead(x,&ax);
1958: for (i=0;i<ctx->benign_n;i++) {
1959: PetscScalar sum,val;
1960: const PetscInt *idxs;
1961: PetscInt nz,j;
1962: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1963: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1964: sum = 0.;
1965: if (ctx->apply_p0) {
1966: val = ctx->work[idxs[nz-1]];
1967: for (j=0;j<nz-1;j++) {
1968: sum += ctx->work[idxs[j]];
1969: ctx->work[idxs[j]] += val;
1970: }
1971: } else {
1972: for (j=0;j<nz-1;j++) {
1973: sum += ctx->work[idxs[j]];
1974: }
1975: }
1976: ctx->work[idxs[nz-1]] -= sum;
1977: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1978: }
1979: VecPlaceArray(x,ctx->work);
1980: reset_x = PETSC_TRUE;
1981: }
1982: if (transpose) {
1983: MatMultTranspose(ctx->A,x,y);
1984: } else {
1985: MatMult(ctx->A,x,y);
1986: }
1987: if (reset_x) {
1988: VecResetArray(x);
1989: }
1990: if (apply_left) {
1991: PetscScalar *ay;
1992: PetscInt i;
1994: VecGetArray(y,&ay);
1995: for (i=0;i<ctx->benign_n;i++) {
1996: PetscScalar sum,val;
1997: const PetscInt *idxs;
1998: PetscInt nz,j;
1999: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
2000: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
2001: val = -ay[idxs[nz-1]];
2002: if (ctx->apply_p0) {
2003: sum = 0.;
2004: for (j=0;j<nz-1;j++) {
2005: sum += ay[idxs[j]];
2006: ay[idxs[j]] += val;
2007: }
2008: ay[idxs[nz-1]] += sum;
2009: } else {
2010: for (j=0;j<nz-1;j++) {
2011: ay[idxs[j]] += val;
2012: }
2013: ay[idxs[nz-1]] = 0.;
2014: }
2015: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2016: }
2017: VecRestoreArray(y,&ay);
2018: }
2019: return 0;
2020: }
2022: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2023: {
2024: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2025: return 0;
2026: }
2028: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2029: {
2030: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2031: return 0;
2032: }
2034: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2035: {
2036: PC_IS *pcis = (PC_IS*)pc->data;
2037: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2038: PCBDDCBenignMatMult_ctx ctx;
2040: if (!restore) {
2041: Mat A_IB,A_BI;
2042: PetscScalar *work;
2043: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2046: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return 0;
2047: PetscMalloc1(pcis->n,&work);
2048: MatCreate(PETSC_COMM_SELF,&A_IB);
2049: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2050: MatSetType(A_IB,MATSHELL);
2051: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2052: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2053: PetscNew(&ctx);
2054: MatShellSetContext(A_IB,ctx);
2055: ctx->apply_left = PETSC_TRUE;
2056: ctx->apply_right = PETSC_FALSE;
2057: ctx->apply_p0 = PETSC_FALSE;
2058: ctx->benign_n = pcbddc->benign_n;
2059: if (reuse) {
2060: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2061: ctx->free = PETSC_FALSE;
2062: } else { /* TODO: could be optimized for successive solves */
2063: ISLocalToGlobalMapping N_to_D;
2064: PetscInt i;
2066: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2067: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2068: for (i=0;i<pcbddc->benign_n;i++) {
2069: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2070: }
2071: ISLocalToGlobalMappingDestroy(&N_to_D);
2072: ctx->free = PETSC_TRUE;
2073: }
2074: ctx->A = pcis->A_IB;
2075: ctx->work = work;
2076: MatSetUp(A_IB);
2077: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2078: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2079: pcis->A_IB = A_IB;
2081: /* A_BI as A_IB^T */
2082: MatCreateTranspose(A_IB,&A_BI);
2083: pcbddc->benign_original_mat = pcis->A_BI;
2084: pcis->A_BI = A_BI;
2085: } else {
2086: if (!pcbddc->benign_original_mat) {
2087: return 0;
2088: }
2089: MatShellGetContext(pcis->A_IB,&ctx);
2090: MatDestroy(&pcis->A_IB);
2091: pcis->A_IB = ctx->A;
2092: ctx->A = NULL;
2093: MatDestroy(&pcis->A_BI);
2094: pcis->A_BI = pcbddc->benign_original_mat;
2095: pcbddc->benign_original_mat = NULL;
2096: if (ctx->free) {
2097: PetscInt i;
2098: for (i=0;i<ctx->benign_n;i++) {
2099: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2100: }
2101: PetscFree(ctx->benign_zerodiag_subs);
2102: }
2103: PetscFree(ctx->work);
2104: PetscFree(ctx);
2105: }
2106: return 0;
2107: }
2109: /* used just in bddc debug mode */
2110: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2111: {
2112: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2113: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2114: Mat An;
2116: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2117: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2118: if (is1) {
2119: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2120: MatDestroy(&An);
2121: } else {
2122: *B = An;
2123: }
2124: return 0;
2125: }
2127: /* TODO: add reuse flag */
2128: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2129: {
2130: Mat Bt;
2131: PetscScalar *a,*bdata;
2132: const PetscInt *ii,*ij;
2133: PetscInt m,n,i,nnz,*bii,*bij;
2134: PetscBool flg_row;
2136: MatGetSize(A,&n,&m);
2137: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2138: MatSeqAIJGetArray(A,&a);
2139: nnz = n;
2140: for (i=0;i<ii[n];i++) {
2141: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2142: }
2143: PetscMalloc1(n+1,&bii);
2144: PetscMalloc1(nnz,&bij);
2145: PetscMalloc1(nnz,&bdata);
2146: nnz = 0;
2147: bii[0] = 0;
2148: for (i=0;i<n;i++) {
2149: PetscInt j;
2150: for (j=ii[i];j<ii[i+1];j++) {
2151: PetscScalar entry = a[j];
2152: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2153: bij[nnz] = ij[j];
2154: bdata[nnz] = entry;
2155: nnz++;
2156: }
2157: }
2158: bii[i+1] = nnz;
2159: }
2160: MatSeqAIJRestoreArray(A,&a);
2161: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2162: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2163: {
2164: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2165: b->free_a = PETSC_TRUE;
2166: b->free_ij = PETSC_TRUE;
2167: }
2168: if (*B == A) {
2169: MatDestroy(&A);
2170: }
2171: *B = Bt;
2172: return 0;
2173: }
2175: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2176: {
2177: Mat B = NULL;
2178: DM dm;
2179: IS is_dummy,*cc_n;
2180: ISLocalToGlobalMapping l2gmap_dummy;
2181: PCBDDCGraph graph;
2182: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2183: PetscInt i,n;
2184: PetscInt *xadj,*adjncy;
2185: PetscBool isplex = PETSC_FALSE;
2187: if (ncc) *ncc = 0;
2188: if (cc) *cc = NULL;
2189: if (primalv) *primalv = NULL;
2190: PCBDDCGraphCreate(&graph);
2191: MatGetDM(pc->pmat,&dm);
2192: if (!dm) {
2193: PCGetDM(pc,&dm);
2194: }
2195: if (dm) {
2196: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2197: }
2198: if (filter) isplex = PETSC_FALSE;
2200: if (isplex) { /* this code has been modified from plexpartition.c */
2201: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2202: PetscInt *adj = NULL;
2203: IS cellNumbering;
2204: const PetscInt *cellNum;
2205: PetscBool useCone, useClosure;
2206: PetscSection section;
2207: PetscSegBuffer adjBuffer;
2208: PetscSF sfPoint;
2210: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2211: DMGetPointSF(dm, &sfPoint);
2212: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2213: /* Build adjacency graph via a section/segbuffer */
2214: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2215: PetscSectionSetChart(section, pStart, pEnd);
2216: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2217: /* Always use FVM adjacency to create partitioner graph */
2218: DMGetBasicAdjacency(dm, &useCone, &useClosure);
2219: DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2220: DMPlexGetCellNumbering(dm, &cellNumbering);
2221: ISGetIndices(cellNumbering, &cellNum);
2222: for (n = 0, p = pStart; p < pEnd; p++) {
2223: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2224: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2225: adjSize = PETSC_DETERMINE;
2226: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2227: for (a = 0; a < adjSize; ++a) {
2228: const PetscInt point = adj[a];
2229: if (pStart <= point && point < pEnd) {
2230: PetscInt *PETSC_RESTRICT pBuf;
2231: PetscSectionAddDof(section, p, 1);
2232: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2233: *pBuf = point;
2234: }
2235: }
2236: n++;
2237: }
2238: DMSetBasicAdjacency(dm, useCone, useClosure);
2239: /* Derive CSR graph from section/segbuffer */
2240: PetscSectionSetUp(section);
2241: PetscSectionGetStorageSize(section, &size);
2242: PetscMalloc1(n+1, &xadj);
2243: for (idx = 0, p = pStart; p < pEnd; p++) {
2244: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2245: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2246: }
2247: xadj[n] = size;
2248: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2249: /* Clean up */
2250: PetscSegBufferDestroy(&adjBuffer);
2251: PetscSectionDestroy(§ion);
2252: PetscFree(adj);
2253: graph->xadj = xadj;
2254: graph->adjncy = adjncy;
2255: } else {
2256: Mat A;
2257: PetscBool isseqaij, flg_row;
2259: MatISGetLocalMat(pc->pmat,&A);
2260: if (!A->rmap->N || !A->cmap->N) {
2261: PCBDDCGraphDestroy(&graph);
2262: return 0;
2263: }
2264: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2265: if (!isseqaij && filter) {
2266: PetscBool isseqdense;
2268: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2269: if (!isseqdense) {
2270: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2271: } else { /* TODO: rectangular case and LDA */
2272: PetscScalar *array;
2273: PetscReal chop=1.e-6;
2275: MatDuplicate(A,MAT_COPY_VALUES,&B);
2276: MatDenseGetArray(B,&array);
2277: MatGetSize(B,&n,NULL);
2278: for (i=0;i<n;i++) {
2279: PetscInt j;
2280: for (j=i+1;j<n;j++) {
2281: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2282: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2283: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2284: }
2285: }
2286: MatDenseRestoreArray(B,&array);
2287: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2288: }
2289: } else {
2290: PetscObjectReference((PetscObject)A);
2291: B = A;
2292: }
2293: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2295: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2296: if (filter) {
2297: PetscScalar *data;
2298: PetscInt j,cum;
2300: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2301: MatSeqAIJGetArray(B,&data);
2302: cum = 0;
2303: for (i=0;i<n;i++) {
2304: PetscInt t;
2306: for (j=xadj[i];j<xadj[i+1];j++) {
2307: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2308: continue;
2309: }
2310: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2311: }
2312: t = xadj_filtered[i];
2313: xadj_filtered[i] = cum;
2314: cum += t;
2315: }
2316: MatSeqAIJRestoreArray(B,&data);
2317: graph->xadj = xadj_filtered;
2318: graph->adjncy = adjncy_filtered;
2319: } else {
2320: graph->xadj = xadj;
2321: graph->adjncy = adjncy;
2322: }
2323: }
2324: /* compute local connected components using PCBDDCGraph */
2325: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2326: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2327: ISDestroy(&is_dummy);
2328: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2329: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2330: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2331: PCBDDCGraphComputeConnectedComponents(graph);
2333: /* partial clean up */
2334: PetscFree2(xadj_filtered,adjncy_filtered);
2335: if (B) {
2336: PetscBool flg_row;
2337: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2338: MatDestroy(&B);
2339: }
2340: if (isplex) {
2341: PetscFree(xadj);
2342: PetscFree(adjncy);
2343: }
2345: /* get back data */
2346: if (isplex) {
2347: if (ncc) *ncc = graph->ncc;
2348: if (cc || primalv) {
2349: Mat A;
2350: PetscBT btv,btvt;
2351: PetscSection subSection;
2352: PetscInt *ids,cum,cump,*cids,*pids;
2354: DMPlexGetSubdomainSection(dm,&subSection);
2355: MatISGetLocalMat(pc->pmat,&A);
2356: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2357: PetscBTCreate(A->rmap->n,&btv);
2358: PetscBTCreate(A->rmap->n,&btvt);
2360: cids[0] = 0;
2361: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2362: PetscInt j;
2364: PetscBTMemzero(A->rmap->n,btvt);
2365: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2366: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2368: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2369: for (k = 0; k < 2*size; k += 2) {
2370: PetscInt s, pp, p = closure[k], off, dof, cdof;
2372: PetscSectionGetConstraintDof(subSection,p,&cdof);
2373: PetscSectionGetOffset(subSection,p,&off);
2374: PetscSectionGetDof(subSection,p,&dof);
2375: for (s = 0; s < dof-cdof; s++) {
2376: if (PetscBTLookupSet(btvt,off+s)) continue;
2377: if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s;
2378: else pids[cump++] = off+s; /* cross-vertex */
2379: }
2380: DMPlexGetTreeParent(dm,p,&pp,NULL);
2381: if (pp != p) {
2382: PetscSectionGetConstraintDof(subSection,pp,&cdof);
2383: PetscSectionGetOffset(subSection,pp,&off);
2384: PetscSectionGetDof(subSection,pp,&dof);
2385: for (s = 0; s < dof-cdof; s++) {
2386: if (PetscBTLookupSet(btvt,off+s)) continue;
2387: if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s;
2388: else pids[cump++] = off+s; /* cross-vertex */
2389: }
2390: }
2391: }
2392: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2393: }
2394: cids[i+1] = cum;
2395: /* mark dofs as already assigned */
2396: for (j = cids[i]; j < cids[i+1]; j++) {
2397: PetscBTSet(btv,ids[j]);
2398: }
2399: }
2400: if (cc) {
2401: PetscMalloc1(graph->ncc,&cc_n);
2402: for (i = 0; i < graph->ncc; i++) {
2403: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2404: }
2405: *cc = cc_n;
2406: }
2407: if (primalv) {
2408: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2409: }
2410: PetscFree3(ids,cids,pids);
2411: PetscBTDestroy(&btv);
2412: PetscBTDestroy(&btvt);
2413: }
2414: } else {
2415: if (ncc) *ncc = graph->ncc;
2416: if (cc) {
2417: PetscMalloc1(graph->ncc,&cc_n);
2418: for (i=0;i<graph->ncc;i++) {
2419: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2420: }
2421: *cc = cc_n;
2422: }
2423: }
2424: /* clean up graph */
2425: graph->xadj = NULL;
2426: graph->adjncy = NULL;
2427: PCBDDCGraphDestroy(&graph);
2428: return 0;
2429: }
/*
   PCBDDCBenignCheck - Debugging helper that exercises the data used by the benign trick.

   pc       - the BDDC preconditioner
   zerodiag - local index set of zero-diagonal dofs, or NULL to skip the first part

   With a zerodiag set it builds an indicator vector of the zerodiag dofs (vec1_N) and
   a random vector zeroed on the zerodiag dofs, on is_B_local and on the Dirichlet dofs
   (vec2_N), then computes (A_local * vec1_N) . vec2_N. It also counts how often each
   local dof appears in is_B_local. Finally it does a write/read round trip through
   PCBDDCBenignGetOrSetP0().

   NOTE(review): in this version none of the computed quantities (the dot product, the
   count array, the value read back from benign_p0) is examined afterwards, so the
   function cannot report a failure; it always returns 0. Presumably the verifying
   statements were removed - confirm against the intended behaviour.
*/
PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
{
  PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
  PC_IS* pcis = (PC_IS*)(pc->data);
  IS dirIS = NULL;
  PetscInt i;

  PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
  if (zerodiag) {
    Mat A;
    Vec vec3_N;
    PetscScalar *vals;
    const PetscInt *idxs;
    PetscInt nz,*count;

    /* p0: vec1_N is 1 on the zerodiag dofs and 0 elsewhere */
    VecSet(pcis->vec1_N,0.);
    /* vals is a scratch buffer reused for every VecSetValues call below */
    PetscMalloc1(pcis->n,&vals);
    ISGetLocalSize(zerodiag,&nz);
    ISGetIndices(zerodiag,&idxs);
    for (i=0;i<nz;i++) vals[i] = 1.;
    VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
    VecAssemblyBegin(pcis->vec1_N);
    VecAssemblyEnd(pcis->vec1_N);
    /* v_I: vec2_N is random, then zeroed on zerodiag, is_B_local and Dirichlet dofs */
    VecSetRandom(pcis->vec2_N,NULL);
    for (i=0;i<nz;i++) vals[i] = 0.;
    VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
    ISRestoreIndices(zerodiag,&idxs);
    ISGetIndices(pcis->is_B_local,&idxs);
    for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
    VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
    ISRestoreIndices(pcis->is_B_local,&idxs);
    if (dirIS) {
      PetscInt n;

      ISGetLocalSize(dirIS,&n);
      ISGetIndices(dirIS,&idxs);
      for (i=0;i<n;i++) vals[i] = 0.;
      VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
      ISRestoreIndices(dirIS,&idxs);
    }
    VecAssemblyBegin(pcis->vec2_N);
    VecAssemblyEnd(pcis->vec2_N);
    /* vals[0] = (A * vec1_N) . vec2_N */
    VecDuplicate(pcis->vec1_N,&vec3_N);
    VecSet(vec3_N,0.);
    MatISGetLocalMat(pc->pmat,&A);
    MatMult(A,pcis->vec1_N,vec3_N);
    VecDot(vec3_N,pcis->vec2_N,&vals[0]);
    /* NOTE(review): vals[0] is freed right away without being inspected */

    PetscFree(vals);
    VecDestroy(&vec3_N);

    /* there should not be any pressure dofs lying on the interface */
    PetscCalloc1(pcis->n,&count);
    ISGetIndices(pcis->is_B_local,&idxs);
    for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
    ISRestoreIndices(pcis->is_B_local,&idxs);
    /* NOTE(review): the zerodiag indices are fetched and released with nothing in between,
       so count[] is never compared against them */
    ISGetIndices(zerodiag,&idxs);
    ISRestoreIndices(zerodiag,&idxs);
    PetscFree(count);
  }
  ISDestroy(&dirIS);

  /* check PCBDDCBenignGetOrSetP0 */
  VecSetRandom(pcis->vec1_global,NULL);
  /* call with PETSC_FALSE while benign_p0 holds rank-dependent values ... */
  for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
  PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
  /* ... overwrite benign_p0 locally, then call again with PETSC_TRUE */
  for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
  PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
  for (i=0;i<pcbddc->benign_n;i++) {
    /* NOTE(review): val is read but never compared with the value written above */
    PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
  }
  return 0;
}
2509: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2510: {
2511: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2512: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2513: IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2514: PetscInt nz,n,benign_n,bsp = 1;
2515: PetscInt *interior_dofs,n_interior_dofs,nneu;
2516: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2519: if (reuse) goto project_b0;
2520: PetscSFDestroy(&pcbddc->benign_sf);
2521: MatDestroy(&pcbddc->benign_B0);
2522: for (n=0;n<pcbddc->benign_n;n++) {
2523: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2524: }
2525: PetscFree(pcbddc->benign_zerodiag_subs);
2526: has_null_pressures = PETSC_TRUE;
2527: have_null = PETSC_TRUE;
2528: /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2529: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2530: Checks if all the pressure dofs in each subdomain have a zero diagonal
2531: If not, a change of basis on pressures is not needed
2532: since the local Schur complements are already SPD
2533: */
2534: if (pcbddc->n_ISForDofsLocal) {
2535: IS iP = NULL;
2536: PetscInt p,*pp;
2537: PetscBool flg;
2539: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2540: n = pcbddc->n_ISForDofsLocal;
2541: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2542: PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2543: PetscOptionsEnd();
2544: if (!flg) {
2545: n = 1;
2546: pp[0] = pcbddc->n_ISForDofsLocal-1;
2547: }
2549: bsp = 0;
2550: for (p=0;p<n;p++) {
2551: PetscInt bs;
2554: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2555: bsp += bs;
2556: }
2557: PetscMalloc1(bsp,&bzerodiag);
2558: bsp = 0;
2559: for (p=0;p<n;p++) {
2560: const PetscInt *idxs;
2561: PetscInt b,bs,npl,*bidxs;
2563: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2564: ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2565: ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2566: PetscMalloc1(npl/bs,&bidxs);
2567: for (b=0;b<bs;b++) {
2568: PetscInt i;
2570: for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2571: ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2572: bsp++;
2573: }
2574: PetscFree(bidxs);
2575: ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2576: }
2577: ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);
2579: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2580: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2581: if (iP) {
2582: IS newpressures;
2584: ISDifference(pressures,iP,&newpressures);
2585: ISDestroy(&pressures);
2586: pressures = newpressures;
2587: }
2588: ISSorted(pressures,&sorted);
2589: if (!sorted) {
2590: ISSort(pressures);
2591: }
2592: PetscFree(pp);
2593: }
2595: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2596: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2597: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2598: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2599: ISSorted(zerodiag,&sorted);
2600: if (!sorted) {
2601: ISSort(zerodiag);
2602: }
2603: PetscObjectReference((PetscObject)zerodiag);
2604: zerodiag_save = zerodiag;
2605: ISGetLocalSize(zerodiag,&nz);
2606: if (!nz) {
2607: if (n) have_null = PETSC_FALSE;
2608: has_null_pressures = PETSC_FALSE;
2609: ISDestroy(&zerodiag);
2610: }
2611: recompute_zerodiag = PETSC_FALSE;
2613: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2614: zerodiag_subs = NULL;
2615: benign_n = 0;
2616: n_interior_dofs = 0;
2617: interior_dofs = NULL;
2618: nneu = 0;
2619: if (pcbddc->NeumannBoundariesLocal) {
2620: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2621: }
2622: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2623: if (checkb) { /* need to compute interior nodes */
2624: PetscInt n,i,j;
2625: PetscInt n_neigh,*neigh,*n_shared,**shared;
2626: PetscInt *iwork;
2628: ISLocalToGlobalMappingGetSize(matis->rmapping,&n);
2629: ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared);
2630: PetscCalloc1(n,&iwork);
2631: PetscMalloc1(n,&interior_dofs);
2632: for (i=1;i<n_neigh;i++)
2633: for (j=0;j<n_shared[i];j++)
2634: iwork[shared[i][j]] += 1;
2635: for (i=0;i<n;i++)
2636: if (!iwork[i])
2637: interior_dofs[n_interior_dofs++] = i;
2638: PetscFree(iwork);
2639: ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared);
2640: }
2641: if (has_null_pressures) {
2642: IS *subs;
2643: PetscInt nsubs,i,j,nl;
2644: const PetscInt *idxs;
2645: PetscScalar *array;
2646: Vec *work;
2648: subs = pcbddc->local_subs;
2649: nsubs = pcbddc->n_local_subs;
2650: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2651: if (checkb) {
2652: VecDuplicateVecs(matis->y,2,&work);
2653: ISGetLocalSize(zerodiag,&nl);
2654: ISGetIndices(zerodiag,&idxs);
2655: /* work[0] = 1_p */
2656: VecSet(work[0],0.);
2657: VecGetArray(work[0],&array);
2658: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2659: VecRestoreArray(work[0],&array);
2660: /* work[0] = 1_v */
2661: VecSet(work[1],1.);
2662: VecGetArray(work[1],&array);
2663: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2664: VecRestoreArray(work[1],&array);
2665: ISRestoreIndices(zerodiag,&idxs);
2666: }
2668: if (nsubs > 1 || bsp > 1) {
2669: IS *is;
2670: PetscInt b,totb;
2672: totb = bsp;
2673: is = bsp > 1 ? bzerodiag : &zerodiag;
2674: nsubs = PetscMax(nsubs,1);
2675: PetscCalloc1(nsubs*totb,&zerodiag_subs);
2676: for (b=0;b<totb;b++) {
2677: for (i=0;i<nsubs;i++) {
2678: ISLocalToGlobalMapping l2g;
2679: IS t_zerodiag_subs;
2680: PetscInt nl;
2682: if (subs) {
2683: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2684: } else {
2685: IS tis;
2687: MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2688: ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2689: ISLocalToGlobalMappingCreateIS(tis,&l2g);
2690: ISDestroy(&tis);
2691: }
2692: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2693: ISGetLocalSize(t_zerodiag_subs,&nl);
2694: if (nl) {
2695: PetscBool valid = PETSC_TRUE;
2697: if (checkb) {
2698: VecSet(matis->x,0);
2699: ISGetLocalSize(subs[i],&nl);
2700: ISGetIndices(subs[i],&idxs);
2701: VecGetArray(matis->x,&array);
2702: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2703: VecRestoreArray(matis->x,&array);
2704: ISRestoreIndices(subs[i],&idxs);
2705: VecPointwiseMult(matis->x,work[0],matis->x);
2706: MatMult(matis->A,matis->x,matis->y);
2707: VecPointwiseMult(matis->y,work[1],matis->y);
2708: VecGetArray(matis->y,&array);
2709: for (j=0;j<n_interior_dofs;j++) {
2710: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2711: valid = PETSC_FALSE;
2712: break;
2713: }
2714: }
2715: VecRestoreArray(matis->y,&array);
2716: }
2717: if (valid && nneu) {
2718: const PetscInt *idxs;
2719: PetscInt nzb;
2721: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2722: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2723: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2724: if (nzb) valid = PETSC_FALSE;
2725: }
2726: if (valid && pressures) {
2727: IS t_pressure_subs,tmp;
2728: PetscInt i1,i2;
2730: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2731: ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2732: ISGetLocalSize(tmp,&i1);
2733: ISGetLocalSize(t_zerodiag_subs,&i2);
2734: if (i2 != i1) valid = PETSC_FALSE;
2735: ISDestroy(&t_pressure_subs);
2736: ISDestroy(&tmp);
2737: }
2738: if (valid) {
2739: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2740: benign_n++;
2741: } else recompute_zerodiag = PETSC_TRUE;
2742: }
2743: ISDestroy(&t_zerodiag_subs);
2744: ISLocalToGlobalMappingDestroy(&l2g);
2745: }
2746: }
2747: } else { /* there's just one subdomain (or zero if they have not been detected */
2748: PetscBool valid = PETSC_TRUE;
2750: if (nneu) valid = PETSC_FALSE;
2751: if (valid && pressures) {
2752: ISEqual(pressures,zerodiag,&valid);
2753: }
2754: if (valid && checkb) {
2755: MatMult(matis->A,work[0],matis->x);
2756: VecPointwiseMult(matis->x,work[1],matis->x);
2757: VecGetArray(matis->x,&array);
2758: for (j=0;j<n_interior_dofs;j++) {
2759: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2760: valid = PETSC_FALSE;
2761: break;
2762: }
2763: }
2764: VecRestoreArray(matis->x,&array);
2765: }
2766: if (valid) {
2767: benign_n = 1;
2768: PetscMalloc1(benign_n,&zerodiag_subs);
2769: PetscObjectReference((PetscObject)zerodiag);
2770: zerodiag_subs[0] = zerodiag;
2771: }
2772: }
2773: if (checkb) {
2774: VecDestroyVecs(2,&work);
2775: }
2776: }
2777: PetscFree(interior_dofs);
2779: if (!benign_n) {
2780: PetscInt n;
2782: ISDestroy(&zerodiag);
2783: recompute_zerodiag = PETSC_FALSE;
2784: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2785: if (n) have_null = PETSC_FALSE;
2786: }
2788: /* final check for null pressures */
2789: if (zerodiag && pressures) {
2790: ISEqual(pressures,zerodiag,&have_null);
2791: }
2793: if (recompute_zerodiag) {
2794: ISDestroy(&zerodiag);
2795: if (benign_n == 1) {
2796: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2797: zerodiag = zerodiag_subs[0];
2798: } else {
2799: PetscInt i,nzn,*new_idxs;
2801: nzn = 0;
2802: for (i=0;i<benign_n;i++) {
2803: PetscInt ns;
2804: ISGetLocalSize(zerodiag_subs[i],&ns);
2805: nzn += ns;
2806: }
2807: PetscMalloc1(nzn,&new_idxs);
2808: nzn = 0;
2809: for (i=0;i<benign_n;i++) {
2810: PetscInt ns,*idxs;
2811: ISGetLocalSize(zerodiag_subs[i],&ns);
2812: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2813: PetscArraycpy(new_idxs+nzn,idxs,ns);
2814: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2815: nzn += ns;
2816: }
2817: PetscSortInt(nzn,new_idxs);
2818: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2819: }
2820: have_null = PETSC_FALSE;
2821: }
2823: /* determines if the coarse solver will be singular or not */
2824: MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2826: /* Prepare matrix to compute no-net-flux */
2827: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2828: Mat A,loc_divudotp;
2829: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2830: IS row,col,isused = NULL;
2831: PetscInt M,N,n,st,n_isused;
2833: if (pressures) {
2834: isused = pressures;
2835: } else {
2836: isused = zerodiag_save;
2837: }
2838: MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2839: MatISGetLocalMat(pc->pmat,&A);
2840: MatGetLocalSize(A,&n,NULL);
2842: n_isused = 0;
2843: if (isused) {
2844: ISGetLocalSize(isused,&n_isused);
2845: }
2846: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2847: st = st-n_isused;
2848: if (n) {
2849: const PetscInt *gidxs;
2851: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2852: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2853: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2854: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2855: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2856: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2857: } else {
2858: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2859: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2860: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2861: }
2862: MatGetSize(pc->pmat,NULL,&N);
2863: ISGetSize(row,&M);
2864: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2865: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2866: ISDestroy(&row);
2867: ISDestroy(&col);
2868: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2869: MatSetType(pcbddc->divudotp,MATIS);
2870: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2871: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2872: ISLocalToGlobalMappingDestroy(&rl2g);
2873: ISLocalToGlobalMappingDestroy(&cl2g);
2874: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2875: MatDestroy(&loc_divudotp);
2876: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2877: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2878: }
2879: ISDestroy(&zerodiag_save);
2880: ISDestroy(&pressures);
2881: if (bzerodiag) {
2882: PetscInt i;
2884: for (i=0;i<bsp;i++) {
2885: ISDestroy(&bzerodiag[i]);
2886: }
2887: PetscFree(bzerodiag);
2888: }
2889: pcbddc->benign_n = benign_n;
2890: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2892: /* determines if the problem has subdomains with 0 pressure block */
2893: have_null = (PetscBool)(!!pcbddc->benign_n);
2894: MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2896: project_b0:
2897: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2898: /* change of basis and p0 dofs */
2899: if (pcbddc->benign_n) {
2900: PetscInt i,s,*nnz;
2902: /* local change of basis for pressures */
2903: MatDestroy(&pcbddc->benign_change);
2904: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2905: MatSetType(pcbddc->benign_change,MATAIJ);
2906: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2907: PetscMalloc1(n,&nnz);
2908: for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2909: for (i=0;i<pcbddc->benign_n;i++) {
2910: const PetscInt *idxs;
2911: PetscInt nzs,j;
2913: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2914: ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2915: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2916: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2917: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2918: }
2919: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2920: MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2921: PetscFree(nnz);
2922: /* set identity by default */
2923: for (i=0;i<n;i++) {
2924: MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2925: }
2926: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2927: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2928: /* set change on pressures */
2929: for (s=0;s<pcbddc->benign_n;s++) {
2930: PetscScalar *array;
2931: const PetscInt *idxs;
2932: PetscInt nzs;
2934: ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2935: ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2936: for (i=0;i<nzs-1;i++) {
2937: PetscScalar vals[2];
2938: PetscInt cols[2];
2940: cols[0] = idxs[i];
2941: cols[1] = idxs[nzs-1];
2942: vals[0] = 1.;
2943: vals[1] = 1.;
2944: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2945: }
2946: PetscMalloc1(nzs,&array);
2947: for (i=0;i<nzs-1;i++) array[i] = -1.;
2948: array[nzs-1] = 1.;
2949: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2950: /* store local idxs for p0 */
2951: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2952: ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2953: PetscFree(array);
2954: }
2955: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2956: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2958: /* project if needed */
2959: if (pcbddc->benign_change_explicit) {
2960: Mat M;
2962: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2963: MatDestroy(&pcbddc->local_mat);
2964: MatSeqAIJCompress(M,&pcbddc->local_mat);
2965: MatDestroy(&M);
2966: }
2967: /* store global idxs for p0 */
2968: ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2969: }
2970: *zerodiaglocal = zerodiag;
2971: return 0;
2972: }
2974: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2975: {
2976: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2977: PetscScalar *array;
2979: if (!pcbddc->benign_sf) {
2980: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2981: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2982: }
2983: if (get) {
2984: VecGetArrayRead(v,(const PetscScalar**)&array);
2985: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
2986: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
2987: VecRestoreArrayRead(v,(const PetscScalar**)&array);
2988: } else {
2989: VecGetArray(v,&array);
2990: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
2991: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
2992: VecRestoreArray(v,&array);
2993: }
2994: return 0;
2995: }
2997: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2998: {
2999: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3001: /* TODO: add error checking
3002: - avoid nested pop (or push) calls.
3003: - cannot push before pop.
3004: - cannot call this if pcbddc->local_mat is NULL
3005: */
3006: if (!pcbddc->benign_n) {
3007: return 0;
3008: }
3009: if (pop) {
3010: if (pcbddc->benign_change_explicit) {
3011: IS is_p0;
3012: MatReuse reuse;
3014: /* extract B_0 */
3015: reuse = MAT_INITIAL_MATRIX;
3016: if (pcbddc->benign_B0) {
3017: reuse = MAT_REUSE_MATRIX;
3018: }
3019: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3020: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3021: /* remove rows and cols from local problem */
3022: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3023: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3024: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3025: ISDestroy(&is_p0);
3026: } else {
3027: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3028: PetscScalar *vals;
3029: PetscInt i,n,*idxs_ins;
3031: VecGetLocalSize(matis->y,&n);
3032: PetscMalloc2(n,&idxs_ins,n,&vals);
3033: if (!pcbddc->benign_B0) {
3034: PetscInt *nnz;
3035: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3036: MatSetType(pcbddc->benign_B0,MATAIJ);
3037: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3038: PetscMalloc1(pcbddc->benign_n,&nnz);
3039: for (i=0;i<pcbddc->benign_n;i++) {
3040: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3041: nnz[i] = n - nnz[i];
3042: }
3043: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3044: MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3045: PetscFree(nnz);
3046: }
3048: for (i=0;i<pcbddc->benign_n;i++) {
3049: PetscScalar *array;
3050: PetscInt *idxs,j,nz,cum;
3052: VecSet(matis->x,0.);
3053: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3054: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3055: for (j=0;j<nz;j++) vals[j] = 1.;
3056: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3057: VecAssemblyBegin(matis->x);
3058: VecAssemblyEnd(matis->x);
3059: VecSet(matis->y,0.);
3060: MatMult(matis->A,matis->x,matis->y);
3061: VecGetArray(matis->y,&array);
3062: cum = 0;
3063: for (j=0;j<n;j++) {
3064: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3065: vals[cum] = array[j];
3066: idxs_ins[cum] = j;
3067: cum++;
3068: }
3069: }
3070: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3071: VecRestoreArray(matis->y,&array);
3072: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3073: }
3074: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3075: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3076: PetscFree2(idxs_ins,vals);
3077: }
3078: } else { /* push */
3079: if (pcbddc->benign_change_explicit) {
3080: PetscInt i;
3082: for (i=0;i<pcbddc->benign_n;i++) {
3083: PetscScalar *B0_vals;
3084: PetscInt *B0_cols,B0_ncol;
3086: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3087: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3088: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3089: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3090: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3091: }
3092: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3093: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3094: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3095: }
3096: return 0;
3097: }
3099: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3100: {
3101: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3102: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3103: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3104: PetscBLASInt *B_iwork,*B_ifail;
3105: PetscScalar *work,lwork;
3106: PetscScalar *St,*S,*eigv;
3107: PetscScalar *Sarray,*Starray;
3108: PetscReal *eigs,thresh,lthresh,uthresh;
3109: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3110: PetscBool allocated_S_St;
3111: #if defined(PETSC_USE_COMPLEX)
3112: PetscReal *rwork;
3113: #endif
3114: PetscErrorCode ierr;
3119: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3121: if (pcbddc->dbg_flag) {
3122: PetscViewerFlush(pcbddc->dbg_viewer);
3123: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3124: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3125: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3126: }
3128: if (pcbddc->dbg_flag) {
3129: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3130: }
3132: /* max size of subsets */
3133: mss = 0;
3134: for (i=0;i<sub_schurs->n_subs;i++) {
3135: PetscInt subset_size;
3137: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3138: mss = PetscMax(mss,subset_size);
3139: }
3141: /* min/max and threshold */
3142: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3143: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3144: nmax = PetscMax(nmin,nmax);
3145: allocated_S_St = PETSC_FALSE;
3146: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3147: allocated_S_St = PETSC_TRUE;
3148: }
3150: /* allocate lapack workspace */
3151: cum = cum2 = 0;
3152: maxneigs = 0;
3153: for (i=0;i<sub_schurs->n_subs;i++) {
3154: PetscInt n,subset_size;
3156: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3157: n = PetscMin(subset_size,nmax);
3158: cum += subset_size;
3159: cum2 += subset_size*n;
3160: maxneigs = PetscMax(maxneigs,n);
3161: }
3162: lwork = 0;
3163: if (mss) {
3164: if (sub_schurs->is_symmetric) {
3165: PetscScalar sdummy = 0.;
3166: PetscBLASInt B_itype = 1;
3167: PetscBLASInt B_N = mss, idummy = 0;
3168: PetscReal rdummy = 0.,zero = 0.0;
3169: PetscReal eps = 0.0; /* dlamch? */
3171: B_lwork = -1;
3172: /* some implementations may complain about NULL pointers, even if we are querying */
3173: S = &sdummy;
3174: St = &sdummy;
3175: eigs = &rdummy;
3176: eigv = &sdummy;
3177: B_iwork = &idummy;
3178: B_ifail = &idummy;
3179: #if defined(PETSC_USE_COMPLEX)
3180: rwork = &rdummy;
3181: #endif
3182: thresh = 1.0;
3183: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3184: #if defined(PETSC_USE_COMPLEX)
3185: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3186: #else
3187: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3188: #endif
3190: PetscFPTrapPop();
3191: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3192: }
3194: nv = 0;
3195: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3196: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3197: }
3198: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3199: if (allocated_S_St) {
3200: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3201: }
3202: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3203: #if defined(PETSC_USE_COMPLEX)
3204: PetscMalloc1(7*mss,&rwork);
3205: #endif
3206: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3207: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3208: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3209: nv+cum,&pcbddc->adaptive_constraints_idxs,
3210: nv+cum2,&pcbddc->adaptive_constraints_data);
3211: PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);
3213: maxneigs = 0;
3214: cum = cumarray = 0;
3215: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3216: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3217: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3218: const PetscInt *idxs;
3220: ISGetIndices(sub_schurs->is_vertices,&idxs);
3221: for (cum=0;cum<nv;cum++) {
3222: pcbddc->adaptive_constraints_n[cum] = 1;
3223: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3224: pcbddc->adaptive_constraints_data[cum] = 1.0;
3225: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3226: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3227: }
3228: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3229: }
3231: if (mss) { /* multilevel */
3232: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3233: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3234: }
3236: lthresh = pcbddc->adaptive_threshold[0];
3237: uthresh = pcbddc->adaptive_threshold[1];
3238: for (i=0;i<sub_schurs->n_subs;i++) {
3239: const PetscInt *idxs;
3240: PetscReal upper,lower;
3241: PetscInt j,subset_size,eigs_start = 0;
3242: PetscBLASInt B_N;
3243: PetscBool same_data = PETSC_FALSE;
3244: PetscBool scal = PETSC_FALSE;
3246: if (pcbddc->use_deluxe_scaling) {
3247: upper = PETSC_MAX_REAL;
3248: lower = uthresh;
3249: } else {
3251: upper = 1./uthresh;
3252: lower = 0.;
3253: }
3254: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3255: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3256: PetscBLASIntCast(subset_size,&B_N);
3257: /* this is experimental: we assume the dofs have been properly grouped to have
3258: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3259: if (!sub_schurs->is_posdef) {
3260: Mat T;
3262: for (j=0;j<subset_size;j++) {
3263: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3264: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3265: MatScale(T,-1.0);
3266: MatDestroy(&T);
3267: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3268: MatScale(T,-1.0);
3269: MatDestroy(&T);
3270: if (sub_schurs->change_primal_sub) {
3271: PetscInt nz,k;
3272: const PetscInt *idxs;
3274: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3275: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3276: for (k=0;k<nz;k++) {
3277: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3278: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3279: }
3280: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3281: }
3282: scal = PETSC_TRUE;
3283: break;
3284: }
3285: }
3286: }
3288: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3289: if (sub_schurs->is_symmetric) {
3290: PetscInt j,k;
3291: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3292: PetscArrayzero(S,subset_size*subset_size);
3293: PetscArrayzero(St,subset_size*subset_size);
3294: }
3295: for (j=0;j<subset_size;j++) {
3296: for (k=j;k<subset_size;k++) {
3297: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3298: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3299: }
3300: }
3301: } else {
3302: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3303: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3304: }
3305: } else {
3306: S = Sarray + cumarray;
3307: St = Starray + cumarray;
3308: }
3309: /* see if we can save some work */
3310: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3311: PetscArraycmp(S,St,subset_size*subset_size,&same_data);
3312: }
3314: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3315: B_neigs = 0;
3316: } else {
3317: if (sub_schurs->is_symmetric) {
3318: PetscBLASInt B_itype = 1;
3319: PetscBLASInt B_IL, B_IU;
3320: PetscReal eps = -1.0; /* dlamch? */
3321: PetscInt nmin_s;
3322: PetscBool compute_range;
3324: B_neigs = 0;
3325: compute_range = (PetscBool)!same_data;
3326: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3328: if (pcbddc->dbg_flag) {
3329: PetscInt nc = 0;
3331: if (sub_schurs->change_primal_sub) {
3332: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3333: }
3334: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3335: }
3337: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3338: if (compute_range) {
3340: /* ask for eigenvalues larger than thresh */
3341: if (sub_schurs->is_posdef) {
3342: #if defined(PETSC_USE_COMPLEX)
3343: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3344: #else
3345: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3346: #endif
3347: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3348: } else { /* no theory so far, but it works nicely */
3349: PetscInt recipe = 0,recipe_m = 1;
3350: PetscReal bb[2];
3352: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3353: switch (recipe) {
3354: case 0:
3355: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3356: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3357: #if defined(PETSC_USE_COMPLEX)
3358: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3359: #else
3360: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3361: #endif
3362: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3363: break;
3364: case 1:
3365: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3366: #if defined(PETSC_USE_COMPLEX)
3367: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3368: #else
3369: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3370: #endif
3371: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3372: if (!scal) {
3373: PetscBLASInt B_neigs2 = 0;
3375: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3376: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3377: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3378: #if defined(PETSC_USE_COMPLEX)
3379: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3380: #else
3381: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3382: #endif
3383: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3384: B_neigs += B_neigs2;
3385: }
3386: break;
3387: case 2:
3388: if (scal) {
3389: bb[0] = PETSC_MIN_REAL;
3390: bb[1] = 0;
3391: #if defined(PETSC_USE_COMPLEX)
3392: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3393: #else
3394: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3395: #endif
3396: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3397: } else {
3398: PetscBLASInt B_neigs2 = 0;
3399: PetscBool import = PETSC_FALSE;
3401: lthresh = PetscMax(lthresh,0.0);
3402: if (lthresh > 0.0) {
3403: bb[0] = PETSC_MIN_REAL;
3404: bb[1] = lthresh*lthresh;
3406: import = PETSC_TRUE;
3407: #if defined(PETSC_USE_COMPLEX)
3408: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3409: #else
3410: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3411: #endif
3412: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3413: }
3414: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3415: bb[1] = PETSC_MAX_REAL;
3416: if (import) {
3417: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3418: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3419: }
3420: #if defined(PETSC_USE_COMPLEX)
3421: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3422: #else
3423: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3424: #endif
3425: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3426: B_neigs += B_neigs2;
3427: }
3428: break;
3429: case 3:
3430: if (scal) {
3431: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3432: } else {
3433: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3434: }
3435: if (!scal) {
3436: bb[0] = uthresh;
3437: bb[1] = PETSC_MAX_REAL;
3438: #if defined(PETSC_USE_COMPLEX)
3439: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3440: #else
3441: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3442: #endif
3443: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3444: }
3445: if (recipe_m > 0 && B_N - B_neigs > 0) {
3446: PetscBLASInt B_neigs2 = 0;
3448: B_IL = 1;
3449: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3450: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3451: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3452: #if defined(PETSC_USE_COMPLEX)
3453: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3454: #else
3455: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3456: #endif
3457: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3458: B_neigs += B_neigs2;
3459: }
3460: break;
3461: case 4:
3462: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3463: #if defined(PETSC_USE_COMPLEX)
3464: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3465: #else
3466: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3467: #endif
3468: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3469: {
3470: PetscBLASInt B_neigs2 = 0;
3472: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3473: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3474: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3475: #if defined(PETSC_USE_COMPLEX)
3476: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3477: #else
3478: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3479: #endif
3480: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3481: B_neigs += B_neigs2;
3482: }
3483: break;
3484: case 5: /* same as before: first compute all eigenvalues, then filter */
3485: #if defined(PETSC_USE_COMPLEX)
3486: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3487: #else
3488: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3489: #endif
3490: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3491: {
3492: PetscInt e,k,ne;
3493: for (e=0,ne=0;e<B_neigs;e++) {
3494: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3495: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3496: eigs[ne] = eigs[e];
3497: ne++;
3498: }
3499: }
3500: PetscArraycpy(eigv,S,B_N*ne);
3501: B_neigs = ne;
3502: }
3503: break;
3504: default:
3505: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3506: }
3507: }
3508: } else if (!same_data) { /* this is just to see all the eigenvalues */
3509: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3510: B_IL = 1;
3511: #if defined(PETSC_USE_COMPLEX)
3512: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3513: #else
3514: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3515: #endif
3516: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3517: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3518: PetscInt k;
3520: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3521: PetscBLASIntCast(nmax,&B_neigs);
3522: nmin = nmax;
3523: PetscArrayzero(eigv,subset_size*nmax);
3524: for (k=0;k<nmax;k++) {
3525: eigs[k] = 1./PETSC_SMALL;
3526: eigv[k*(subset_size+1)] = 1.0;
3527: }
3528: }
3529: PetscFPTrapPop();
3530: if (B_ierr) {
3533: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3534: }
3536: if (B_neigs > nmax) {
3537: if (pcbddc->dbg_flag) {
3538: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3539: }
3540: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3541: B_neigs = nmax;
3542: }
3544: nmin_s = PetscMin(nmin,B_N);
3545: if (B_neigs < nmin_s) {
3546: PetscBLASInt B_neigs2 = 0;
3548: if (pcbddc->use_deluxe_scaling) {
3549: if (scal) {
3550: B_IU = nmin_s;
3551: B_IL = B_neigs + 1;
3552: } else {
3553: B_IL = B_N - nmin_s + 1;
3554: B_IU = B_N - B_neigs;
3555: }
3556: } else {
3557: B_IL = B_neigs + 1;
3558: B_IU = nmin_s;
3559: }
3560: if (pcbddc->dbg_flag) {
3561: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3562: }
3563: if (sub_schurs->is_symmetric) {
3564: PetscInt j,k;
3565: for (j=0;j<subset_size;j++) {
3566: for (k=j;k<subset_size;k++) {
3567: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3568: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3569: }
3570: }
3571: } else {
3572: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3573: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3574: }
3575: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3576: #if defined(PETSC_USE_COMPLEX)
3577: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3578: #else
3579: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3580: #endif
3581: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3582: PetscFPTrapPop();
3583: B_neigs += B_neigs2;
3584: }
3585: if (B_ierr) {
3588: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3589: }
3590: if (pcbddc->dbg_flag) {
3591: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3592: for (j=0;j<B_neigs;j++) {
3593: if (eigs[j] == 0.0) {
3594: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3595: } else {
3596: if (pcbddc->use_deluxe_scaling) {
3597: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3598: } else {
3599: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3600: }
3601: }
3602: }
3603: }
3604: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3605: }
3606: /* change the basis back to the original one */
3607: if (sub_schurs->change) {
3608: Mat change,phi,phit;
3610: if (pcbddc->dbg_flag > 2) {
3611: PetscInt ii;
3612: for (ii=0;ii<B_neigs;ii++) {
3613: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3614: for (j=0;j<B_N;j++) {
3615: #if defined(PETSC_USE_COMPLEX)
3616: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3617: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3618: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3619: #else
3620: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3621: #endif
3622: }
3623: }
3624: }
3625: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3626: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3627: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3628: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3629: MatDestroy(&phit);
3630: MatDestroy(&phi);
3631: }
3632: maxneigs = PetscMax(B_neigs,maxneigs);
3633: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3634: if (B_neigs) {
3635: PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);
3637: if (pcbddc->dbg_flag > 1) {
3638: PetscInt ii;
3639: for (ii=0;ii<B_neigs;ii++) {
3640: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3641: for (j=0;j<B_N;j++) {
3642: #if defined(PETSC_USE_COMPLEX)
3643: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3644: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3645: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3646: #else
3647: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3648: #endif
3649: }
3650: }
3651: }
3652: PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);
3653: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3654: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3655: cum++;
3656: }
3657: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3658: /* shift for next computation */
3659: cumarray += subset_size*subset_size;
3660: }
3661: if (pcbddc->dbg_flag) {
3662: PetscViewerFlush(pcbddc->dbg_viewer);
3663: }
3665: if (mss) {
3666: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3667: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3668: /* destroy matrices (junk) */
3669: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3670: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3671: }
3672: if (allocated_S_St) {
3673: PetscFree2(S,St);
3674: }
3675: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3676: #if defined(PETSC_USE_COMPLEX)
3677: PetscFree(rwork);
3678: #endif
3679: if (pcbddc->dbg_flag) {
3680: PetscInt maxneigs_r;
3681: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3682: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3683: }
3684: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3685: return 0;
3686: }
3688: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3689: {
3690: PetscScalar *coarse_submat_vals;
3692: /* Setup local scatters R_to_B and (optionally) R_to_D */
3693: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3694: PCBDDCSetUpLocalScatters(pc);
3696: /* Setup local neumann solver ksp_R */
3697: /* PCBDDCSetUpLocalScatters should be called first! */
3698: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3700: /*
3701: Setup local correction and local part of coarse basis.
3702: Gives back the dense local part of the coarse matrix in column major ordering
3703: */
3704: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3706: /* Compute total number of coarse nodes and setup coarse solver */
3707: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3709: /* free */
3710: PetscFree(coarse_submat_vals);
3711: return 0;
3712: }
3714: PetscErrorCode PCBDDCResetCustomization(PC pc)
3715: {
3716: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3718: ISDestroy(&pcbddc->user_primal_vertices);
3719: ISDestroy(&pcbddc->user_primal_vertices_local);
3720: ISDestroy(&pcbddc->NeumannBoundaries);
3721: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3722: ISDestroy(&pcbddc->DirichletBoundaries);
3723: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3724: PetscFree(pcbddc->onearnullvecs_state);
3725: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3726: PCBDDCSetDofsSplitting(pc,0,NULL);
3727: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3728: return 0;
3729: }
3731: PetscErrorCode PCBDDCResetTopography(PC pc)
3732: {
3733: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3734: PetscInt i;
3736: MatDestroy(&pcbddc->nedcG);
3737: ISDestroy(&pcbddc->nedclocal);
3738: MatDestroy(&pcbddc->discretegradient);
3739: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3740: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3741: MatDestroy(&pcbddc->switch_static_change);
3742: VecDestroy(&pcbddc->work_change);
3743: MatDestroy(&pcbddc->ConstraintMatrix);
3744: MatDestroy(&pcbddc->divudotp);
3745: ISDestroy(&pcbddc->divudotp_vl2l);
3746: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3747: for (i=0;i<pcbddc->n_local_subs;i++) {
3748: ISDestroy(&pcbddc->local_subs[i]);
3749: }
3750: pcbddc->n_local_subs = 0;
3751: PetscFree(pcbddc->local_subs);
3752: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3753: pcbddc->graphanalyzed = PETSC_FALSE;
3754: pcbddc->recompute_topography = PETSC_TRUE;
3755: pcbddc->corner_selected = PETSC_FALSE;
3756: return 0;
3757: }
3759: PetscErrorCode PCBDDCResetSolvers(PC pc)
3760: {
3761: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3763: VecDestroy(&pcbddc->coarse_vec);
3764: if (pcbddc->coarse_phi_B) {
3765: PetscScalar *array;
3766: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3767: PetscFree(array);
3768: }
3769: MatDestroy(&pcbddc->coarse_phi_B);
3770: MatDestroy(&pcbddc->coarse_phi_D);
3771: MatDestroy(&pcbddc->coarse_psi_B);
3772: MatDestroy(&pcbddc->coarse_psi_D);
3773: VecDestroy(&pcbddc->vec1_P);
3774: VecDestroy(&pcbddc->vec1_C);
3775: MatDestroy(&pcbddc->local_auxmat2);
3776: MatDestroy(&pcbddc->local_auxmat1);
3777: VecDestroy(&pcbddc->vec1_R);
3778: VecDestroy(&pcbddc->vec2_R);
3779: ISDestroy(&pcbddc->is_R_local);
3780: VecScatterDestroy(&pcbddc->R_to_B);
3781: VecScatterDestroy(&pcbddc->R_to_D);
3782: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3783: KSPReset(pcbddc->ksp_D);
3784: KSPReset(pcbddc->ksp_R);
3785: KSPReset(pcbddc->coarse_ksp);
3786: MatDestroy(&pcbddc->local_mat);
3787: PetscFree(pcbddc->primal_indices_local_idxs);
3788: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3789: PetscFree(pcbddc->global_primal_indices);
3790: ISDestroy(&pcbddc->coarse_subassembling);
3791: MatDestroy(&pcbddc->benign_change);
3792: VecDestroy(&pcbddc->benign_vec);
3793: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3794: MatDestroy(&pcbddc->benign_B0);
3795: PetscSFDestroy(&pcbddc->benign_sf);
3796: if (pcbddc->benign_zerodiag_subs) {
3797: PetscInt i;
3798: for (i=0;i<pcbddc->benign_n;i++) {
3799: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3800: }
3801: PetscFree(pcbddc->benign_zerodiag_subs);
3802: }
3803: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3804: return 0;
3805: }
3807: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3808: {
3809: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3810: PC_IS *pcis = (PC_IS*)pc->data;
3811: VecType impVecType;
3812: PetscInt n_constraints,n_R,old_size;
3814: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3815: n_R = pcis->n - pcbddc->n_vertices;
3816: VecGetType(pcis->vec1_N,&impVecType);
3817: /* local work vectors (try to avoid unneeded work)*/
3818: /* R nodes */
3819: old_size = -1;
3820: if (pcbddc->vec1_R) {
3821: VecGetSize(pcbddc->vec1_R,&old_size);
3822: }
3823: if (n_R != old_size) {
3824: VecDestroy(&pcbddc->vec1_R);
3825: VecDestroy(&pcbddc->vec2_R);
3826: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3827: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3828: VecSetType(pcbddc->vec1_R,impVecType);
3829: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3830: }
3831: /* local primal dofs */
3832: old_size = -1;
3833: if (pcbddc->vec1_P) {
3834: VecGetSize(pcbddc->vec1_P,&old_size);
3835: }
3836: if (pcbddc->local_primal_size != old_size) {
3837: VecDestroy(&pcbddc->vec1_P);
3838: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3839: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3840: VecSetType(pcbddc->vec1_P,impVecType);
3841: }
3842: /* local explicit constraints */
3843: old_size = -1;
3844: if (pcbddc->vec1_C) {
3845: VecGetSize(pcbddc->vec1_C,&old_size);
3846: }
3847: if (n_constraints && n_constraints != old_size) {
3848: VecDestroy(&pcbddc->vec1_C);
3849: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3850: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3851: VecSetType(pcbddc->vec1_C,impVecType);
3852: }
3853: return 0;
3854: }
3856: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3857: {
3858: /* pointers to pcis and pcbddc */
3859: PC_IS* pcis = (PC_IS*)pc->data;
3860: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3861: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3862: /* submatrices of local problem */
3863: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3864: /* submatrices of local coarse problem */
3865: Mat S_VV,S_CV,S_VC,S_CC;
3866: /* working matrices */
3867: Mat C_CR;
3868: /* additional working stuff */
3869: PC pc_R;
3870: Mat F,Brhs = NULL;
3871: Vec dummy_vec;
3872: PetscBool isLU,isCHOL,need_benign_correction,sparserhs;
3873: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3874: PetscScalar *work;
3875: PetscInt *idx_V_B;
3876: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3877: PetscInt i,n_R,n_D,n_B;
3878: PetscScalar one=1.0,m_one=-1.0;
3881: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
3883: /* Set Non-overlapping dimensions */
3884: n_vertices = pcbddc->n_vertices;
3885: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3886: n_B = pcis->n_B;
3887: n_D = pcis->n - n_B;
3888: n_R = pcis->n - n_vertices;
3890: /* vertices in boundary numbering */
3891: PetscMalloc1(n_vertices,&idx_V_B);
3892: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3895: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3896: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3897: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3898: MatDenseSetLDA(S_VV,pcbddc->local_primal_size);
3899: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3900: MatDenseSetLDA(S_CV,pcbddc->local_primal_size);
3901: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3902: MatDenseSetLDA(S_VC,pcbddc->local_primal_size);
3903: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3904: MatDenseSetLDA(S_CC,pcbddc->local_primal_size);
3906: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3907: KSPGetPC(pcbddc->ksp_R,&pc_R);
3908: PCSetUp(pc_R);
3909: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3910: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3911: lda_rhs = n_R;
3912: need_benign_correction = PETSC_FALSE;
3913: if (isLU || isCHOL) {
3914: PCFactorGetMatrix(pc_R,&F);
3915: } else if (sub_schurs && sub_schurs->reuse_solver) {
3916: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3917: MatFactorType type;
3919: F = reuse_solver->F;
3920: MatGetFactorType(F,&type);
3921: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3922: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3923: MatGetSize(F,&lda_rhs,NULL);
3924: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3925: } else F = NULL;
3927: /* determine if we can use a sparse right-hand side */
3928: sparserhs = PETSC_FALSE;
3929: if (F) {
3930: MatSolverType solver;
3932: MatFactorGetSolverType(F,&solver);
3933: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3934: }
3936: /* allocate workspace */
3937: n = 0;
3938: if (n_constraints) {
3939: n += lda_rhs*n_constraints;
3940: }
3941: if (n_vertices) {
3942: n = PetscMax(2*lda_rhs*n_vertices,n);
3943: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3944: }
3945: if (!pcbddc->symmetric_primal) {
3946: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3947: }
3948: PetscMalloc1(n,&work);
3950: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3951: dummy_vec = NULL;
3952: if (need_benign_correction && lda_rhs != n_R && F) {
3953: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3954: VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3955: VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3956: }
3958: MatDestroy(&pcbddc->local_auxmat1);
3959: MatDestroy(&pcbddc->local_auxmat2);
3961: /* Precompute stuffs needed for preprocessing and application of BDDC*/
3962: if (n_constraints) {
3963: Mat M3,C_B;
3964: IS is_aux;
3966: /* Extract constraints on R nodes: C_{CR} */
3967: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3968: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3969: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
3971: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3972: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3973: if (!sparserhs) {
3974: PetscArrayzero(work,lda_rhs*n_constraints);
3975: for (i=0;i<n_constraints;i++) {
3976: const PetscScalar *row_cmat_values;
3977: const PetscInt *row_cmat_indices;
3978: PetscInt size_of_constraint,j;
3980: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3981: for (j=0;j<size_of_constraint;j++) {
3982: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3983: }
3984: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3985: }
3986: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3987: } else {
3988: Mat tC_CR;
3990: MatScale(C_CR,-1.0);
3991: if (lda_rhs != n_R) {
3992: PetscScalar *aa;
3993: PetscInt r,*ii,*jj;
3994: PetscBool done;
3996: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3998: MatSeqAIJGetArray(C_CR,&aa);
3999: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4000: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4002: } else {
4003: PetscObjectReference((PetscObject)C_CR);
4004: tC_CR = C_CR;
4005: }
4006: MatCreateTranspose(tC_CR,&Brhs);
4007: MatDestroy(&tC_CR);
4008: }
4009: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4010: if (F) {
4011: if (need_benign_correction) {
4012: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4014: /* rhs is already zero on interior dofs, no need to change the rhs */
4015: PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);
4016: }
4017: MatMatSolve(F,Brhs,local_auxmat2_R);
4018: if (need_benign_correction) {
4019: PetscScalar *marr;
4020: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4022: MatDenseGetArray(local_auxmat2_R,&marr);
4023: if (lda_rhs != n_R) {
4024: for (i=0;i<n_constraints;i++) {
4025: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4026: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4027: VecResetArray(dummy_vec);
4028: }
4029: } else {
4030: for (i=0;i<n_constraints;i++) {
4031: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4032: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4033: VecResetArray(pcbddc->vec1_R);
4034: }
4035: }
4036: MatDenseRestoreArray(local_auxmat2_R,&marr);
4037: }
4038: } else {
4039: PetscScalar *marr;
4041: MatDenseGetArray(local_auxmat2_R,&marr);
4042: for (i=0;i<n_constraints;i++) {
4043: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4044: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4045: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4046: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4047: VecResetArray(pcbddc->vec1_R);
4048: VecResetArray(pcbddc->vec2_R);
4049: }
4050: MatDenseRestoreArray(local_auxmat2_R,&marr);
4051: }
4052: if (sparserhs) {
4053: MatScale(C_CR,-1.0);
4054: }
4055: MatDestroy(&Brhs);
4056: if (!pcbddc->switch_static) {
4057: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4058: for (i=0;i<n_constraints;i++) {
4059: Vec r, b;
4060: MatDenseGetColumnVecRead(local_auxmat2_R,i,&r);
4061: MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b);
4062: VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD);
4063: VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD);
4064: MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b);
4065: MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r);
4066: }
4067: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4068: } else {
4069: if (lda_rhs != n_R) {
4070: IS dummy;
4072: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4073: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4074: ISDestroy(&dummy);
4075: } else {
4076: PetscObjectReference((PetscObject)local_auxmat2_R);
4077: pcbddc->local_auxmat2 = local_auxmat2_R;
4078: }
4079: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4080: }
4081: ISDestroy(&is_aux);
4082: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
4083: MatScale(M3,m_one);
4084: if (isCHOL) {
4085: MatCholeskyFactor(M3,NULL,NULL);
4086: } else {
4087: MatLUFactor(M3,NULL,NULL,NULL);
4088: }
4089: MatSeqDenseInvertFactors_Private(M3);
4090: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4091: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4092: MatDestroy(&C_B);
4093: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4094: MatDestroy(&M3);
4095: }
4097: /* Get submatrices from subdomain matrix */
4098: if (n_vertices) {
4099: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4100: PetscBool oldpin;
4101: #endif
4102: PetscBool isaij;
4103: IS is_aux;
4105: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4106: IS tis;
4108: ISDuplicate(pcbddc->is_R_local,&tis);
4109: ISSort(tis);
4110: ISComplement(tis,0,pcis->n,&is_aux);
4111: ISDestroy(&tis);
4112: } else {
4113: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4114: }
4115: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4116: oldpin = pcbddc->local_mat->boundtocpu;
4117: #endif
4118: MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);
4119: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4120: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4121: PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);
4122: if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4123: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4124: }
4125: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4126: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4127: MatBindToCPU(pcbddc->local_mat,oldpin);
4128: #endif
4129: ISDestroy(&is_aux);
4130: }
4132: /* Matrix of coarse basis functions (local) */
4133: if (pcbddc->coarse_phi_B) {
4134: PetscInt on_B,on_primal,on_D=n_D;
4135: if (pcbddc->coarse_phi_D) {
4136: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4137: }
4138: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4139: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4140: PetscScalar *marray;
4142: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4143: PetscFree(marray);
4144: MatDestroy(&pcbddc->coarse_phi_B);
4145: MatDestroy(&pcbddc->coarse_psi_B);
4146: MatDestroy(&pcbddc->coarse_phi_D);
4147: MatDestroy(&pcbddc->coarse_psi_D);
4148: }
4149: }
4151: if (!pcbddc->coarse_phi_B) {
4152: PetscScalar *marr;
4154: /* memory size */
4155: n = n_B*pcbddc->local_primal_size;
4156: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4157: if (!pcbddc->symmetric_primal) n *= 2;
4158: PetscCalloc1(n,&marr);
4159: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4160: marr += n_B*pcbddc->local_primal_size;
4161: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4162: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4163: marr += n_D*pcbddc->local_primal_size;
4164: }
4165: if (!pcbddc->symmetric_primal) {
4166: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4167: marr += n_B*pcbddc->local_primal_size;
4168: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4169: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4170: }
4171: } else {
4172: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4173: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4174: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4175: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4176: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4177: }
4178: }
4179: }
4181: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4182: p0_lidx_I = NULL;
4183: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4184: const PetscInt *idxs;
4186: ISGetIndices(pcis->is_I_local,&idxs);
4187: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4188: for (i=0;i<pcbddc->benign_n;i++) {
4189: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4190: }
4191: ISRestoreIndices(pcis->is_I_local,&idxs);
4192: }
4194: /* vertices */
4195: if (n_vertices) {
4196: PetscBool restoreavr = PETSC_FALSE;
4198: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4200: if (n_R) {
4201: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4202: PetscBLASInt B_N,B_one = 1;
4203: const PetscScalar *x;
4204: PetscScalar *y;
4206: MatScale(A_RV,m_one);
4207: if (need_benign_correction) {
4208: ISLocalToGlobalMapping RtoN;
4209: IS is_p0;
4210: PetscInt *idxs_p0,n;
4212: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4213: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4214: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4216: ISLocalToGlobalMappingDestroy(&RtoN);
4217: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4218: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4219: ISDestroy(&is_p0);
4220: }
4222: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4223: if (!sparserhs || need_benign_correction) {
4224: if (lda_rhs == n_R) {
4225: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4226: } else {
4227: PetscScalar *av,*array;
4228: const PetscInt *xadj,*adjncy;
4229: PetscInt n;
4230: PetscBool flg_row;
4232: array = work+lda_rhs*n_vertices;
4233: PetscArrayzero(array,lda_rhs*n_vertices);
4234: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4235: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4236: MatSeqAIJGetArray(A_RV,&av);
4237: for (i=0;i<n;i++) {
4238: PetscInt j;
4239: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4240: }
4241: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4242: MatDestroy(&A_RV);
4243: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4244: }
4245: if (need_benign_correction) {
4246: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4247: PetscScalar *marr;
4249: MatDenseGetArray(A_RV,&marr);
4250: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4252: | 0 0 0 | (V)
4253: L = | 0 0 -1 | (P-p0)
4254: | 0 0 -1 | (p0)
4256: */
4257: for (i=0;i<reuse_solver->benign_n;i++) {
4258: const PetscScalar *vals;
4259: const PetscInt *idxs,*idxs_zero;
4260: PetscInt n,j,nz;
4262: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4263: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4264: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4265: for (j=0;j<n;j++) {
4266: PetscScalar val = vals[j];
4267: PetscInt k,col = idxs[j];
4268: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4269: }
4270: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4271: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4272: }
4273: MatDenseRestoreArray(A_RV,&marr);
4274: }
4275: PetscObjectReference((PetscObject)A_RV);
4276: Brhs = A_RV;
4277: } else {
4278: Mat tA_RVT,A_RVT;
4280: if (!pcbddc->symmetric_primal) {
4281: /* A_RV already scaled by -1 */
4282: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4283: } else {
4284: restoreavr = PETSC_TRUE;
4285: MatScale(A_VR,-1.0);
4286: PetscObjectReference((PetscObject)A_VR);
4287: A_RVT = A_VR;
4288: }
4289: if (lda_rhs != n_R) {
4290: PetscScalar *aa;
4291: PetscInt r,*ii,*jj;
4292: PetscBool done;
4294: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4296: MatSeqAIJGetArray(A_RVT,&aa);
4297: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4298: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4300: } else {
4301: PetscObjectReference((PetscObject)A_RVT);
4302: tA_RVT = A_RVT;
4303: }
4304: MatCreateTranspose(tA_RVT,&Brhs);
4305: MatDestroy(&tA_RVT);
4306: MatDestroy(&A_RVT);
4307: }
4308: if (F) {
4309: /* need to correct the rhs */
4310: if (need_benign_correction) {
4311: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4312: PetscScalar *marr;
4314: MatDenseGetArray(Brhs,&marr);
4315: if (lda_rhs != n_R) {
4316: for (i=0;i<n_vertices;i++) {
4317: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4318: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4319: VecResetArray(dummy_vec);
4320: }
4321: } else {
4322: for (i=0;i<n_vertices;i++) {
4323: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4324: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4325: VecResetArray(pcbddc->vec1_R);
4326: }
4327: }
4328: MatDenseRestoreArray(Brhs,&marr);
4329: }
4330: MatMatSolve(F,Brhs,A_RRmA_RV);
4331: if (restoreavr) {
4332: MatScale(A_VR,-1.0);
4333: }
4334: /* need to correct the solution */
4335: if (need_benign_correction) {
4336: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4337: PetscScalar *marr;
4339: MatDenseGetArray(A_RRmA_RV,&marr);
4340: if (lda_rhs != n_R) {
4341: for (i=0;i<n_vertices;i++) {
4342: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4343: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4344: VecResetArray(dummy_vec);
4345: }
4346: } else {
4347: for (i=0;i<n_vertices;i++) {
4348: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4349: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4350: VecResetArray(pcbddc->vec1_R);
4351: }
4352: }
4353: MatDenseRestoreArray(A_RRmA_RV,&marr);
4354: }
4355: } else {
4356: MatDenseGetArray(Brhs,&y);
4357: for (i=0;i<n_vertices;i++) {
4358: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4359: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4360: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4361: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4362: VecResetArray(pcbddc->vec1_R);
4363: VecResetArray(pcbddc->vec2_R);
4364: }
4365: MatDenseRestoreArray(Brhs,&y);
4366: }
4367: MatDestroy(&A_RV);
4368: MatDestroy(&Brhs);
4369: /* S_VV and S_CV */
4370: if (n_constraints) {
4371: Mat B;
4373: PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);
4374: for (i=0;i<n_vertices;i++) {
4375: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4376: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4377: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4378: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4379: VecResetArray(pcis->vec1_B);
4380: VecResetArray(pcbddc->vec1_R);
4381: }
4382: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4383: /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4384: MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);
4385: MatProductSetType(S_CV,MATPRODUCT_AB);
4386: MatProductSetFromOptions(S_CV);
4387: MatProductSymbolic(S_CV);
4388: MatProductNumeric(S_CV);
4389: MatProductClear(S_CV);
4391: MatDestroy(&B);
4392: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4393: /* Reuse B = local_auxmat2_R * S_CV */
4394: MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);
4395: MatProductSetType(B,MATPRODUCT_AB);
4396: MatProductSetFromOptions(B);
4397: MatProductSymbolic(B);
4398: MatProductNumeric(B);
4400: MatScale(S_CV,m_one);
4401: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4402: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4403: MatDestroy(&B);
4404: }
4405: if (lda_rhs != n_R) {
4406: MatDestroy(&A_RRmA_RV);
4407: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4408: MatDenseSetLDA(A_RRmA_RV,lda_rhs);
4409: }
4410: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4411: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4412: if (need_benign_correction) {
4413: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4414: PetscScalar *marr,*sums;
4416: PetscMalloc1(n_vertices,&sums);
4417: MatDenseGetArray(S_VVt,&marr);
4418: for (i=0;i<reuse_solver->benign_n;i++) {
4419: const PetscScalar *vals;
4420: const PetscInt *idxs,*idxs_zero;
4421: PetscInt n,j,nz;
4423: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4424: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4425: for (j=0;j<n_vertices;j++) {
4426: PetscInt k;
4427: sums[j] = 0.;
4428: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4429: }
4430: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4431: for (j=0;j<n;j++) {
4432: PetscScalar val = vals[j];
4433: PetscInt k;
4434: for (k=0;k<n_vertices;k++) {
4435: marr[idxs[j]+k*n_vertices] += val*sums[k];
4436: }
4437: }
4438: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4439: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4440: }
4441: PetscFree(sums);
4442: MatDenseRestoreArray(S_VVt,&marr);
4443: MatDestroy(&A_RV_bcorr);
4444: }
4445: MatDestroy(&A_RRmA_RV);
4446: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4447: MatDenseGetArrayRead(A_VV,&x);
4448: MatDenseGetArray(S_VVt,&y);
4449: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4450: MatDenseRestoreArrayRead(A_VV,&x);
4451: MatDenseRestoreArray(S_VVt,&y);
4452: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4453: MatDestroy(&S_VVt);
4454: } else {
4455: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4456: }
4457: MatDestroy(&A_VV);
4459: /* coarse basis functions */
4460: for (i=0;i<n_vertices;i++) {
4461: Vec v;
4462: PetscScalar one = 1.0,zero = 0.0;
4464: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4465: MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v);
4466: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4467: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4468: if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4469: PetscMPIInt rank;
4470: MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank);
4472: }
4473: VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES);
4474: VecAssemblyBegin(v); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4475: VecAssemblyEnd(v);
4476: MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v);
4478: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4479: PetscInt j;
4481: MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v);
4482: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4483: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4484: if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4485: PetscMPIInt rank;
4486: MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank);
4488: }
4489: for (j=0;j<pcbddc->benign_n;j++) VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES);
4490: VecAssemblyBegin(v);
4491: VecAssemblyEnd(v);
4492: MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v);
4493: }
4494: VecResetArray(pcbddc->vec1_R);
4495: }
4496: /* if n_R == 0 the object is not destroyed */
4497: MatDestroy(&A_RV);
4498: }
4499: VecDestroy(&dummy_vec);
4501: if (n_constraints) {
4502: Mat B;
4504: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4505: MatScale(S_CC,m_one);
4506: MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);
4507: MatProductSetType(B,MATPRODUCT_AB);
4508: MatProductSetFromOptions(B);
4509: MatProductSymbolic(B);
4510: MatProductNumeric(B);
4512: MatScale(S_CC,m_one);
4513: if (n_vertices) {
4514: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4515: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4516: } else {
4517: Mat S_VCt;
4519: if (lda_rhs != n_R) {
4520: MatDestroy(&B);
4521: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4522: MatDenseSetLDA(B,lda_rhs);
4523: }
4524: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4525: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4526: MatDestroy(&S_VCt);
4527: }
4528: }
4529: MatDestroy(&B);
4530: /* coarse basis functions */
4531: for (i=0;i<n_constraints;i++) {
4532: Vec v;
4534: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4535: MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v);
4536: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4537: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4538: MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v);
4539: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4540: PetscInt j;
4541: PetscScalar zero = 0.0;
4542: MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v);
4543: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4544: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4545: for (j=0;j<pcbddc->benign_n;j++) VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES);
4546: VecAssemblyBegin(v);
4547: VecAssemblyEnd(v);
4548: MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v);
4549: }
4550: VecResetArray(pcbddc->vec1_R);
4551: }
4552: }
4553: if (n_constraints) {
4554: MatDestroy(&local_auxmat2_R);
4555: }
4556: PetscFree(p0_lidx_I);
4558: /* coarse matrix entries relative to B_0 */
4559: if (pcbddc->benign_n) {
4560: Mat B0_B,B0_BPHI;
4561: IS is_dummy;
4562: const PetscScalar *data;
4563: PetscInt j;
4565: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4566: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4567: ISDestroy(&is_dummy);
4568: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4569: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4570: MatDenseGetArrayRead(B0_BPHI,&data);
4571: for (j=0;j<pcbddc->benign_n;j++) {
4572: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4573: for (i=0;i<pcbddc->local_primal_size;i++) {
4574: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4575: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4576: }
4577: }
4578: MatDenseRestoreArrayRead(B0_BPHI,&data);
4579: MatDestroy(&B0_B);
4580: MatDestroy(&B0_BPHI);
4581: }
4583: /* compute other basis functions for non-symmetric problems */
4584: if (!pcbddc->symmetric_primal) {
4585: Mat B_V=NULL,B_C=NULL;
4586: PetscScalar *marray;
4588: if (n_constraints) {
4589: Mat S_CCT,C_CRT;
4591: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4592: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4593: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4594: MatDestroy(&S_CCT);
4595: if (n_vertices) {
4596: Mat S_VCT;
4598: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4599: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4600: MatDestroy(&S_VCT);
4601: }
4602: MatDestroy(&C_CRT);
4603: } else {
4604: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4605: }
4606: if (n_vertices && n_R) {
4607: PetscScalar *av,*marray;
4608: const PetscInt *xadj,*adjncy;
4609: PetscInt n;
4610: PetscBool flg_row;
4612: /* B_V = B_V - A_VR^T */
4613: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4614: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4615: MatSeqAIJGetArray(A_VR,&av);
4616: MatDenseGetArray(B_V,&marray);
4617: for (i=0;i<n;i++) {
4618: PetscInt j;
4619: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4620: }
4621: MatDenseRestoreArray(B_V,&marray);
4622: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4623: MatDestroy(&A_VR);
4624: }
4626: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4627: if (n_vertices) {
4628: MatDenseGetArray(B_V,&marray);
4629: for (i=0;i<n_vertices;i++) {
4630: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4631: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4632: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4633: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4634: VecResetArray(pcbddc->vec1_R);
4635: VecResetArray(pcbddc->vec2_R);
4636: }
4637: MatDenseRestoreArray(B_V,&marray);
4638: }
4639: if (B_C) {
4640: MatDenseGetArray(B_C,&marray);
4641: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4642: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4643: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4644: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4645: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4646: VecResetArray(pcbddc->vec1_R);
4647: VecResetArray(pcbddc->vec2_R);
4648: }
4649: MatDenseRestoreArray(B_C,&marray);
4650: }
4651: /* coarse basis functions */
4652: for (i=0;i<pcbddc->local_primal_size;i++) {
4653: Vec v;
4655: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4656: MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v);
4657: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4658: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4659: if (i<n_vertices) {
4660: PetscScalar one = 1.0;
4661: VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES);
4662: VecAssemblyBegin(v);
4663: VecAssemblyEnd(v);
4664: }
4665: MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v);
4667: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4668: MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v);
4669: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4670: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4671: MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v);
4672: }
4673: VecResetArray(pcbddc->vec1_R);
4674: }
4675: MatDestroy(&B_V);
4676: MatDestroy(&B_C);
4677: }
4679: /* free memory */
4680: PetscFree(idx_V_B);
4681: MatDestroy(&S_VV);
4682: MatDestroy(&S_CV);
4683: MatDestroy(&S_VC);
4684: MatDestroy(&S_CC);
4685: PetscFree(work);
4686: if (n_vertices) {
4687: MatDestroy(&A_VR);
4688: }
4689: if (n_constraints) {
4690: MatDestroy(&C_CR);
4691: }
4692: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4694: /* Checking coarse_sub_mat and coarse basis functios */
4695: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4696: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4697: if (pcbddc->dbg_flag) {
4698: Mat coarse_sub_mat;
4699: Mat AUXMAT,TM1,TM2,TM3,TM4;
4700: Mat coarse_phi_D,coarse_phi_B;
4701: Mat coarse_psi_D,coarse_psi_B;
4702: Mat A_II,A_BB,A_IB,A_BI;
4703: Mat C_B,CPHI;
4704: IS is_dummy;
4705: Vec mones;
4706: MatType checkmattype=MATSEQAIJ;
4707: PetscReal real_value;
4709: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4710: Mat A;
4711: PCBDDCBenignProject(pc,NULL,NULL,&A);
4712: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4713: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4714: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4715: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4716: MatDestroy(&A);
4717: } else {
4718: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4719: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4720: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4721: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4722: }
4723: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4724: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4725: if (!pcbddc->symmetric_primal) {
4726: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4727: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4728: }
4729: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4731: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4732: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4733: PetscViewerFlush(pcbddc->dbg_viewer);
4734: if (!pcbddc->symmetric_primal) {
4735: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4736: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4737: MatDestroy(&AUXMAT);
4738: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4739: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4740: MatDestroy(&AUXMAT);
4741: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4742: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4743: MatDestroy(&AUXMAT);
4744: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4745: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4746: MatDestroy(&AUXMAT);
4747: } else {
4748: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4749: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4750: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4751: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4752: MatDestroy(&AUXMAT);
4753: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4754: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4755: MatDestroy(&AUXMAT);
4756: }
4757: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4758: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4759: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4760: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4761: if (pcbddc->benign_n) {
4762: Mat B0_B,B0_BPHI;
4763: const PetscScalar *data2;
4764: PetscScalar *data;
4765: PetscInt j;
4767: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4768: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4769: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4770: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4771: MatDenseGetArray(TM1,&data);
4772: MatDenseGetArrayRead(B0_BPHI,&data2);
4773: for (j=0;j<pcbddc->benign_n;j++) {
4774: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4775: for (i=0;i<pcbddc->local_primal_size;i++) {
4776: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4777: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4778: }
4779: }
4780: MatDenseRestoreArray(TM1,&data);
4781: MatDenseRestoreArrayRead(B0_BPHI,&data2);
4782: MatDestroy(&B0_B);
4783: ISDestroy(&is_dummy);
4784: MatDestroy(&B0_BPHI);
4785: }
4786: #if 0
4787: {
4788: PetscViewer viewer;
4789: char filename[256];
4790: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4791: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4792: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4793: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4794: MatView(coarse_sub_mat,viewer);
4795: PetscObjectSetName((PetscObject)TM1,"projected");
4796: MatView(TM1,viewer);
4797: if (pcbddc->coarse_phi_B) {
4798: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4799: MatView(pcbddc->coarse_phi_B,viewer);
4800: }
4801: if (pcbddc->coarse_phi_D) {
4802: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4803: MatView(pcbddc->coarse_phi_D,viewer);
4804: }
4805: if (pcbddc->coarse_psi_B) {
4806: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4807: MatView(pcbddc->coarse_psi_B,viewer);
4808: }
4809: if (pcbddc->coarse_psi_D) {
4810: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4811: MatView(pcbddc->coarse_psi_D,viewer);
4812: }
4813: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4814: MatView(pcbddc->local_mat,viewer);
4815: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4816: MatView(pcbddc->ConstraintMatrix,viewer);
4817: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4818: ISView(pcis->is_I_local,viewer);
4819: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4820: ISView(pcis->is_B_local,viewer);
4821: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4822: ISView(pcbddc->is_R_local,viewer);
4823: PetscViewerDestroy(&viewer);
4824: }
4825: #endif
4826: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4827: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4828: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4829: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4831: /* check constraints */
4832: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4833: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4834: if (!pcbddc->benign_n) { /* TODO: add benign case */
4835: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4836: } else {
4837: PetscScalar *data;
4838: Mat tmat;
4839: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4840: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4841: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4842: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4843: MatDestroy(&tmat);
4844: }
4845: MatCreateVecs(CPHI,&mones,NULL);
4846: VecSet(mones,-1.0);
4847: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4848: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4849: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4850: if (!pcbddc->symmetric_primal) {
4851: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4852: VecSet(mones,-1.0);
4853: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4854: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4855: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4856: }
4857: MatDestroy(&C_B);
4858: MatDestroy(&CPHI);
4859: ISDestroy(&is_dummy);
4860: VecDestroy(&mones);
4861: PetscViewerFlush(pcbddc->dbg_viewer);
4862: MatDestroy(&A_II);
4863: MatDestroy(&A_BB);
4864: MatDestroy(&A_IB);
4865: MatDestroy(&A_BI);
4866: MatDestroy(&TM1);
4867: MatDestroy(&TM2);
4868: MatDestroy(&TM3);
4869: MatDestroy(&TM4);
4870: MatDestroy(&coarse_phi_D);
4871: MatDestroy(&coarse_phi_B);
4872: if (!pcbddc->symmetric_primal) {
4873: MatDestroy(&coarse_psi_D);
4874: MatDestroy(&coarse_psi_B);
4875: }
4876: MatDestroy(&coarse_sub_mat);
4877: }
4878: /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4879: {
4880: PetscBool gpu;
4882: PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);
4883: if (gpu) {
4884: if (pcbddc->local_auxmat1) {
4885: MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);
4886: }
4887: if (pcbddc->local_auxmat2) {
4888: MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);
4889: }
4890: if (pcbddc->coarse_phi_B) {
4891: MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);
4892: }
4893: if (pcbddc->coarse_phi_D) {
4894: MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);
4895: }
4896: if (pcbddc->coarse_psi_B) {
4897: MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);
4898: }
4899: if (pcbddc->coarse_psi_D) {
4900: MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);
4901: }
4902: }
4903: }
4904: /* get back data */
4905: *coarse_submat_vals_n = coarse_submat_vals;
4906: return 0;
4907: }
4909: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4910: {
4911: Mat *work_mat;
4912: IS isrow_s,iscol_s;
4913: PetscBool rsorted,csorted;
4914: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4916: ISSorted(isrow,&rsorted);
4917: ISSorted(iscol,&csorted);
4918: ISGetLocalSize(isrow,&rsize);
4919: ISGetLocalSize(iscol,&csize);
4921: if (!rsorted) {
4922: const PetscInt *idxs;
4923: PetscInt *idxs_sorted,i;
4925: PetscMalloc1(rsize,&idxs_perm_r);
4926: PetscMalloc1(rsize,&idxs_sorted);
4927: for (i=0;i<rsize;i++) {
4928: idxs_perm_r[i] = i;
4929: }
4930: ISGetIndices(isrow,&idxs);
4931: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4932: for (i=0;i<rsize;i++) {
4933: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4934: }
4935: ISRestoreIndices(isrow,&idxs);
4936: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4937: } else {
4938: PetscObjectReference((PetscObject)isrow);
4939: isrow_s = isrow;
4940: }
4942: if (!csorted) {
4943: if (isrow == iscol) {
4944: PetscObjectReference((PetscObject)isrow_s);
4945: iscol_s = isrow_s;
4946: } else {
4947: const PetscInt *idxs;
4948: PetscInt *idxs_sorted,i;
4950: PetscMalloc1(csize,&idxs_perm_c);
4951: PetscMalloc1(csize,&idxs_sorted);
4952: for (i=0;i<csize;i++) {
4953: idxs_perm_c[i] = i;
4954: }
4955: ISGetIndices(iscol,&idxs);
4956: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4957: for (i=0;i<csize;i++) {
4958: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4959: }
4960: ISRestoreIndices(iscol,&idxs);
4961: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4962: }
4963: } else {
4964: PetscObjectReference((PetscObject)iscol);
4965: iscol_s = iscol;
4966: }
4968: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
4970: if (!rsorted || !csorted) {
4971: Mat new_mat;
4972: IS is_perm_r,is_perm_c;
4974: if (!rsorted) {
4975: PetscInt *idxs_r,i;
4976: PetscMalloc1(rsize,&idxs_r);
4977: for (i=0;i<rsize;i++) {
4978: idxs_r[idxs_perm_r[i]] = i;
4979: }
4980: PetscFree(idxs_perm_r);
4981: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4982: } else {
4983: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4984: }
4985: ISSetPermutation(is_perm_r);
4987: if (!csorted) {
4988: if (isrow_s == iscol_s) {
4989: PetscObjectReference((PetscObject)is_perm_r);
4990: is_perm_c = is_perm_r;
4991: } else {
4992: PetscInt *idxs_c,i;
4994: PetscMalloc1(csize,&idxs_c);
4995: for (i=0;i<csize;i++) {
4996: idxs_c[idxs_perm_c[i]] = i;
4997: }
4998: PetscFree(idxs_perm_c);
4999: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
5000: }
5001: } else {
5002: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
5003: }
5004: ISSetPermutation(is_perm_c);
5006: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
5007: MatDestroy(&work_mat[0]);
5008: work_mat[0] = new_mat;
5009: ISDestroy(&is_perm_r);
5010: ISDestroy(&is_perm_c);
5011: }
5013: PetscObjectReference((PetscObject)work_mat[0]);
5014: *B = work_mat[0];
5015: MatDestroyMatrices(1,&work_mat);
5016: ISDestroy(&isrow_s);
5017: ISDestroy(&iscol_s);
5018: return 0;
5019: }
5021: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5022: {
5023: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5024: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5025: Mat new_mat,lA;
5026: IS is_local,is_global;
5027: PetscInt local_size;
5028: PetscBool isseqaij;
5030: MatDestroy(&pcbddc->local_mat);
5031: MatGetSize(matis->A,&local_size,NULL);
5032: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5033: ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global);
5034: ISDestroy(&is_local);
5035: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5036: ISDestroy(&is_global);
5038: if (pcbddc->dbg_flag) {
5039: Vec x,x_change;
5040: PetscReal error;
5042: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5043: VecSetRandom(x,NULL);
5044: MatMult(ChangeOfBasisMatrix,x,x_change);
5045: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5046: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5047: MatMult(new_mat,matis->x,matis->y);
5048: if (!pcbddc->change_interior) {
5049: const PetscScalar *x,*y,*v;
5050: PetscReal lerror = 0.;
5051: PetscInt i;
5053: VecGetArrayRead(matis->x,&x);
5054: VecGetArrayRead(matis->y,&y);
5055: VecGetArrayRead(matis->counter,&v);
5056: for (i=0;i<local_size;i++)
5057: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5058: lerror = PetscAbsScalar(x[i]-y[i]);
5059: VecRestoreArrayRead(matis->x,&x);
5060: VecRestoreArrayRead(matis->y,&y);
5061: VecRestoreArrayRead(matis->counter,&v);
5062: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc));
5063: if (error > PETSC_SMALL) {
5064: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5065: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5066: } else {
5067: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5068: }
5069: }
5070: }
5071: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5072: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5073: VecAXPY(x,-1.0,x_change);
5074: VecNorm(x,NORM_INFINITY,&error);
5075: if (error > PETSC_SMALL) {
5076: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5077: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5078: } else {
5079: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5080: }
5081: }
5082: VecDestroy(&x);
5083: VecDestroy(&x_change);
5084: }
5086: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5087: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
5089: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5090: PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5091: if (isseqaij) {
5092: MatDestroy(&pcbddc->local_mat);
5093: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5094: if (lA) {
5095: Mat work;
5096: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5097: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5098: MatDestroy(&work);
5099: }
5100: } else {
5101: Mat work_mat;
5103: MatDestroy(&pcbddc->local_mat);
5104: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5105: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5106: MatDestroy(&work_mat);
5107: if (lA) {
5108: Mat work;
5109: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5110: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5111: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5112: MatDestroy(&work);
5113: }
5114: }
5115: if (matis->A->symmetric_set) {
5116: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5117: #if !defined(PETSC_USE_COMPLEX)
5118: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5119: #endif
5120: }
5121: MatDestroy(&new_mat);
5122: return 0;
5123: }
5125: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5126: {
5127: PC_IS* pcis = (PC_IS*)(pc->data);
5128: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5129: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5130: PetscInt *idx_R_local=NULL;
5131: PetscInt n_vertices,i,j,n_R,n_D,n_B;
5132: PetscInt vbs,bs;
5133: PetscBT bitmask=NULL;
5135: /*
5136: No need to setup local scatters if
5137: - primal space is unchanged
5138: AND
5139: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5140: AND
5141: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5142: */
5143: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5144: return 0;
5145: }
5146: /* destroy old objects */
5147: ISDestroy(&pcbddc->is_R_local);
5148: VecScatterDestroy(&pcbddc->R_to_B);
5149: VecScatterDestroy(&pcbddc->R_to_D);
5150: /* Set Non-overlapping dimensions */
5151: n_B = pcis->n_B;
5152: n_D = pcis->n - n_B;
5153: n_vertices = pcbddc->n_vertices;
5155: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5157: /* create auxiliary bitmask and allocate workspace */
5158: if (!sub_schurs || !sub_schurs->reuse_solver) {
5159: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5160: PetscBTCreate(pcis->n,&bitmask);
5161: for (i=0;i<n_vertices;i++) {
5162: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5163: }
5165: for (i=0, n_R=0; i<pcis->n; i++) {
5166: if (!PetscBTLookup(bitmask,i)) {
5167: idx_R_local[n_R++] = i;
5168: }
5169: }
5170: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5171: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5173: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5174: ISGetLocalSize(reuse_solver->is_R,&n_R);
5175: }
5177: /* Block code */
5178: vbs = 1;
5179: MatGetBlockSize(pcbddc->local_mat,&bs);
5180: if (bs>1 && !(n_vertices%bs)) {
5181: PetscBool is_blocked = PETSC_TRUE;
5182: PetscInt *vary;
5183: if (!sub_schurs || !sub_schurs->reuse_solver) {
5184: PetscMalloc1(pcis->n/bs,&vary);
5185: PetscArrayzero(vary,pcis->n/bs);
5186: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5187: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5188: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5189: for (i=0; i<pcis->n/bs; i++) {
5190: if (vary[i]!=0 && vary[i]!=bs) {
5191: is_blocked = PETSC_FALSE;
5192: break;
5193: }
5194: }
5195: PetscFree(vary);
5196: } else {
5197: /* Verify directly the R set */
5198: for (i=0; i<n_R/bs; i++) {
5199: PetscInt j,node=idx_R_local[bs*i];
5200: for (j=1; j<bs; j++) {
5201: if (node != idx_R_local[bs*i+j]-j) {
5202: is_blocked = PETSC_FALSE;
5203: break;
5204: }
5205: }
5206: }
5207: }
5208: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5209: vbs = bs;
5210: for (i=0;i<n_R/vbs;i++) {
5211: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5212: }
5213: }
5214: }
5215: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5216: if (sub_schurs && sub_schurs->reuse_solver) {
5217: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5219: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5220: ISDestroy(&reuse_solver->is_R);
5221: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5222: reuse_solver->is_R = pcbddc->is_R_local;
5223: } else {
5224: PetscFree(idx_R_local);
5225: }
5227: /* print some info if requested */
5228: if (pcbddc->dbg_flag) {
5229: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5230: PetscViewerFlush(pcbddc->dbg_viewer);
5231: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5232: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5233: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5234: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5235: PetscViewerFlush(pcbddc->dbg_viewer);
5236: }
5238: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5239: if (!sub_schurs || !sub_schurs->reuse_solver) {
5240: IS is_aux1,is_aux2;
5241: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5243: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5244: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5245: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5246: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5247: for (i=0; i<n_D; i++) {
5248: PetscBTSet(bitmask,is_indices[i]);
5249: }
5250: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5251: for (i=0, j=0; i<n_R; i++) {
5252: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5253: aux_array1[j++] = i;
5254: }
5255: }
5256: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5257: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5258: for (i=0, j=0; i<n_B; i++) {
5259: if (!PetscBTLookup(bitmask,is_indices[i])) {
5260: aux_array2[j++] = i;
5261: }
5262: }
5263: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5264: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5265: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5266: ISDestroy(&is_aux1);
5267: ISDestroy(&is_aux2);
5269: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5270: PetscMalloc1(n_D,&aux_array1);
5271: for (i=0, j=0; i<n_R; i++) {
5272: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5273: aux_array1[j++] = i;
5274: }
5275: }
5276: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5277: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5278: ISDestroy(&is_aux1);
5279: }
5280: PetscBTDestroy(&bitmask);
5281: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5282: } else {
5283: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5284: IS tis;
5285: PetscInt schur_size;
5287: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5288: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5289: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5290: ISDestroy(&tis);
5291: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5292: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5293: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5294: ISDestroy(&tis);
5295: }
5296: }
5297: return 0;
5298: }
5300: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5301: {
5302: MatNullSpace NullSpace;
5303: Mat dmat;
5304: const Vec *nullvecs;
5305: Vec v,v2,*nullvecs2;
5306: VecScatter sct = NULL;
5307: PetscContainer c;
5308: PetscScalar *ddata;
5309: PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs;
5310: PetscBool nnsp_has_cnst;
5312: if (!is && !B) { /* MATIS */
5313: Mat_IS* matis = (Mat_IS*)A->data;
5315: if (!B) {
5316: MatISGetLocalMat(A,&B);
5317: }
5318: sct = matis->cctx;
5319: PetscObjectReference((PetscObject)sct);
5320: } else {
5321: MatGetNullSpace(B,&NullSpace);
5322: if (!NullSpace) {
5323: MatGetNearNullSpace(B,&NullSpace);
5324: }
5325: if (NullSpace) return 0;
5326: }
5327: MatGetNullSpace(A,&NullSpace);
5328: if (!NullSpace) {
5329: MatGetNearNullSpace(A,&NullSpace);
5330: }
5331: if (!NullSpace) return 0;
5333: MatCreateVecs(A,&v,NULL);
5334: MatCreateVecs(B,&v2,NULL);
5335: if (!sct) {
5336: VecScatterCreate(v,is,v2,NULL,&sct);
5337: }
5338: MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
5339: bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5340: PetscMalloc1(bsiz,&nullvecs2);
5341: VecGetBlockSize(v2,&bs);
5342: VecGetSize(v2,&N);
5343: VecGetLocalSize(v2,&n);
5344: PetscMalloc1(n*bsiz,&ddata);
5345: for (k=0;k<nnsp_size;k++) {
5346: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);
5347: VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5348: VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5349: }
5350: if (nnsp_has_cnst) {
5351: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);
5352: VecSet(nullvecs2[nnsp_size],1.0);
5353: }
5354: PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);
5355: MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);
5357: MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);
5358: PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);
5359: PetscContainerSetPointer(c,ddata);
5360: PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);
5361: PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);
5362: PetscContainerDestroy(&c);
5363: PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5364: MatDestroy(&dmat);
5366: for (k=0;k<bsiz;k++) {
5367: VecDestroy(&nullvecs2[k]);
5368: }
5369: PetscFree(nullvecs2);
5370: MatSetNearNullSpace(B,NullSpace);
5371: MatNullSpaceDestroy(&NullSpace);
5372: VecDestroy(&v);
5373: VecDestroy(&v2);
5374: VecScatterDestroy(&sct);
5375: return 0;
5376: }
5378: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5379: {
5380: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5381: PC_IS *pcis = (PC_IS*)pc->data;
5382: PC pc_temp;
5383: Mat A_RR;
5384: MatNullSpace nnsp;
5385: MatReuse reuse;
5386: PetscScalar m_one = -1.0;
5387: PetscReal value;
5388: PetscInt n_D,n_R;
5389: PetscBool issbaij,opts;
5390: void (*f)(void) = NULL;
5391: char dir_prefix[256],neu_prefix[256],str_level[16];
5392: size_t len;
5394: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5395: /* approximate solver, propagate NearNullSpace if needed */
5396: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5397: MatNullSpace gnnsp1,gnnsp2;
5398: PetscBool lhas,ghas;
5400: MatGetNearNullSpace(pcbddc->local_mat,&nnsp);
5401: MatGetNearNullSpace(pc->pmat,&gnnsp1);
5402: MatGetNullSpace(pc->pmat,&gnnsp2);
5403: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5404: MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
5405: if (!ghas && (gnnsp1 || gnnsp2)) {
5406: MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);
5407: }
5408: }
5410: /* compute prefixes */
5411: PetscStrcpy(dir_prefix,"");
5412: PetscStrcpy(neu_prefix,"");
5413: if (!pcbddc->current_level) {
5414: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5415: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5416: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5417: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5418: } else {
5419: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5420: PetscStrlen(((PetscObject)pc)->prefix,&len);
5421: len -= 15; /* remove "pc_bddc_coarse_" */
5422: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5423: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5424: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5425: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5426: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5427: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5428: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5429: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5430: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5431: }
5433: /* DIRICHLET PROBLEM */
5434: if (dirichlet) {
5435: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5436: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5438: if (pcbddc->dbg_flag) {
5439: Mat A_IIn;
5441: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5442: MatDestroy(&pcis->A_II);
5443: pcis->A_II = A_IIn;
5444: }
5445: }
5446: if (pcbddc->local_mat->symmetric_set) {
5447: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5448: }
5449: /* Matrix for Dirichlet problem is pcis->A_II */
5450: n_D = pcis->n - pcis->n_B;
5451: opts = PETSC_FALSE;
5452: if (!pcbddc->ksp_D) { /* create object if not yet build */
5453: opts = PETSC_TRUE;
5454: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5455: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5456: /* default */
5457: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5458: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5459: PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5460: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5461: if (issbaij) {
5462: PCSetType(pc_temp,PCCHOLESKY);
5463: } else {
5464: PCSetType(pc_temp,PCLU);
5465: }
5466: KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5467: }
5468: MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5469: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5470: /* Allow user's customization */
5471: if (opts) {
5472: KSPSetFromOptions(pcbddc->ksp_D);
5473: }
5474: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5475: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5476: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5477: }
5478: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5479: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5480: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5481: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5482: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5483: const PetscInt *idxs;
5484: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5486: ISGetLocalSize(pcis->is_I_local,&nl);
5487: ISGetIndices(pcis->is_I_local,&idxs);
5488: PetscMalloc1(nl*cdim,&scoords);
5489: for (i=0;i<nl;i++) {
5490: for (d=0;d<cdim;d++) {
5491: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5492: }
5493: }
5494: ISRestoreIndices(pcis->is_I_local,&idxs);
5495: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5496: PetscFree(scoords);
5497: }
5498: if (sub_schurs && sub_schurs->reuse_solver) {
5499: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5501: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5502: }
5504: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5505: if (!n_D) {
5506: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5507: PCSetType(pc_temp,PCNONE);
5508: }
5509: KSPSetUp(pcbddc->ksp_D);
5510: /* set ksp_D into pcis data */
5511: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5512: KSPDestroy(&pcis->ksp_D);
5513: pcis->ksp_D = pcbddc->ksp_D;
5514: }
5516: /* NEUMANN PROBLEM */
5517: A_RR = NULL;
5518: if (neumann) {
5519: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5520: PetscInt ibs,mbs;
5521: PetscBool issbaij, reuse_neumann_solver;
5522: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5524: reuse_neumann_solver = PETSC_FALSE;
5525: if (sub_schurs && sub_schurs->reuse_solver) {
5526: IS iP;
5528: reuse_neumann_solver = PETSC_TRUE;
5529: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5530: if (iP) reuse_neumann_solver = PETSC_FALSE;
5531: }
5532: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5533: ISGetSize(pcbddc->is_R_local,&n_R);
5534: if (pcbddc->ksp_R) { /* already created ksp */
5535: PetscInt nn_R;
5536: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5537: PetscObjectReference((PetscObject)A_RR);
5538: MatGetSize(A_RR,&nn_R,NULL);
5539: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5540: KSPReset(pcbddc->ksp_R);
5541: MatDestroy(&A_RR);
5542: reuse = MAT_INITIAL_MATRIX;
5543: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5544: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5545: MatDestroy(&A_RR);
5546: reuse = MAT_INITIAL_MATRIX;
5547: } else { /* safe to reuse the matrix */
5548: reuse = MAT_REUSE_MATRIX;
5549: }
5550: }
5551: /* last check */
5552: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5553: MatDestroy(&A_RR);
5554: reuse = MAT_INITIAL_MATRIX;
5555: }
5556: } else { /* first time, so we need to create the matrix */
5557: reuse = MAT_INITIAL_MATRIX;
5558: }
5559: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5560: TODO: Get Rid of these conversions */
5561: MatGetBlockSize(pcbddc->local_mat,&mbs);
5562: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5563: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5564: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5565: if (matis->A == pcbddc->local_mat) {
5566: MatDestroy(&pcbddc->local_mat);
5567: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5568: } else {
5569: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5570: }
5571: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5572: if (matis->A == pcbddc->local_mat) {
5573: MatDestroy(&pcbddc->local_mat);
5574: MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5575: } else {
5576: MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5577: }
5578: }
5579: /* extract A_RR */
5580: if (reuse_neumann_solver) {
5581: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5583: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5584: MatDestroy(&A_RR);
5585: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5586: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5587: } else {
5588: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5589: }
5590: } else {
5591: MatDestroy(&A_RR);
5592: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5593: PetscObjectReference((PetscObject)A_RR);
5594: }
5595: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5596: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5597: }
5598: if (pcbddc->local_mat->symmetric_set) {
5599: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5600: }
5601: opts = PETSC_FALSE;
5602: if (!pcbddc->ksp_R) { /* create object if not present */
5603: opts = PETSC_TRUE;
5604: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5605: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5606: /* default */
5607: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5608: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5609: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5610: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5611: if (issbaij) {
5612: PCSetType(pc_temp,PCCHOLESKY);
5613: } else {
5614: PCSetType(pc_temp,PCLU);
5615: }
5616: KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5617: }
5618: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5619: MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5620: if (opts) { /* Allow user's customization once */
5621: KSPSetFromOptions(pcbddc->ksp_R);
5622: }
5623: MatGetNearNullSpace(A_RR,&nnsp);
5624: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5625: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5626: }
5627: MatGetNearNullSpace(A_RR,&nnsp);
5628: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5629: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5630: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5631: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5632: const PetscInt *idxs;
5633: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5635: ISGetLocalSize(pcbddc->is_R_local,&nl);
5636: ISGetIndices(pcbddc->is_R_local,&idxs);
5637: PetscMalloc1(nl*cdim,&scoords);
5638: for (i=0;i<nl;i++) {
5639: for (d=0;d<cdim;d++) {
5640: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5641: }
5642: }
5643: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5644: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5645: PetscFree(scoords);
5646: }
5648: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5649: if (!n_R) {
5650: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5651: PCSetType(pc_temp,PCNONE);
5652: }
5653: /* Reuse solver if it is present */
5654: if (reuse_neumann_solver) {
5655: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5657: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5658: }
5659: KSPSetUp(pcbddc->ksp_R);
5660: }
5662: if (pcbddc->dbg_flag) {
5663: PetscViewerFlush(pcbddc->dbg_viewer);
5664: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5665: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5666: }
5667: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5669: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5670: if (pcbddc->NullSpace_corr[0]) {
5671: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5672: }
5673: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5674: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5675: }
5676: if (neumann && pcbddc->NullSpace_corr[2]) {
5677: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5678: }
5679: /* check Dirichlet and Neumann solvers */
5680: if (pcbddc->dbg_flag) {
5681: if (dirichlet) { /* Dirichlet */
5682: VecSetRandom(pcis->vec1_D,NULL);
5683: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5684: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5685: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5686: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5687: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5688: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5689: PetscViewerFlush(pcbddc->dbg_viewer);
5690: }
5691: if (neumann) { /* Neumann */
5692: VecSetRandom(pcbddc->vec1_R,NULL);
5693: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5694: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5695: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5696: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5697: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5698: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5699: PetscViewerFlush(pcbddc->dbg_viewer);
5700: }
5701: }
5702: /* free Neumann problem's matrix */
5703: MatDestroy(&A_RR);
5704: return 0;
5705: }
5707: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5708: {
5709: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5710: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5711: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
5713: if (!reuse_solver) {
5714: VecSet(pcbddc->vec1_R,0.);
5715: }
5716: if (!pcbddc->switch_static) {
5717: if (applytranspose && pcbddc->local_auxmat1) {
5718: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5719: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5720: }
5721: if (!reuse_solver) {
5722: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5723: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5724: } else {
5725: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5727: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5728: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5729: }
5730: } else {
5731: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5732: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5733: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5734: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5735: if (applytranspose && pcbddc->local_auxmat1) {
5736: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5737: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5738: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5739: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5740: }
5741: }
5742: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
5743: if (!reuse_solver || pcbddc->switch_static) {
5744: if (applytranspose) {
5745: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5746: } else {
5747: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5748: }
5749: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5750: } else {
5751: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5753: if (applytranspose) {
5754: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5755: } else {
5756: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5757: }
5758: }
5759: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
5760: VecSet(inout_B,0.);
5761: if (!pcbddc->switch_static) {
5762: if (!reuse_solver) {
5763: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5764: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5765: } else {
5766: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5768: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5769: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5770: }
5771: if (!applytranspose && pcbddc->local_auxmat1) {
5772: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5773: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5774: }
5775: } else {
5776: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5777: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5778: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5779: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5780: if (!applytranspose && pcbddc->local_auxmat1) {
5781: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5782: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5783: }
5784: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5785: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5786: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5787: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5788: }
5789: return 0;
5790: }
5792: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5793: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5794: {
5795: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5796: PC_IS* pcis = (PC_IS*) (pc->data);
5797: const PetscScalar zero = 0.0;
5799: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5800: if (!pcbddc->benign_apply_coarse_only) {
5801: if (applytranspose) {
5802: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5803: if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);
5804: } else {
5805: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5806: if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);
5807: }
5808: } else {
5809: VecSet(pcbddc->vec1_P,zero);
5810: }
5812: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5813: if (pcbddc->benign_n) {
5814: PetscScalar *array;
5815: PetscInt j;
5817: VecGetArray(pcbddc->vec1_P,&array);
5818: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5819: VecRestoreArray(pcbddc->vec1_P,&array);
5820: }
5822: /* start communications from local primal nodes to rhs of coarse solver */
5823: VecSet(pcbddc->coarse_vec,zero);
5824: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5825: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5827: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5828: if (pcbddc->coarse_ksp) {
5829: Mat coarse_mat;
5830: Vec rhs,sol;
5831: MatNullSpace nullsp;
5832: PetscBool isbddc = PETSC_FALSE;
5834: if (pcbddc->benign_have_null) {
5835: PC coarse_pc;
5837: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5838: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5839: /* we need to propagate to coarser levels the need for a possible benign correction */
5840: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5841: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5842: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5843: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5844: }
5845: }
5846: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5847: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5848: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5849: if (applytranspose) {
5851: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5852: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5853: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5854: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5855: MatGetTransposeNullSpace(coarse_mat,&nullsp);
5856: if (nullsp) {
5857: MatNullSpaceRemove(nullsp,sol);
5858: }
5859: } else {
5860: MatGetNullSpace(coarse_mat,&nullsp);
5861: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5862: PC coarse_pc;
5864: if (nullsp) {
5865: MatNullSpaceRemove(nullsp,rhs);
5866: }
5867: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5868: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5869: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5870: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5871: } else {
5872: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5873: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5874: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5875: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5876: if (nullsp) {
5877: MatNullSpaceRemove(nullsp,sol);
5878: }
5879: }
5880: }
5881: /* we don't need the benign correction at coarser levels anymore */
5882: if (pcbddc->benign_have_null && isbddc) {
5883: PC coarse_pc;
5884: PC_BDDC* coarsepcbddc;
5886: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5887: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5888: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5889: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5890: }
5891: }
5893: /* Local solution on R nodes */
5894: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5895: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5896: }
5897: /* communications from coarse sol to local primal nodes */
5898: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5899: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5901: /* Sum contributions from the two levels */
5902: if (!pcbddc->benign_apply_coarse_only) {
5903: if (applytranspose) {
5904: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5905: if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);
5906: } else {
5907: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5908: if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);
5909: }
5910: /* store p0 */
5911: if (pcbddc->benign_n) {
5912: PetscScalar *array;
5913: PetscInt j;
5915: VecGetArray(pcbddc->vec1_P,&array);
5916: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5917: VecRestoreArray(pcbddc->vec1_P,&array);
5918: }
5919: } else { /* expand the coarse solution */
5920: if (applytranspose) {
5921: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5922: } else {
5923: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5924: }
5925: }
5926: return 0;
5927: }
5929: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5930: {
5931: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5932: Vec from,to;
5933: const PetscScalar *array;
5935: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5936: from = pcbddc->coarse_vec;
5937: to = pcbddc->vec1_P;
5938: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5939: Vec tvec;
5941: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5942: VecResetArray(tvec);
5943: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5944: VecGetArrayRead(tvec,&array);
5945: VecPlaceArray(from,array);
5946: VecRestoreArrayRead(tvec,&array);
5947: }
5948: } else { /* from local to global -> put data in coarse right hand side */
5949: from = pcbddc->vec1_P;
5950: to = pcbddc->coarse_vec;
5951: }
5952: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5953: return 0;
5954: }
5956: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5957: {
5958: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5959: Vec from,to;
5960: const PetscScalar *array;
5962: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5963: from = pcbddc->coarse_vec;
5964: to = pcbddc->vec1_P;
5965: } else { /* from local to global -> put data in coarse right hand side */
5966: from = pcbddc->vec1_P;
5967: to = pcbddc->coarse_vec;
5968: }
5969: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5970: if (smode == SCATTER_FORWARD) {
5971: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5972: Vec tvec;
5974: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5975: VecGetArrayRead(to,&array);
5976: VecPlaceArray(tvec,array);
5977: VecRestoreArrayRead(to,&array);
5978: }
5979: } else {
5980: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5981: VecResetArray(from);
5982: }
5983: }
5984: return 0;
5985: }
5987: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5988: {
5989: PetscErrorCode ierr;
5990: PC_IS* pcis = (PC_IS*)(pc->data);
5991: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5992: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5993: /* one and zero */
5994: PetscScalar one=1.0,zero=0.0;
5995: /* space to store constraints and their local indices */
5996: PetscScalar *constraints_data;
5997: PetscInt *constraints_idxs,*constraints_idxs_B;
5998: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
5999: PetscInt *constraints_n;
6000: /* iterators */
6001: PetscInt i,j,k,total_counts,total_counts_cc,cum;
6002: /* BLAS integers */
6003: PetscBLASInt lwork,lierr;
6004: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
6005: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
6006: /* reuse */
6007: PetscInt olocal_primal_size,olocal_primal_size_cc;
6008: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
6009: /* change of basis */
6010: PetscBool qr_needed;
6011: PetscBT change_basis,qr_needed_idx;
6012: /* auxiliary stuff */
6013: PetscInt *nnz,*is_indices;
6014: PetscInt ncc;
6015: /* some quantities */
6016: PetscInt n_vertices,total_primal_vertices,valid_constraints;
6017: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6018: PetscReal tol; /* tolerance for retaining eigenmodes */
6020: tol = PetscSqrtReal(PETSC_SMALL);
6021: /* Destroy Mat objects computed previously */
6022: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
6023: MatDestroy(&pcbddc->ConstraintMatrix);
6024: MatDestroy(&pcbddc->switch_static_change);
6025: /* save info on constraints from previous setup (if any) */
6026: olocal_primal_size = pcbddc->local_primal_size;
6027: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6028: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
6029: PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);
6030: PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);
6031: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
6032: PetscFree(pcbddc->primal_indices_local_idxs);
6034: if (!pcbddc->adaptive_selection) {
6035: IS ISForVertices,*ISForFaces,*ISForEdges;
6036: MatNullSpace nearnullsp;
6037: const Vec *nearnullvecs;
6038: Vec *localnearnullsp;
6039: PetscScalar *array;
6040: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
6041: PetscBool nnsp_has_cnst;
6042: /* LAPACK working arrays for SVD or POD */
6043: PetscBool skip_lapack,boolforchange;
6044: PetscScalar *work;
6045: PetscReal *singular_vals;
6046: #if defined(PETSC_USE_COMPLEX)
6047: PetscReal *rwork;
6048: #endif
6049: PetscScalar *temp_basis = NULL,*correlation_mat = NULL;
6050: PetscBLASInt dummy_int=1;
6051: PetscScalar dummy_scalar=1.;
6052: PetscBool use_pod = PETSC_FALSE;
6054: /* MKL SVD with same input gives different results on different processes! */
6055: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6056: use_pod = PETSC_TRUE;
6057: #endif
6058: /* Get index sets for faces, edges and vertices from graph */
6059: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6060: /* print some info */
6061: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6062: PetscInt nv;
6064: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6065: ISGetSize(ISForVertices,&nv);
6066: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6067: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6068: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6069: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6070: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6071: PetscViewerFlush(pcbddc->dbg_viewer);
6072: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6073: }
6075: /* free unneeded index sets */
6076: if (!pcbddc->use_vertices) {
6077: ISDestroy(&ISForVertices);
6078: }
6079: if (!pcbddc->use_edges) {
6080: for (i=0;i<n_ISForEdges;i++) {
6081: ISDestroy(&ISForEdges[i]);
6082: }
6083: PetscFree(ISForEdges);
6084: n_ISForEdges = 0;
6085: }
6086: if (!pcbddc->use_faces) {
6087: for (i=0;i<n_ISForFaces;i++) {
6088: ISDestroy(&ISForFaces[i]);
6089: }
6090: PetscFree(ISForFaces);
6091: n_ISForFaces = 0;
6092: }
6094: /* check if near null space is attached to global mat */
6095: if (pcbddc->use_nnsp) {
6096: MatGetNearNullSpace(pc->pmat,&nearnullsp);
6097: } else nearnullsp = NULL;
6099: if (nearnullsp) {
6100: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6101: /* remove any stored info */
6102: MatNullSpaceDestroy(&pcbddc->onearnullspace);
6103: PetscFree(pcbddc->onearnullvecs_state);
6104: /* store information for BDDC solver reuse */
6105: PetscObjectReference((PetscObject)nearnullsp);
6106: pcbddc->onearnullspace = nearnullsp;
6107: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6108: for (i=0;i<nnsp_size;i++) {
6109: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6110: }
6111: } else { /* if near null space is not provided BDDC uses constants by default */
6112: nnsp_size = 0;
6113: nnsp_has_cnst = PETSC_TRUE;
6114: }
6115: /* get max number of constraints on a single cc */
6116: max_constraints = nnsp_size;
6117: if (nnsp_has_cnst) max_constraints++;
6119: /*
6120: Evaluate maximum storage size needed by the procedure
6121: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6122: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6123: There can be multiple constraints per connected component
6124: */
6125: n_vertices = 0;
6126: if (ISForVertices) {
6127: ISGetSize(ISForVertices,&n_vertices);
6128: }
6129: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6130: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
6132: total_counts = n_ISForFaces+n_ISForEdges;
6133: total_counts *= max_constraints;
6134: total_counts += n_vertices;
6135: PetscBTCreate(total_counts,&change_basis);
6137: total_counts = 0;
6138: max_size_of_constraint = 0;
6139: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6140: IS used_is;
6141: if (i<n_ISForEdges) {
6142: used_is = ISForEdges[i];
6143: } else {
6144: used_is = ISForFaces[i-n_ISForEdges];
6145: }
6146: ISGetSize(used_is,&j);
6147: total_counts += j;
6148: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6149: }
6150: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
6152: /* get local part of global near null space vectors */
6153: PetscMalloc1(nnsp_size,&localnearnullsp);
6154: for (k=0;k<nnsp_size;k++) {
6155: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6156: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6157: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6158: }
6160: /* whether or not to skip lapack calls */
6161: skip_lapack = PETSC_TRUE;
6162: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6164: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6165: if (!skip_lapack) {
6166: PetscScalar temp_work;
6168: if (use_pod) {
6169: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6170: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6171: PetscMalloc1(max_constraints,&singular_vals);
6172: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6173: #if defined(PETSC_USE_COMPLEX)
6174: PetscMalloc1(3*max_constraints,&rwork);
6175: #endif
6176: /* now we evaluate the optimal workspace using query with lwork=-1 */
6177: PetscBLASIntCast(max_constraints,&Blas_N);
6178: PetscBLASIntCast(max_constraints,&Blas_LDA);
6179: lwork = -1;
6180: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6181: #if !defined(PETSC_USE_COMPLEX)
6182: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6183: #else
6184: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6185: #endif
6186: PetscFPTrapPop();
6188: } else {
6189: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6190: /* SVD */
6191: PetscInt max_n,min_n;
6192: max_n = max_size_of_constraint;
6193: min_n = max_constraints;
6194: if (max_size_of_constraint < max_constraints) {
6195: min_n = max_size_of_constraint;
6196: max_n = max_constraints;
6197: }
6198: PetscMalloc1(min_n,&singular_vals);
6199: #if defined(PETSC_USE_COMPLEX)
6200: PetscMalloc1(5*min_n,&rwork);
6201: #endif
6202: /* now we evaluate the optimal workspace using query with lwork=-1 */
6203: lwork = -1;
6204: PetscBLASIntCast(max_n,&Blas_M);
6205: PetscBLASIntCast(min_n,&Blas_N);
6206: PetscBLASIntCast(max_n,&Blas_LDA);
6207: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6208: #if !defined(PETSC_USE_COMPLEX)
6209: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6210: #else
6211: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6212: #endif
6213: PetscFPTrapPop();
6215: #else
6216: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6217: #endif /* on missing GESVD */
6218: }
6219: /* Allocate optimal workspace */
6220: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6221: PetscMalloc1(lwork,&work);
6222: }
6223: /* Now we can loop on constraining sets */
6224: total_counts = 0;
6225: constraints_idxs_ptr[0] = 0;
6226: constraints_data_ptr[0] = 0;
6227: /* vertices */
6228: if (n_vertices) {
6229: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6230: PetscArraycpy(constraints_idxs,is_indices,n_vertices);
6231: for (i=0;i<n_vertices;i++) {
6232: constraints_n[total_counts] = 1;
6233: constraints_data[total_counts] = 1.0;
6234: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6235: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6236: total_counts++;
6237: }
6238: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6239: n_vertices = total_counts;
6240: }
6242: /* edges and faces */
6243: total_counts_cc = total_counts;
6244: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6245: IS used_is;
6246: PetscBool idxs_copied = PETSC_FALSE;
6248: if (ncc<n_ISForEdges) {
6249: used_is = ISForEdges[ncc];
6250: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6251: } else {
6252: used_is = ISForFaces[ncc-n_ISForEdges];
6253: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6254: }
6255: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6257: ISGetSize(used_is,&size_of_constraint);
6258: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6259: /* change of basis should not be performed on local periodic nodes */
6260: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6261: if (nnsp_has_cnst) {
6262: PetscScalar quad_value;
6264: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6265: idxs_copied = PETSC_TRUE;
6267: if (!pcbddc->use_nnsp_true) {
6268: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6269: } else {
6270: quad_value = 1.0;
6271: }
6272: for (j=0;j<size_of_constraint;j++) {
6273: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6274: }
6275: temp_constraints++;
6276: total_counts++;
6277: }
6278: for (k=0;k<nnsp_size;k++) {
6279: PetscReal real_value;
6280: PetscScalar *ptr_to_data;
6282: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6283: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6284: for (j=0;j<size_of_constraint;j++) {
6285: ptr_to_data[j] = array[is_indices[j]];
6286: }
6287: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6288: /* check if array is null on the connected component */
6289: PetscBLASIntCast(size_of_constraint,&Blas_N);
6290: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6291: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6292: temp_constraints++;
6293: total_counts++;
6294: if (!idxs_copied) {
6295: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6296: idxs_copied = PETSC_TRUE;
6297: }
6298: }
6299: }
6300: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6301: valid_constraints = temp_constraints;
6302: if (!pcbddc->use_nnsp_true && temp_constraints) {
6303: if (temp_constraints == 1) { /* just normalize the constraint */
6304: PetscScalar norm,*ptr_to_data;
6306: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6307: PetscBLASIntCast(size_of_constraint,&Blas_N);
6308: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6309: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6310: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6311: } else { /* perform SVD */
6312: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6314: if (use_pod) {
6315: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6316: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6317: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6318: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6319: from that computed using LAPACKgesvd
6320: -> This is due to a different computation of eigenvectors in LAPACKheev
6321: -> The quality of the POD-computed basis will be the same */
6322: PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);
6323: /* Store upper triangular part of correlation matrix */
6324: PetscBLASIntCast(size_of_constraint,&Blas_N);
6325: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6326: for (j=0;j<temp_constraints;j++) {
6327: for (k=0;k<j+1;k++) {
6328: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6329: }
6330: }
6331: /* compute eigenvalues and eigenvectors of correlation matrix */
6332: PetscBLASIntCast(temp_constraints,&Blas_N);
6333: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6334: #if !defined(PETSC_USE_COMPLEX)
6335: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6336: #else
6337: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6338: #endif
6339: PetscFPTrapPop();
6341: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6342: j = 0;
6343: while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6344: total_counts = total_counts-j;
6345: valid_constraints = temp_constraints-j;
6346: /* scale and copy POD basis into used quadrature memory */
6347: PetscBLASIntCast(size_of_constraint,&Blas_M);
6348: PetscBLASIntCast(temp_constraints,&Blas_N);
6349: PetscBLASIntCast(temp_constraints,&Blas_K);
6350: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6351: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6352: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6353: if (j<temp_constraints) {
6354: PetscInt ii;
6355: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6356: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6357: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6358: PetscFPTrapPop();
6359: for (k=0;k<temp_constraints-j;k++) {
6360: for (ii=0;ii<size_of_constraint;ii++) {
6361: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6362: }
6363: }
6364: }
6365: } else {
6366: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6367: PetscBLASIntCast(size_of_constraint,&Blas_M);
6368: PetscBLASIntCast(temp_constraints,&Blas_N);
6369: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6370: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6371: #if !defined(PETSC_USE_COMPLEX)
6372: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6373: #else
6374: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6375: #endif
6377: PetscFPTrapPop();
6378: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6379: k = temp_constraints;
6380: if (k > size_of_constraint) k = size_of_constraint;
6381: j = 0;
6382: while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6383: valid_constraints = k-j;
6384: total_counts = total_counts-temp_constraints+valid_constraints;
6385: #else
6386: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6387: #endif /* on missing GESVD */
6388: }
6389: }
6390: }
6391: /* update pointers information */
6392: if (valid_constraints) {
6393: constraints_n[total_counts_cc] = valid_constraints;
6394: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6395: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6396: /* set change_of_basis flag */
6397: if (boolforchange) {
6398: PetscBTSet(change_basis,total_counts_cc);
6399: }
6400: total_counts_cc++;
6401: }
6402: }
6403: /* free workspace */
6404: if (!skip_lapack) {
6405: PetscFree(work);
6406: #if defined(PETSC_USE_COMPLEX)
6407: PetscFree(rwork);
6408: #endif
6409: PetscFree(singular_vals);
6410: PetscFree(correlation_mat);
6411: PetscFree(temp_basis);
6412: }
6413: for (k=0;k<nnsp_size;k++) {
6414: VecDestroy(&localnearnullsp[k]);
6415: }
6416: PetscFree(localnearnullsp);
6417: /* free index sets of faces, edges and vertices */
6418: for (i=0;i<n_ISForFaces;i++) {
6419: ISDestroy(&ISForFaces[i]);
6420: }
6421: if (n_ISForFaces) {
6422: PetscFree(ISForFaces);
6423: }
6424: for (i=0;i<n_ISForEdges;i++) {
6425: ISDestroy(&ISForEdges[i]);
6426: }
6427: if (n_ISForEdges) {
6428: PetscFree(ISForEdges);
6429: }
6430: ISDestroy(&ISForVertices);
6431: } else {
6432: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6434: total_counts = 0;
6435: n_vertices = 0;
6436: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6437: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6438: }
6439: max_constraints = 0;
6440: total_counts_cc = 0;
6441: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6442: total_counts += pcbddc->adaptive_constraints_n[i];
6443: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6444: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6445: }
6446: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6447: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6448: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6449: constraints_data = pcbddc->adaptive_constraints_data;
6450: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6451: PetscMalloc1(total_counts_cc,&constraints_n);
6452: total_counts_cc = 0;
6453: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6454: if (pcbddc->adaptive_constraints_n[i]) {
6455: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6456: }
6457: }
6459: max_size_of_constraint = 0;
6460: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6461: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6462: /* Change of basis */
6463: PetscBTCreate(total_counts_cc,&change_basis);
6464: if (pcbddc->use_change_of_basis) {
6465: for (i=0;i<sub_schurs->n_subs;i++) {
6466: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6467: PetscBTSet(change_basis,i+n_vertices);
6468: }
6469: }
6470: }
6471: }
6472: pcbddc->local_primal_size = total_counts;
6473: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6475: /* map constraints_idxs in boundary numbering */
6476: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6479: /* Create constraint matrix */
6480: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6481: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6482: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6484: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6485: /* determine if a QR strategy is needed for change of basis */
6486: qr_needed = pcbddc->use_qr_single;
6487: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6488: total_primal_vertices=0;
6489: pcbddc->local_primal_size_cc = 0;
6490: for (i=0;i<total_counts_cc;i++) {
6491: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6492: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6493: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6494: pcbddc->local_primal_size_cc += 1;
6495: } else if (PetscBTLookup(change_basis,i)) {
6496: for (k=0;k<constraints_n[i];k++) {
6497: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6498: }
6499: pcbddc->local_primal_size_cc += constraints_n[i];
6500: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6501: PetscBTSet(qr_needed_idx,i);
6502: qr_needed = PETSC_TRUE;
6503: }
6504: } else {
6505: pcbddc->local_primal_size_cc += 1;
6506: }
6507: }
6508: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6509: pcbddc->n_vertices = total_primal_vertices;
6510: /* permute indices in order to have a sorted set of vertices */
6511: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6512: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6513: PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);
6514: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6516: /* nonzero structure of constraint matrix */
6517: /* and get reference dof for local constraints */
6518: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6519: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6521: j = total_primal_vertices;
6522: total_counts = total_primal_vertices;
6523: cum = total_primal_vertices;
6524: for (i=n_vertices;i<total_counts_cc;i++) {
6525: if (!PetscBTLookup(change_basis,i)) {
6526: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6527: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6528: cum++;
6529: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6530: for (k=0;k<constraints_n[i];k++) {
6531: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6532: nnz[j+k] = size_of_constraint;
6533: }
6534: j += constraints_n[i];
6535: }
6536: }
6537: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6538: MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6539: PetscFree(nnz);
6541: /* set values in constraint matrix */
6542: for (i=0;i<total_primal_vertices;i++) {
6543: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6544: }
6545: total_counts = total_primal_vertices;
6546: for (i=n_vertices;i<total_counts_cc;i++) {
6547: if (!PetscBTLookup(change_basis,i)) {
6548: PetscInt *cols;
6550: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6551: cols = constraints_idxs+constraints_idxs_ptr[i];
6552: for (k=0;k<constraints_n[i];k++) {
6553: PetscInt row = total_counts+k;
6554: PetscScalar *vals;
6556: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6557: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6558: }
6559: total_counts += constraints_n[i];
6560: }
6561: }
6562: /* assembling */
6563: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6564: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6565: MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");
6567: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6568: if (pcbddc->use_change_of_basis) {
6569: /* dual and primal dofs on a single cc */
6570: PetscInt dual_dofs,primal_dofs;
6571: /* working stuff for GEQRF */
6572: PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6573: PetscBLASInt lqr_work;
6574: /* working stuff for UNGQR */
6575: PetscScalar *gqr_work = NULL,lgqr_work_t=0.0;
6576: PetscBLASInt lgqr_work;
6577: /* working stuff for TRTRS */
6578: PetscScalar *trs_rhs = NULL;
6579: PetscBLASInt Blas_NRHS;
6580: /* pointers for values insertion into change of basis matrix */
6581: PetscInt *start_rows,*start_cols;
6582: PetscScalar *start_vals;
6583: /* working stuff for values insertion */
6584: PetscBT is_primal;
6585: PetscInt *aux_primal_numbering_B;
6586: /* matrix sizes */
6587: PetscInt global_size,local_size;
6588: /* temporary change of basis */
6589: Mat localChangeOfBasisMatrix;
6590: /* extra space for debugging */
6591: PetscScalar *dbg_work = NULL;
6593: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6594: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6595: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6596: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6597: /* nonzeros for local mat */
6598: PetscMalloc1(pcis->n,&nnz);
6599: if (!pcbddc->benign_change || pcbddc->fake_change) {
6600: for (i=0;i<pcis->n;i++) nnz[i]=1;
6601: } else {
6602: const PetscInt *ii;
6603: PetscInt n;
6604: PetscBool flg_row;
6605: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6606: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6607: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6608: }
6609: for (i=n_vertices;i<total_counts_cc;i++) {
6610: if (PetscBTLookup(change_basis,i)) {
6611: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6612: if (PetscBTLookup(qr_needed_idx,i)) {
6613: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6614: } else {
6615: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6616: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6617: }
6618: }
6619: }
6620: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6621: MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6622: PetscFree(nnz);
6623: /* Set interior change in the matrix */
6624: if (!pcbddc->benign_change || pcbddc->fake_change) {
6625: for (i=0;i<pcis->n;i++) {
6626: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6627: }
6628: } else {
6629: const PetscInt *ii,*jj;
6630: PetscScalar *aa;
6631: PetscInt n;
6632: PetscBool flg_row;
6633: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6634: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6635: for (i=0;i<n;i++) {
6636: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6637: }
6638: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6639: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6640: }
6642: if (pcbddc->dbg_flag) {
6643: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6644: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6645: }
6647: /* Now we loop on the constraints which need a change of basis */
6648: /*
6649: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6650: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6652: Basic blocks of change of basis matrix T computed by
6654: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6656: | 1 0 ... 0 s_1/S |
6657: | 0 1 ... 0 s_2/S |
6658: | ... |
6659: | 0 ... 1 s_{n-1}/S |
6660: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6662: with S = \sum_{i=1}^n s_i^2
6663: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6664: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6666: - QR decomposition of constraints otherwise
6667: */
6668: if (qr_needed && max_size_of_constraint) {
6669: /* space to store Q */
6670: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6671: /* array to store scaling factors for reflectors */
6672: PetscMalloc1(max_constraints,&qr_tau);
6673: /* first we issue queries for optimal work */
6674: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6675: PetscBLASIntCast(max_constraints,&Blas_N);
6676: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6677: lqr_work = -1;
6678: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6680: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6681: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6682: lgqr_work = -1;
6683: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6684: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6685: PetscBLASIntCast(max_constraints,&Blas_K);
6686: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6687: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6688: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6690: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6691: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6692: /* array to store rhs and solution of triangular solver */
6693: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6694: /* allocating workspace for check */
6695: if (pcbddc->dbg_flag) {
6696: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6697: }
6698: }
6699: /* array to store whether a node is primal or not */
6700: PetscBTCreate(pcis->n_B,&is_primal);
6701: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6702: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6704: for (i=0;i<total_primal_vertices;i++) {
6705: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6706: }
6707: PetscFree(aux_primal_numbering_B);
6709: /* loop on constraints and see whether or not they need a change of basis and compute it */
6710: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6711: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6712: if (PetscBTLookup(change_basis,total_counts)) {
6713: /* get constraint info */
6714: primal_dofs = constraints_n[total_counts];
6715: dual_dofs = size_of_constraint-primal_dofs;
6717: if (pcbddc->dbg_flag) {
6718: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6719: }
6721: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6723: /* copy quadrature constraints for change of basis check */
6724: if (pcbddc->dbg_flag) {
6725: PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6726: }
6727: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6728: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6730: /* compute QR decomposition of constraints */
6731: PetscBLASIntCast(size_of_constraint,&Blas_M);
6732: PetscBLASIntCast(primal_dofs,&Blas_N);
6733: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6734: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6735: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6737: PetscFPTrapPop();
6739: /* explicitly compute R^-T */
6740: PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);
6741: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6742: PetscBLASIntCast(primal_dofs,&Blas_N);
6743: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6744: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6745: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6746: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6747: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6749: PetscFPTrapPop();
6751: /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6752: PetscBLASIntCast(size_of_constraint,&Blas_M);
6753: PetscBLASIntCast(size_of_constraint,&Blas_N);
6754: PetscBLASIntCast(primal_dofs,&Blas_K);
6755: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6756: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6757: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6759: PetscFPTrapPop();
6761: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6762: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6763: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6764: PetscBLASIntCast(size_of_constraint,&Blas_M);
6765: PetscBLASIntCast(primal_dofs,&Blas_N);
6766: PetscBLASIntCast(primal_dofs,&Blas_K);
6767: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6768: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6769: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6770: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6771: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6772: PetscFPTrapPop();
6773: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6775: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6776: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6777: /* insert cols for primal dofs */
6778: for (j=0;j<primal_dofs;j++) {
6779: start_vals = &qr_basis[j*size_of_constraint];
6780: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6781: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6782: }
6783: /* insert cols for dual dofs */
6784: for (j=0,k=0;j<dual_dofs;k++) {
6785: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6786: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6787: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6788: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6789: j++;
6790: }
6791: }
6793: /* check change of basis */
6794: if (pcbddc->dbg_flag) {
6795: PetscInt ii,jj;
6796: PetscBool valid_qr=PETSC_TRUE;
6797: PetscBLASIntCast(primal_dofs,&Blas_M);
6798: PetscBLASIntCast(size_of_constraint,&Blas_N);
6799: PetscBLASIntCast(size_of_constraint,&Blas_K);
6800: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6801: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6802: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6803: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6804: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6805: PetscFPTrapPop();
6806: for (jj=0;jj<size_of_constraint;jj++) {
6807: for (ii=0;ii<primal_dofs;ii++) {
6808: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6809: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6810: }
6811: }
6812: if (!valid_qr) {
6813: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6814: for (jj=0;jj<size_of_constraint;jj++) {
6815: for (ii=0;ii<primal_dofs;ii++) {
6816: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6817: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6818: }
6819: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6820: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6821: }
6822: }
6823: }
6824: } else {
6825: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6826: }
6827: }
6828: } else { /* simple transformation block */
6829: PetscInt row,col;
6830: PetscScalar val,norm;
6832: PetscBLASIntCast(size_of_constraint,&Blas_N);
6833: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6834: for (j=0;j<size_of_constraint;j++) {
6835: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6836: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6837: if (!PetscBTLookup(is_primal,row_B)) {
6838: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6839: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6840: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6841: } else {
6842: for (k=0;k<size_of_constraint;k++) {
6843: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6844: if (row != col) {
6845: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6846: } else {
6847: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6848: }
6849: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6850: }
6851: }
6852: }
6853: if (pcbddc->dbg_flag) {
6854: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6855: }
6856: }
6857: } else {
6858: if (pcbddc->dbg_flag) {
6859: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6860: }
6861: }
6862: }
6864: /* free workspace */
6865: if (qr_needed) {
6866: if (pcbddc->dbg_flag) {
6867: PetscFree(dbg_work);
6868: }
6869: PetscFree(trs_rhs);
6870: PetscFree(qr_tau);
6871: PetscFree(qr_work);
6872: PetscFree(gqr_work);
6873: PetscFree(qr_basis);
6874: }
6875: PetscBTDestroy(&is_primal);
6876: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6877: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6879: /* assembling of global change of variable */
6880: if (!pcbddc->fake_change) {
6881: Mat tmat;
6882: PetscInt bs;
6884: VecGetSize(pcis->vec1_global,&global_size);
6885: VecGetLocalSize(pcis->vec1_global,&local_size);
6886: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6887: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6888: MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6889: MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6890: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6891: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6892: MatGetBlockSize(pc->pmat,&bs);
6893: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6894: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6895: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6896: MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6897: MatDestroy(&tmat);
6898: VecSet(pcis->vec1_global,0.0);
6899: VecSet(pcis->vec1_N,1.0);
6900: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6901: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6902: VecReciprocal(pcis->vec1_global);
6903: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6905: /* check */
6906: if (pcbddc->dbg_flag) {
6907: PetscReal error;
6908: Vec x,x_change;
6910: VecDuplicate(pcis->vec1_global,&x);
6911: VecDuplicate(pcis->vec1_global,&x_change);
6912: VecSetRandom(x,NULL);
6913: VecCopy(x,pcis->vec1_global);
6914: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6915: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6916: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6917: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6918: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6919: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6920: VecAXPY(x,-1.0,x_change);
6921: VecNorm(x,NORM_INFINITY,&error);
6922: if (error > PETSC_SMALL) {
6923: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6924: }
6925: VecDestroy(&x);
6926: VecDestroy(&x_change);
6927: }
6928: /* adapt sub_schurs computed (if any) */
6929: if (pcbddc->use_deluxe_scaling) {
6930: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6933: if (sub_schurs && sub_schurs->S_Ej_all) {
6934: Mat S_new,tmat;
6935: IS is_all_N,is_V_Sall = NULL;
6937: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6938: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6939: if (pcbddc->deluxe_zerorows) {
6940: ISLocalToGlobalMapping NtoSall;
6941: IS is_V;
6942: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6943: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6944: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6945: ISLocalToGlobalMappingDestroy(&NtoSall);
6946: ISDestroy(&is_V);
6947: }
6948: ISDestroy(&is_all_N);
6949: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6950: MatDestroy(&sub_schurs->S_Ej_all);
6951: PetscObjectReference((PetscObject)S_new);
6952: if (pcbddc->deluxe_zerorows) {
6953: const PetscScalar *array;
6954: const PetscInt *idxs_V,*idxs_all;
6955: PetscInt i,n_V;
6957: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6958: ISGetLocalSize(is_V_Sall,&n_V);
6959: ISGetIndices(is_V_Sall,&idxs_V);
6960: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6961: VecGetArrayRead(pcis->D,&array);
6962: for (i=0;i<n_V;i++) {
6963: PetscScalar val;
6964: PetscInt idx;
6966: idx = idxs_V[i];
6967: val = array[idxs_all[idxs_V[i]]];
6968: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6969: }
6970: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6971: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6972: VecRestoreArrayRead(pcis->D,&array);
6973: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6974: ISRestoreIndices(is_V_Sall,&idxs_V);
6975: }
6976: sub_schurs->S_Ej_all = S_new;
6977: MatDestroy(&S_new);
6978: if (sub_schurs->sum_S_Ej_all) {
6979: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6980: MatDestroy(&sub_schurs->sum_S_Ej_all);
6981: PetscObjectReference((PetscObject)S_new);
6982: if (pcbddc->deluxe_zerorows) {
6983: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6984: }
6985: sub_schurs->sum_S_Ej_all = S_new;
6986: MatDestroy(&S_new);
6987: }
6988: ISDestroy(&is_V_Sall);
6989: MatDestroy(&tmat);
6990: }
6991: /* destroy any change of basis context in sub_schurs */
6992: if (sub_schurs && sub_schurs->change) {
6993: PetscInt i;
6995: for (i=0;i<sub_schurs->n_subs;i++) {
6996: KSPDestroy(&sub_schurs->change[i]);
6997: }
6998: PetscFree(sub_schurs->change);
6999: }
7000: }
7001: if (pcbddc->switch_static) { /* need to save the local change */
7002: pcbddc->switch_static_change = localChangeOfBasisMatrix;
7003: } else {
7004: MatDestroy(&localChangeOfBasisMatrix);
7005: }
7006: /* determine if any process has changed the pressures locally */
7007: pcbddc->change_interior = pcbddc->benign_have_null;
7008: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7009: MatDestroy(&pcbddc->ConstraintMatrix);
7010: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7011: pcbddc->use_qr_single = qr_needed;
7012: }
7013: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7014: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7015: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
7016: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7017: } else {
7018: Mat benign_global = NULL;
7019: if (pcbddc->benign_have_null) {
7020: Mat M;
7022: pcbddc->change_interior = PETSC_TRUE;
7023: VecCopy(matis->counter,pcis->vec1_N);
7024: VecReciprocal(pcis->vec1_N);
7025: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
7026: if (pcbddc->benign_change) {
7027: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
7028: MatDiagonalScale(M,pcis->vec1_N,NULL);
7029: } else {
7030: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
7031: MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
7032: }
7033: MatISSetLocalMat(benign_global,M);
7034: MatDestroy(&M);
7035: MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
7036: MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
7037: }
7038: if (pcbddc->user_ChangeOfBasisMatrix) {
7039: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
7040: MatDestroy(&benign_global);
7041: } else if (pcbddc->benign_have_null) {
7042: pcbddc->ChangeOfBasisMatrix = benign_global;
7043: }
7044: }
7045: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7046: IS is_global;
7047: const PetscInt *gidxs;
7049: ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs);
7050: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
7051: ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs);
7052: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7053: ISDestroy(&is_global);
7054: }
7055: }
7056: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7057: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7058: }
7060: if (!pcbddc->fake_change) {
7061: /* add pressure dofs to set of primal nodes for numbering purposes */
7062: for (i=0;i<pcbddc->benign_n;i++) {
7063: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7064: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7065: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7066: pcbddc->local_primal_size_cc++;
7067: pcbddc->local_primal_size++;
7068: }
7070: /* check if a new primal space has been introduced (also take into account benign trick) */
7071: pcbddc->new_primal_space_local = PETSC_TRUE;
7072: if (olocal_primal_size == pcbddc->local_primal_size) {
7073: PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7074: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7075: if (!pcbddc->new_primal_space_local) {
7076: PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7077: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7078: }
7079: }
7080: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7081: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7082: }
7083: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
7085: /* flush dbg viewer */
7086: if (pcbddc->dbg_flag) {
7087: PetscViewerFlush(pcbddc->dbg_viewer);
7088: }
7090: /* free workspace */
7091: PetscBTDestroy(&qr_needed_idx);
7092: PetscBTDestroy(&change_basis);
7093: if (!pcbddc->adaptive_selection) {
7094: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7095: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7096: } else {
7097: PetscFree5(pcbddc->adaptive_constraints_n,
7098: pcbddc->adaptive_constraints_idxs_ptr,
7099: pcbddc->adaptive_constraints_data_ptr,
7100: pcbddc->adaptive_constraints_idxs,
7101: pcbddc->adaptive_constraints_data);
7102: PetscFree(constraints_n);
7103: PetscFree(constraints_idxs_B);
7104: }
7105: return 0;
7106: }
7108: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7109: {
7110: ISLocalToGlobalMapping map;
7111: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7112: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
7113: PetscInt i,N;
7114: PetscBool rcsr = PETSC_FALSE;
7116: if (pcbddc->recompute_topography) {
7117: pcbddc->graphanalyzed = PETSC_FALSE;
7118: /* Reset previously computed graph */
7119: PCBDDCGraphReset(pcbddc->mat_graph);
7120: /* Init local Graph struct */
7121: MatGetSize(pc->pmat,&N,NULL);
7122: MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7123: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
7125: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7126: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7127: }
7128: /* Check validity of the csr graph passed in by the user */
7131: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7132: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7133: PetscInt *xadj,*adjncy;
7134: PetscInt nvtxs;
7135: PetscBool flg_row=PETSC_FALSE;
7137: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7138: if (flg_row) {
7139: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7140: pcbddc->computed_rowadj = PETSC_TRUE;
7141: }
7142: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7143: rcsr = PETSC_TRUE;
7144: }
7145: if (pcbddc->dbg_flag) {
7146: PetscViewerFlush(pcbddc->dbg_viewer);
7147: }
7149: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7150: PetscReal *lcoords;
7151: PetscInt n;
7152: MPI_Datatype dimrealtype;
7154: /* TODO: support for blocked */
7156: MatGetLocalSize(matis->A,&n,NULL);
7157: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7158: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7159: MPI_Type_commit(&dimrealtype);
7160: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7161: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7162: MPI_Type_free(&dimrealtype);
7163: PetscFree(pcbddc->mat_graph->coords);
7165: pcbddc->mat_graph->coords = lcoords;
7166: pcbddc->mat_graph->cloc = PETSC_TRUE;
7167: pcbddc->mat_graph->cnloc = n;
7168: }
7170: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7172: /* Setup of Graph */
7173: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7174: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
7176: /* attach info on disconnected subdomains if present */
7177: if (pcbddc->n_local_subs) {
7178: PetscInt *local_subs,n,totn;
7180: MatGetLocalSize(matis->A,&n,NULL);
7181: PetscMalloc1(n,&local_subs);
7182: for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7183: for (i=0;i<pcbddc->n_local_subs;i++) {
7184: const PetscInt *idxs;
7185: PetscInt nl,j;
7187: ISGetLocalSize(pcbddc->local_subs[i],&nl);
7188: ISGetIndices(pcbddc->local_subs[i],&idxs);
7189: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7190: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7191: }
7192: for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7193: pcbddc->mat_graph->n_local_subs = totn + 1;
7194: pcbddc->mat_graph->local_subs = local_subs;
7195: }
7196: }
7198: if (!pcbddc->graphanalyzed) {
7199: /* Graph's connected components analysis */
7200: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7201: pcbddc->graphanalyzed = PETSC_TRUE;
7202: pcbddc->corner_selected = pcbddc->corner_selection;
7203: }
7204: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7205: return 0;
7206: }
7208: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7209: {
7210: PetscInt i,j,n;
7211: PetscScalar *alphas;
7212: PetscReal norm,*onorms;
7214: n = *nio;
7215: if (!n) return 0;
7216: PetscMalloc2(n,&alphas,n,&onorms);
7217: VecNormalize(vecs[0],&norm);
7218: if (norm < PETSC_SMALL) {
7219: onorms[0] = 0.0;
7220: VecSet(vecs[0],0.0);
7221: } else {
7222: onorms[0] = norm;
7223: }
7225: for (i=1;i<n;i++) {
7226: VecMDot(vecs[i],i,vecs,alphas);
7227: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7228: VecMAXPY(vecs[i],i,alphas,vecs);
7229: VecNormalize(vecs[i],&norm);
7230: if (norm < PETSC_SMALL) {
7231: onorms[i] = 0.0;
7232: VecSet(vecs[i],0.0);
7233: } else {
7234: onorms[i] = norm;
7235: }
7236: }
7237: /* push nonzero vectors at the beginning */
7238: for (i=0;i<n;i++) {
7239: if (onorms[i] == 0.0) {
7240: for (j=i+1;j<n;j++) {
7241: if (onorms[j] != 0.0) {
7242: VecCopy(vecs[j],vecs[i]);
7243: onorms[j] = 0.0;
7244: }
7245: }
7246: }
7247: }
7248: for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7249: PetscFree2(alphas,onorms);
7250: return 0;
7251: }
7253: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7254: {
7255: ISLocalToGlobalMapping mapping;
7256: Mat A;
7257: PetscInt n_neighs,*neighs,*n_shared,**shared;
7258: PetscMPIInt size,rank,color;
7259: PetscInt *xadj,*adjncy;
7260: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7261: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
7262: PetscInt void_procs,*procs_candidates = NULL;
7263: PetscInt xadj_count,*count;
7264: PetscBool ismatis,use_vwgt=PETSC_FALSE;
7265: PetscSubcomm psubcomm;
7266: MPI_Comm subcomm;
7269: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7275: if (have_void) *have_void = PETSC_FALSE;
7276: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7277: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7278: MatISGetLocalMat(mat,&A);
7279: MatGetLocalSize(A,&n,NULL);
7280: im_active = !!n;
7281: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7282: void_procs = size - active_procs;
7283: /* get ranks of of non-active processes in mat communicator */
7284: if (void_procs) {
7285: PetscInt ncand;
7287: if (have_void) *have_void = PETSC_TRUE;
7288: PetscMalloc1(size,&procs_candidates);
7289: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7290: for (i=0,ncand=0;i<size;i++) {
7291: if (!procs_candidates[i]) {
7292: procs_candidates[ncand++] = i;
7293: }
7294: }
7295: /* force n_subdomains to be not greater that the number of non-active processes */
7296: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7297: }
7299: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7300: number of subdomains requested 1 -> send to rank-0 or first candidate in voids */
7301: MatGetSize(mat,&N,NULL);
7302: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7303: PetscInt issize,isidx,dest;
7304: if (*n_subdomains == 1) dest = 0;
7305: else dest = rank;
7306: if (im_active) {
7307: issize = 1;
7308: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7309: isidx = procs_candidates[dest];
7310: } else {
7311: isidx = dest;
7312: }
7313: } else {
7314: issize = 0;
7315: isidx = -1;
7316: }
7317: if (*n_subdomains != 1) *n_subdomains = active_procs;
7318: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7319: PetscFree(procs_candidates);
7320: return 0;
7321: }
7322: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7323: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7324: threshold = PetscMax(threshold,2);
7326: /* Get info on mapping */
7327: MatISGetLocalToGlobalMapping(mat,&mapping,NULL);
7328: ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared);
7330: /* build local CSR graph of subdomains' connectivity */
7331: PetscMalloc1(2,&xadj);
7332: xadj[0] = 0;
7333: xadj[1] = PetscMax(n_neighs-1,0);
7334: PetscMalloc1(xadj[1],&adjncy);
7335: PetscMalloc1(xadj[1],&adjncy_wgt);
7336: PetscCalloc1(n,&count);
7337: for (i=1;i<n_neighs;i++)
7338: for (j=0;j<n_shared[i];j++)
7339: count[shared[i][j]] += 1;
7341: xadj_count = 0;
7342: for (i=1;i<n_neighs;i++) {
7343: for (j=0;j<n_shared[i];j++) {
7344: if (count[shared[i][j]] < threshold) {
7345: adjncy[xadj_count] = neighs[i];
7346: adjncy_wgt[xadj_count] = n_shared[i];
7347: xadj_count++;
7348: break;
7349: }
7350: }
7351: }
7352: xadj[1] = xadj_count;
7353: PetscFree(count);
7354: ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared);
7355: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7357: PetscMalloc1(1,&ranks_send_to_idx);
7359: /* Restrict work on active processes only */
7360: PetscMPIIntCast(im_active,&color);
7361: if (void_procs) {
7362: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7363: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7364: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7365: subcomm = PetscSubcommChild(psubcomm);
7366: } else {
7367: psubcomm = NULL;
7368: subcomm = PetscObjectComm((PetscObject)mat);
7369: }
7371: v_wgt = NULL;
7372: if (!color) {
7373: PetscFree(xadj);
7374: PetscFree(adjncy);
7375: PetscFree(adjncy_wgt);
7376: } else {
7377: Mat subdomain_adj;
7378: IS new_ranks,new_ranks_contig;
7379: MatPartitioning partitioner;
7380: PetscInt rstart=0,rend=0;
7381: PetscInt *is_indices,*oldranks;
7382: PetscMPIInt size;
7383: PetscBool aggregate;
7385: MPI_Comm_size(subcomm,&size);
7386: if (void_procs) {
7387: PetscInt prank = rank;
7388: PetscMalloc1(size,&oldranks);
7389: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7390: for (i=0;i<xadj[1];i++) {
7391: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7392: }
7393: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7394: } else {
7395: oldranks = NULL;
7396: }
7397: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7398: if (aggregate) { /* TODO: all this part could be made more efficient */
7399: PetscInt lrows,row,ncols,*cols;
7400: PetscMPIInt nrank;
7401: PetscScalar *vals;
7403: MPI_Comm_rank(subcomm,&nrank);
7404: lrows = 0;
7405: if (nrank<redprocs) {
7406: lrows = size/redprocs;
7407: if (nrank<size%redprocs) lrows++;
7408: }
7409: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7410: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7411: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7412: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7413: row = nrank;
7414: ncols = xadj[1]-xadj[0];
7415: cols = adjncy;
7416: PetscMalloc1(ncols,&vals);
7417: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7418: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7419: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7420: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7421: PetscFree(xadj);
7422: PetscFree(adjncy);
7423: PetscFree(adjncy_wgt);
7424: PetscFree(vals);
7425: if (use_vwgt) {
7426: Vec v;
7427: const PetscScalar *array;
7428: PetscInt nl;
7430: MatCreateVecs(subdomain_adj,&v,NULL);
7431: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7432: VecAssemblyBegin(v);
7433: VecAssemblyEnd(v);
7434: VecGetLocalSize(v,&nl);
7435: VecGetArrayRead(v,&array);
7436: PetscMalloc1(nl,&v_wgt);
7437: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7438: VecRestoreArrayRead(v,&array);
7439: VecDestroy(&v);
7440: }
7441: } else {
7442: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7443: if (use_vwgt) {
7444: PetscMalloc1(1,&v_wgt);
7445: v_wgt[0] = n;
7446: }
7447: }
7448: /* MatView(subdomain_adj,0); */
7450: /* Partition */
7451: MatPartitioningCreate(subcomm,&partitioner);
7452: #if defined(PETSC_HAVE_PTSCOTCH)
7453: MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7454: #elif defined(PETSC_HAVE_PARMETIS)
7455: MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7456: #else
7457: MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7458: #endif
7459: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7460: if (v_wgt) {
7461: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7462: }
7463: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7464: MatPartitioningSetNParts(partitioner,*n_subdomains);
7465: MatPartitioningSetFromOptions(partitioner);
7466: MatPartitioningApply(partitioner,&new_ranks);
7467: /* MatPartitioningView(partitioner,0); */
7469: /* renumber new_ranks to avoid "holes" in new set of processors */
7470: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7471: ISDestroy(&new_ranks);
7472: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7473: if (!aggregate) {
7474: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7475: PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7476: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7477: } else if (oldranks) {
7478: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7479: } else {
7480: ranks_send_to_idx[0] = is_indices[0];
7481: }
7482: } else {
7483: PetscInt idx = 0;
7484: PetscMPIInt tag;
7485: MPI_Request *reqs;
7487: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7488: PetscMalloc1(rend-rstart,&reqs);
7489: for (i=rstart;i<rend;i++) {
7490: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7491: }
7492: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7493: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7494: PetscFree(reqs);
7495: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7496: PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7497: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7498: } else if (oldranks) {
7499: ranks_send_to_idx[0] = oldranks[idx];
7500: } else {
7501: ranks_send_to_idx[0] = idx;
7502: }
7503: }
7504: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7505: /* clean up */
7506: PetscFree(oldranks);
7507: ISDestroy(&new_ranks_contig);
7508: MatDestroy(&subdomain_adj);
7509: MatPartitioningDestroy(&partitioner);
7510: }
7511: PetscSubcommDestroy(&psubcomm);
7512: PetscFree(procs_candidates);
7514: /* assemble parallel IS for sends */
7515: i = 1;
7516: if (!color) i=0;
7517: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7518: return 0;
7519: }
/* Private integer tags for matrix storage formats; the names suggest dense, AIJ, BAIJ and
   SBAIJ local matrices (NOTE(review): the users of this enum are outside this excerpt) */
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE   = 1,
  MATBAIJ_PRIVATE  = 2,
  MATSBAIJ_PRIVATE = 3
} MatTypePrivate;
7523: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7524: {
7525: Mat local_mat;
7526: IS is_sends_internal;
7527: PetscInt rows,cols,new_local_rows;
7528: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7529: PetscBool ismatis,isdense,newisdense,destroy_mat;
7530: ISLocalToGlobalMapping l2gmap;
7531: PetscInt* l2gmap_indices;
7532: const PetscInt* is_indices;
7533: MatType new_local_type;
7534: /* buffers */
7535: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7536: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7537: PetscInt *recv_buffer_idxs_local;
7538: PetscScalar *ptr_vals,*recv_buffer_vals;
7539: const PetscScalar *send_buffer_vals;
7540: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7541: /* MPI */
7542: MPI_Comm comm,comm_n;
7543: PetscSubcomm subcomm;
7544: PetscMPIInt n_sends,n_recvs,size;
7545: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7546: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7547: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7548: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7549: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7552: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7560: if (nvecs) {
7563: }
7564: /* further checks */
7565: MatISGetLocalMat(mat,&local_mat);
7566: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7568: MatGetSize(local_mat,&rows,&cols);
7570: if (reuse && *mat_n) {
7571: PetscInt mrows,mcols,mnrows,mncols;
7573: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7575: MatGetSize(mat,&mrows,&mcols);
7576: MatGetSize(*mat_n,&mnrows,&mncols);
7579: }
7580: MatGetBlockSize(local_mat,&bs);
7583: /* prepare IS for sending if not provided */
7584: if (!is_sends) {
7586: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7587: } else {
7588: PetscObjectReference((PetscObject)is_sends);
7589: is_sends_internal = is_sends;
7590: }
7592: /* get comm */
7593: PetscObjectGetComm((PetscObject)mat,&comm);
7595: /* compute number of sends */
7596: ISGetLocalSize(is_sends_internal,&i);
7597: PetscMPIIntCast(i,&n_sends);
7599: /* compute number of receives */
7600: MPI_Comm_size(comm,&size);
7601: PetscMalloc1(size,&iflags);
7602: PetscArrayzero(iflags,size);
7603: ISGetIndices(is_sends_internal,&is_indices);
7604: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7605: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7606: PetscFree(iflags);
7608: /* restrict comm if requested */
7609: subcomm = NULL;
7610: destroy_mat = PETSC_FALSE;
7611: if (restrict_comm) {
7612: PetscMPIInt color,subcommsize;
7614: color = 0;
7615: if (restrict_full) {
7616: if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7617: } else {
7618: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7619: }
7620: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7621: subcommsize = size - subcommsize;
7622: /* check if reuse has been requested */
7623: if (reuse) {
7624: if (*mat_n) {
7625: PetscMPIInt subcommsize2;
7626: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7628: comm_n = PetscObjectComm((PetscObject)*mat_n);
7629: } else {
7630: comm_n = PETSC_COMM_SELF;
7631: }
7632: } else { /* MAT_INITIAL_MATRIX */
7633: PetscMPIInt rank;
7635: MPI_Comm_rank(comm,&rank);
7636: PetscSubcommCreate(comm,&subcomm);
7637: PetscSubcommSetNumber(subcomm,2);
7638: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7639: comm_n = PetscSubcommChild(subcomm);
7640: }
7641: /* flag to destroy *mat_n if it is not meaningful on this process */
7642: if (color) destroy_mat = PETSC_TRUE;
7643: } else {
7644: comm_n = comm;
7645: }
7647: /* prepare send/receive buffers */
7648: PetscMalloc1(size,&ilengths_idxs);
7649: PetscArrayzero(ilengths_idxs,size);
7650: PetscMalloc1(size,&ilengths_vals);
7651: PetscArrayzero(ilengths_vals,size);
7652: if (nis) {
7653: PetscCalloc1(size,&ilengths_idxs_is);
7654: }
7656: /* Get data from local matrices */
7658: /* TODO: See below some guidelines on how to prepare the local buffers */
7659: /*
7660: send_buffer_vals should contain the raw values of the local matrix
7661: send_buffer_idxs should contain:
7662: - MatType_PRIVATE type
7663: - PetscInt size_of_l2gmap
7664: - PetscInt global_row_indices[size_of_l2gmap]
7665: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7666: */
7667: {
7668: ISLocalToGlobalMapping mapping;
7670: MatISGetLocalToGlobalMapping(mat,&mapping,NULL);
7671: MatDenseGetArrayRead(local_mat,&send_buffer_vals);
7672: ISLocalToGlobalMappingGetSize(mapping,&i);
7673: PetscMalloc1(i+2,&send_buffer_idxs);
7674: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7675: send_buffer_idxs[1] = i;
7676: ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs);
7677: PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);
7678: ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs);
7679: PetscMPIIntCast(i,&len);
7680: for (i=0;i<n_sends;i++) {
7681: ilengths_vals[is_indices[i]] = len*len;
7682: ilengths_idxs[is_indices[i]] = len+2;
7683: }
7684: }
7685: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7686: /* additional is (if any) */
7687: if (nis) {
7688: PetscMPIInt psum;
7689: PetscInt j;
7690: for (j=0,psum=0;j<nis;j++) {
7691: PetscInt plen;
7692: ISGetLocalSize(isarray[j],&plen);
7693: PetscMPIIntCast(plen,&len);
7694: psum += len+1; /* indices + length */
7695: }
7696: PetscMalloc1(psum,&send_buffer_idxs_is);
7697: for (j=0,psum=0;j<nis;j++) {
7698: PetscInt plen;
7699: const PetscInt *is_array_idxs;
7700: ISGetLocalSize(isarray[j],&plen);
7701: send_buffer_idxs_is[psum] = plen;
7702: ISGetIndices(isarray[j],&is_array_idxs);
7703: PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);
7704: ISRestoreIndices(isarray[j],&is_array_idxs);
7705: psum += plen+1; /* indices + length */
7706: }
7707: for (i=0;i<n_sends;i++) {
7708: ilengths_idxs_is[is_indices[i]] = psum;
7709: }
7710: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7711: }
7712: MatISRestoreLocalMat(mat,&local_mat);
7714: buf_size_idxs = 0;
7715: buf_size_vals = 0;
7716: buf_size_idxs_is = 0;
7717: buf_size_vecs = 0;
7718: for (i=0;i<n_recvs;i++) {
7719: buf_size_idxs += (PetscInt)olengths_idxs[i];
7720: buf_size_vals += (PetscInt)olengths_vals[i];
7721: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7722: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7723: }
7724: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7725: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7726: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7727: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7729: /* get new tags for clean communications */
7730: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7731: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7732: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7733: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7735: /* allocate for requests */
7736: PetscMalloc1(n_sends,&send_req_idxs);
7737: PetscMalloc1(n_sends,&send_req_vals);
7738: PetscMalloc1(n_sends,&send_req_idxs_is);
7739: PetscMalloc1(n_sends,&send_req_vecs);
7740: PetscMalloc1(n_recvs,&recv_req_idxs);
7741: PetscMalloc1(n_recvs,&recv_req_vals);
7742: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7743: PetscMalloc1(n_recvs,&recv_req_vecs);
7745: /* communications */
7746: ptr_idxs = recv_buffer_idxs;
7747: ptr_vals = recv_buffer_vals;
7748: ptr_idxs_is = recv_buffer_idxs_is;
7749: ptr_vecs = recv_buffer_vecs;
7750: for (i=0;i<n_recvs;i++) {
7751: source_dest = onodes[i];
7752: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7753: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7754: ptr_idxs += olengths_idxs[i];
7755: ptr_vals += olengths_vals[i];
7756: if (nis) {
7757: source_dest = onodes_is[i];
7758: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7759: ptr_idxs_is += olengths_idxs_is[i];
7760: }
7761: if (nvecs) {
7762: source_dest = onodes[i];
7763: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7764: ptr_vecs += olengths_idxs[i]-2;
7765: }
7766: }
7767: for (i=0;i<n_sends;i++) {
7768: PetscMPIIntCast(is_indices[i],&source_dest);
7769: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7770: MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7771: if (nis) {
7772: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7773: }
7774: if (nvecs) {
7775: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7776: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7777: }
7778: }
7779: ISRestoreIndices(is_sends_internal,&is_indices);
7780: ISDestroy(&is_sends_internal);
7782: /* assemble new l2g map */
7783: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7784: ptr_idxs = recv_buffer_idxs;
7785: new_local_rows = 0;
7786: for (i=0;i<n_recvs;i++) {
7787: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7788: ptr_idxs += olengths_idxs[i];
7789: }
7790: PetscMalloc1(new_local_rows,&l2gmap_indices);
7791: ptr_idxs = recv_buffer_idxs;
7792: new_local_rows = 0;
7793: for (i=0;i<n_recvs;i++) {
7794: PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));
7795: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7796: ptr_idxs += olengths_idxs[i];
7797: }
7798: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7799: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7800: PetscFree(l2gmap_indices);
7802: /* infer new local matrix type from received local matrices type */
7803: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7804: /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7805: if (n_recvs) {
7806: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7807: ptr_idxs = recv_buffer_idxs;
7808: for (i=0;i<n_recvs;i++) {
7809: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7810: new_local_type_private = MATAIJ_PRIVATE;
7811: break;
7812: }
7813: ptr_idxs += olengths_idxs[i];
7814: }
7815: switch (new_local_type_private) {
7816: case MATDENSE_PRIVATE:
7817: new_local_type = MATSEQAIJ;
7818: bs = 1;
7819: break;
7820: case MATAIJ_PRIVATE:
7821: new_local_type = MATSEQAIJ;
7822: bs = 1;
7823: break;
7824: case MATBAIJ_PRIVATE:
7825: new_local_type = MATSEQBAIJ;
7826: break;
7827: case MATSBAIJ_PRIVATE:
7828: new_local_type = MATSEQSBAIJ;
7829: break;
7830: default:
7831: SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7832: }
7833: } else { /* by default, new_local_type is seqaij */
7834: new_local_type = MATSEQAIJ;
7835: bs = 1;
7836: }
7838: /* create MATIS object if needed */
7839: if (!reuse) {
7840: MatGetSize(mat,&rows,&cols);
7841: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n);
7842: } else {
7843: /* it also destroys the local matrices */
7844: if (*mat_n) {
7845: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7846: } else { /* this is a fake object */
7847: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n);
7848: }
7849: }
7850: MatISGetLocalMat(*mat_n,&local_mat);
7851: MatSetType(local_mat,new_local_type);
7853: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7855: /* Global to local map of received indices */
7856: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7857: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7858: ISLocalToGlobalMappingDestroy(&l2gmap);
7860: /* restore attributes -> type of incoming data and its size */
7861: buf_size_idxs = 0;
7862: for (i=0;i<n_recvs;i++) {
7863: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7864: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7865: buf_size_idxs += (PetscInt)olengths_idxs[i];
7866: }
7867: PetscFree(recv_buffer_idxs);
7869: /* set preallocation */
7870: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7871: if (!newisdense) {
7872: PetscInt *new_local_nnz=NULL;
7874: ptr_idxs = recv_buffer_idxs_local;
7875: if (n_recvs) {
7876: PetscCalloc1(new_local_rows,&new_local_nnz);
7877: }
7878: for (i=0;i<n_recvs;i++) {
7879: PetscInt j;
7880: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7881: for (j=0;j<*(ptr_idxs+1);j++) {
7882: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7883: }
7884: } else {
7885: /* TODO */
7886: }
7887: ptr_idxs += olengths_idxs[i];
7888: }
7889: if (new_local_nnz) {
7890: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7891: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7892: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7893: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7894: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7895: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7896: } else {
7897: MatSetUp(local_mat);
7898: }
7899: PetscFree(new_local_nnz);
7900: } else {
7901: MatSetUp(local_mat);
7902: }
7904: /* set values */
7905: ptr_vals = recv_buffer_vals;
7906: ptr_idxs = recv_buffer_idxs_local;
7907: for (i=0;i<n_recvs;i++) {
7908: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7909: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7910: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7911: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7912: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7913: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7914: } else {
7915: /* TODO */
7916: }
7917: ptr_idxs += olengths_idxs[i];
7918: ptr_vals += olengths_vals[i];
7919: }
7920: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7921: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7922: MatISRestoreLocalMat(*mat_n,&local_mat);
7923: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7924: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7925: PetscFree(recv_buffer_vals);
7927: #if 0
7928: if (!restrict_comm) { /* check */
7929: Vec lvec,rvec;
7930: PetscReal infty_error;
7932: MatCreateVecs(mat,&rvec,&lvec);
7933: VecSetRandom(rvec,NULL);
7934: MatMult(mat,rvec,lvec);
7935: VecScale(lvec,-1.0);
7936: MatMultAdd(*mat_n,rvec,lvec,lvec);
7937: VecNorm(lvec,NORM_INFINITY,&infty_error);
7938: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7939: VecDestroy(&rvec);
7940: VecDestroy(&lvec);
7941: }
7942: #endif
7944: /* assemble new additional is (if any) */
7945: if (nis) {
7946: PetscInt **temp_idxs,*count_is,j,psum;
7948: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7949: PetscCalloc1(nis,&count_is);
7950: ptr_idxs = recv_buffer_idxs_is;
7951: psum = 0;
7952: for (i=0;i<n_recvs;i++) {
7953: for (j=0;j<nis;j++) {
7954: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7955: count_is[j] += plen; /* increment counting of buffer for j-th IS */
7956: psum += plen;
7957: ptr_idxs += plen+1; /* shift pointer to received data */
7958: }
7959: }
7960: PetscMalloc1(nis,&temp_idxs);
7961: PetscMalloc1(psum,&temp_idxs[0]);
7962: for (i=1;i<nis;i++) {
7963: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7964: }
7965: PetscArrayzero(count_is,nis);
7966: ptr_idxs = recv_buffer_idxs_is;
7967: for (i=0;i<n_recvs;i++) {
7968: for (j=0;j<nis;j++) {
7969: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7970: PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);
7971: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7972: ptr_idxs += plen+1; /* shift pointer to received data */
7973: }
7974: }
7975: for (i=0;i<nis;i++) {
7976: ISDestroy(&isarray[i]);
7977: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7978: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7979: }
7980: PetscFree(count_is);
7981: PetscFree(temp_idxs[0]);
7982: PetscFree(temp_idxs);
7983: }
7984: /* free workspace */
7985: PetscFree(recv_buffer_idxs_is);
7986: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7987: PetscFree(send_buffer_idxs);
7988: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7989: if (isdense) {
7990: MatISGetLocalMat(mat,&local_mat);
7991: MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);
7992: MatISRestoreLocalMat(mat,&local_mat);
7993: } else {
7994: /* PetscFree(send_buffer_vals); */
7995: }
7996: if (nis) {
7997: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7998: PetscFree(send_buffer_idxs_is);
7999: }
8001: if (nvecs) {
8002: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
8003: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
8004: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8005: VecDestroy(&nnsp_vec[0]);
8006: VecCreate(comm_n,&nnsp_vec[0]);
8007: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
8008: VecSetType(nnsp_vec[0],VECSTANDARD);
8009: /* set values */
8010: ptr_vals = recv_buffer_vecs;
8011: ptr_idxs = recv_buffer_idxs_local;
8012: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
8013: for (i=0;i<n_recvs;i++) {
8014: PetscInt j;
8015: for (j=0;j<*(ptr_idxs+1);j++) {
8016: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8017: }
8018: ptr_idxs += olengths_idxs[i];
8019: ptr_vals += olengths_idxs[i]-2;
8020: }
8021: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8022: VecAssemblyBegin(nnsp_vec[0]);
8023: VecAssemblyEnd(nnsp_vec[0]);
8024: }
8026: PetscFree(recv_buffer_vecs);
8027: PetscFree(recv_buffer_idxs_local);
8028: PetscFree(recv_req_idxs);
8029: PetscFree(recv_req_vals);
8030: PetscFree(recv_req_vecs);
8031: PetscFree(recv_req_idxs_is);
8032: PetscFree(send_req_idxs);
8033: PetscFree(send_req_vals);
8034: PetscFree(send_req_vecs);
8035: PetscFree(send_req_idxs_is);
8036: PetscFree(ilengths_vals);
8037: PetscFree(ilengths_idxs);
8038: PetscFree(olengths_vals);
8039: PetscFree(olengths_idxs);
8040: PetscFree(onodes);
8041: if (nis) {
8042: PetscFree(ilengths_idxs_is);
8043: PetscFree(olengths_idxs_is);
8044: PetscFree(onodes_is);
8045: }
8046: PetscSubcommDestroy(&subcomm);
8047: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8048: MatDestroy(mat_n);
8049: for (i=0;i<nis;i++) {
8050: ISDestroy(&isarray[i]);
8051: }
8052: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8053: VecDestroy(&nnsp_vec[0]);
8054: }
8055: *mat_n = NULL;
8056: }
8057: return 0;
8058: }
8060: /* temporary hack into ksp private data structure */
8061: #include <petsc/private/kspimpl.h>
8063: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8064: {
8065: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
8066: PC_IS *pcis = (PC_IS*)pc->data;
8067: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
8068: Mat coarsedivudotp = NULL;
8069: Mat coarseG,t_coarse_mat_is;
8070: MatNullSpace CoarseNullSpace = NULL;
8071: ISLocalToGlobalMapping coarse_islg;
8072: IS coarse_is,*isarray,corners;
8073: PetscInt i,im_active=-1,active_procs=-1;
8074: PetscInt nis,nisdofs,nisneu,nisvert;
8075: PetscInt coarse_eqs_per_proc;
8076: PC pc_temp;
8077: PCType coarse_pc_type;
8078: KSPType coarse_ksp_type;
8079: PetscBool multilevel_requested,multilevel_allowed;
8080: PetscBool coarse_reuse;
8081: PetscInt ncoarse,nedcfield;
8082: PetscBool compute_vecs = PETSC_FALSE;
8083: PetscScalar *array;
8084: MatReuse coarse_mat_reuse;
8085: PetscBool restr, full_restr, have_void;
8086: PetscMPIInt size;
8087: PetscErrorCode ierr;
8089: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8090: /* Assign global numbering to coarse dofs */
8091: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8092: PetscInt ocoarse_size;
8093: compute_vecs = PETSC_TRUE;
8095: pcbddc->new_primal_space = PETSC_TRUE;
8096: ocoarse_size = pcbddc->coarse_size;
8097: PetscFree(pcbddc->global_primal_indices);
8098: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8099: /* see if we can avoid some work */
8100: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8101: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8102: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8103: KSPReset(pcbddc->coarse_ksp);
8104: coarse_reuse = PETSC_FALSE;
8105: } else { /* we can safely reuse already computed coarse matrix */
8106: coarse_reuse = PETSC_TRUE;
8107: }
8108: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8109: coarse_reuse = PETSC_FALSE;
8110: }
8111: /* reset any subassembling information */
8112: if (!coarse_reuse || pcbddc->recompute_topography) {
8113: ISDestroy(&pcbddc->coarse_subassembling);
8114: }
8115: } else { /* primal space is unchanged, so we can reuse coarse matrix */
8116: coarse_reuse = PETSC_TRUE;
8117: }
8118: if (coarse_reuse && pcbddc->coarse_ksp) {
8119: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8120: PetscObjectReference((PetscObject)coarse_mat);
8121: coarse_mat_reuse = MAT_REUSE_MATRIX;
8122: } else {
8123: coarse_mat = NULL;
8124: coarse_mat_reuse = MAT_INITIAL_MATRIX;
8125: }
8127: /* creates temporary l2gmap and IS for coarse indexes */
8128: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8129: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
8131: /* creates temporary MATIS object for coarse matrix */
8132: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8133: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is);
8134: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8135: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8136: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8137: MatDestroy(&coarse_submat_dense);
8139: /* count "active" (i.e. with positive local size) and "void" processes */
8140: im_active = !!(pcis->n);
8141: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8143: /* determine number of processes participating in the coarse solver and compute subassembling pattern */
8144: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8145: /* full_restr : just use the receivers from the subassembling pattern */
8146: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8147: coarse_mat_is = NULL;
8148: multilevel_allowed = PETSC_FALSE;
8149: multilevel_requested = PETSC_FALSE;
8150: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8151: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8152: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8153: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8154: if (multilevel_requested) {
8155: ncoarse = active_procs/pcbddc->coarsening_ratio;
8156: restr = PETSC_FALSE;
8157: full_restr = PETSC_FALSE;
8158: } else {
8159: ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8160: restr = PETSC_TRUE;
8161: full_restr = PETSC_TRUE;
8162: }
8163: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8164: ncoarse = PetscMax(1,ncoarse);
8165: if (!pcbddc->coarse_subassembling) {
8166: if (pcbddc->coarsening_ratio > 1) {
8167: if (multilevel_requested) {
8168: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8169: } else {
8170: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8171: }
8172: } else {
8173: PetscMPIInt rank;
8175: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8176: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8177: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8178: }
8179: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of processes involved */
8180: PetscInt psum;
8181: if (pcbddc->coarse_ksp) psum = 1;
8182: else psum = 0;
8183: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8184: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8185: }
8186: /* determine if we can go multilevel */
8187: if (multilevel_requested) {
8188: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8189: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8190: }
8191: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8193: /* dump subassembling pattern */
8194: if (pcbddc->dbg_flag && multilevel_allowed) {
8195: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8196: }
8197: /* compute dofs splitting and neumann boundaries for coarse dofs */
8198: nedcfield = -1;
8199: corners = NULL;
8200: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8201: PetscInt *tidxs,*tidxs2,nout,tsize,i;
8202: const PetscInt *idxs;
8203: ISLocalToGlobalMapping tmap;
8205: /* create map between primal indices (in local representative ordering) and local primal numbering */
8206: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8207: /* allocate space for temporary storage */
8208: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8209: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8210: /* allocate for IS array */
8211: nisdofs = pcbddc->n_ISForDofsLocal;
8212: if (pcbddc->nedclocal) {
8213: if (pcbddc->nedfield > -1) {
8214: nedcfield = pcbddc->nedfield;
8215: } else {
8216: nedcfield = 0;
8218: nisdofs = 1;
8219: }
8220: }
8221: nisneu = !!pcbddc->NeumannBoundariesLocal;
8222: nisvert = 0; /* nisvert is not used */
8223: nis = nisdofs + nisneu + nisvert;
8224: PetscMalloc1(nis,&isarray);
8225: /* dofs splitting */
8226: for (i=0;i<nisdofs;i++) {
8227: /* ISView(pcbddc->ISForDofsLocal[i],0); */
8228: if (nedcfield != i) {
8229: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8230: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8231: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8232: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8233: } else {
8234: ISGetLocalSize(pcbddc->nedclocal,&tsize);
8235: ISGetIndices(pcbddc->nedclocal,&idxs);
8236: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8238: ISRestoreIndices(pcbddc->nedclocal,&idxs);
8239: }
8240: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8241: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8242: /* ISView(isarray[i],0); */
8243: }
8244: /* neumann boundaries */
8245: if (pcbddc->NeumannBoundariesLocal) {
8246: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8247: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8248: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8249: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8250: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8251: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8252: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8253: /* ISView(isarray[nisdofs],0); */
8254: }
8255: /* coordinates */
8256: if (pcbddc->corner_selected) {
8257: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8258: ISGetLocalSize(corners,&tsize);
8259: ISGetIndices(corners,&idxs);
8260: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8262: ISRestoreIndices(corners,&idxs);
8263: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8264: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8265: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8266: }
8267: PetscFree(tidxs);
8268: PetscFree(tidxs2);
8269: ISLocalToGlobalMappingDestroy(&tmap);
8270: } else {
8271: nis = 0;
8272: nisdofs = 0;
8273: nisneu = 0;
8274: nisvert = 0;
8275: isarray = NULL;
8276: }
8277: /* destroy no longer needed map */
8278: ISLocalToGlobalMappingDestroy(&coarse_islg);
8280: /* subassemble */
8281: if (multilevel_allowed) {
8282: Vec vp[1];
8283: PetscInt nvecs = 0;
8284: PetscBool reuse,reuser;
8286: if (coarse_mat) reuse = PETSC_TRUE;
8287: else reuse = PETSC_FALSE;
8288: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8289: vp[0] = NULL;
8290: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8291: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8292: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8293: VecSetType(vp[0],VECSTANDARD);
8294: nvecs = 1;
8296: if (pcbddc->divudotp) {
8297: Mat B,loc_divudotp;
8298: Vec v,p;
8299: IS dummy;
8300: PetscInt np;
8302: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8303: MatGetSize(loc_divudotp,&np,NULL);
8304: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8305: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8306: MatCreateVecs(B,&v,&p);
8307: VecSet(p,1.);
8308: MatMultTranspose(B,p,v);
8309: VecDestroy(&p);
8310: MatDestroy(&B);
8311: VecGetArray(vp[0],&array);
8312: VecPlaceArray(pcbddc->vec1_P,array);
8313: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8314: VecResetArray(pcbddc->vec1_P);
8315: VecRestoreArray(vp[0],&array);
8316: ISDestroy(&dummy);
8317: VecDestroy(&v);
8318: }
8319: }
8320: if (reuser) {
8321: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8322: } else {
8323: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8324: }
8325: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8326: PetscScalar *arraym;
8327: const PetscScalar *arrayv;
8328: PetscInt nl;
8329: VecGetLocalSize(vp[0],&nl);
8330: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8331: MatDenseGetArray(coarsedivudotp,&arraym);
8332: VecGetArrayRead(vp[0],&arrayv);
8333: PetscArraycpy(arraym,arrayv,nl);
8334: VecRestoreArrayRead(vp[0],&arrayv);
8335: MatDenseRestoreArray(coarsedivudotp,&arraym);
8336: VecDestroy(&vp[0]);
8337: } else {
8338: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8339: }
8340: } else {
8341: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8342: }
8343: if (coarse_mat_is || coarse_mat) {
8344: if (!multilevel_allowed) {
8345: MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8346: } else {
8347: /* if this matrix is present, it means we are not reusing the coarse matrix */
8348: if (coarse_mat_is) {
8350: PetscObjectReference((PetscObject)coarse_mat_is);
8351: coarse_mat = coarse_mat_is;
8352: }
8353: }
8354: }
8355: MatDestroy(&t_coarse_mat_is);
8356: MatDestroy(&coarse_mat_is);
8358: /* create local to global scatters for coarse problem */
8359: if (compute_vecs) {
8360: PetscInt lrows;
8361: VecDestroy(&pcbddc->coarse_vec);
8362: if (coarse_mat) {
8363: MatGetLocalSize(coarse_mat,&lrows,NULL);
8364: } else {
8365: lrows = 0;
8366: }
8367: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8368: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8369: VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8370: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8371: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8372: }
8373: ISDestroy(&coarse_is);
8375: /* set defaults for coarse KSP and PC */
8376: if (multilevel_allowed) {
8377: coarse_ksp_type = KSPRICHARDSON;
8378: coarse_pc_type = PCBDDC;
8379: } else {
8380: coarse_ksp_type = KSPPREONLY;
8381: coarse_pc_type = PCREDUNDANT;
8382: }
8384: /* print some info if requested */
8385: if (pcbddc->dbg_flag) {
8386: if (!multilevel_allowed) {
8387: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8388: if (multilevel_requested) {
8389: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8390: } else if (pcbddc->max_levels) {
8391: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8392: }
8393: PetscViewerFlush(pcbddc->dbg_viewer);
8394: }
8395: }
8397: /* communicate coarse discrete gradient */
8398: coarseG = NULL;
8399: if (pcbddc->nedcG && multilevel_allowed) {
8400: MPI_Comm ccomm;
8401: if (coarse_mat) {
8402: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8403: } else {
8404: ccomm = MPI_COMM_NULL;
8405: }
8406: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8407: }
8409: /* create the coarse KSP object only once with defaults */
8410: if (coarse_mat) {
8411: PetscBool isredundant,isbddc,force,valid;
8412: PetscViewer dbg_viewer = NULL;
8414: if (pcbddc->dbg_flag) {
8415: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8416: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8417: }
8418: if (!pcbddc->coarse_ksp) {
8419: char prefix[256],str_level[16];
8420: size_t len;
8422: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8423: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8424: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8425: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8426: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8427: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8428: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8429: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8430: /* TODO is this logic correct? should check for coarse_mat type */
8431: PCSetType(pc_temp,coarse_pc_type);
8432: /* prefix */
8433: PetscStrcpy(prefix,"");
8434: PetscStrcpy(str_level,"");
8435: if (!pcbddc->current_level) {
8436: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8437: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8438: } else {
8439: PetscStrlen(((PetscObject)pc)->prefix,&len);
8440: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8441: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8442: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8443: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8444: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8445: PetscStrlcat(prefix,str_level,sizeof(prefix));
8446: }
8447: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8448: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8449: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8450: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8451: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8452: /* allow user customization */
8453: KSPSetFromOptions(pcbddc->coarse_ksp);
8454: /* get some info after set from options */
8455: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8456: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8457: force = PETSC_FALSE;
8458: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8459: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8460: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8461: if (multilevel_allowed && !force && !valid) {
8462: isbddc = PETSC_TRUE;
8463: PCSetType(pc_temp,PCBDDC);
8464: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8465: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8466: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8467: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8468: PetscObjectOptionsBegin((PetscObject)pc_temp);
8469: (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8470: PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8471: PetscOptionsEnd();
8472: pc_temp->setfromoptionscalled++;
8473: }
8474: }
8475: }
8476: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8477: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8478: if (nisdofs) {
8479: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8480: for (i=0;i<nisdofs;i++) {
8481: ISDestroy(&isarray[i]);
8482: }
8483: }
8484: if (nisneu) {
8485: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8486: ISDestroy(&isarray[nisdofs]);
8487: }
8488: if (nisvert) {
8489: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8490: ISDestroy(&isarray[nis-1]);
8491: }
8492: if (coarseG) {
8493: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8494: }
8496: /* get some info after set from options */
8497: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8499: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8500: if (isbddc && !multilevel_allowed) {
8501: PCSetType(pc_temp,coarse_pc_type);
8502: }
8503: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8504: force = PETSC_FALSE;
8505: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8506: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8507: if (multilevel_requested && multilevel_allowed && !valid && !force) {
8508: PCSetType(pc_temp,PCBDDC);
8509: }
8510: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8511: if (isredundant) {
8512: KSP inner_ksp;
8513: PC inner_pc;
8515: PCRedundantGetKSP(pc_temp,&inner_ksp);
8516: KSPGetPC(inner_ksp,&inner_pc);
8517: }
8519: /* parameters which miss an API */
8520: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8521: if (isbddc) {
8522: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8524: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8525: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8526: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8527: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8528: if (pcbddc_coarse->benign_saddle_point) {
8529: Mat coarsedivudotp_is;
8530: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8531: IS row,col;
8532: const PetscInt *gidxs;
8533: PetscInt n,st,M,N;
8535: MatGetSize(coarsedivudotp,&n,NULL);
8536: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8537: st = st-n;
8538: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8539: MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8540: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8541: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8542: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8543: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8544: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8545: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8546: ISGetSize(row,&M);
8547: MatGetSize(coarse_mat,&N,NULL);
8548: ISDestroy(&row);
8549: ISDestroy(&col);
8550: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8551: MatSetType(coarsedivudotp_is,MATIS);
8552: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8553: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8554: ISLocalToGlobalMappingDestroy(&rl2g);
8555: ISLocalToGlobalMappingDestroy(&cl2g);
8556: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8557: MatDestroy(&coarsedivudotp);
8558: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8559: MatDestroy(&coarsedivudotp_is);
8560: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8561: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8562: }
8563: }
8565: /* propagate symmetry info of coarse matrix */
8566: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8567: if (pc->pmat->symmetric_set) {
8568: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8569: }
8570: if (pc->pmat->hermitian_set) {
8571: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8572: }
8573: if (pc->pmat->spd_set) {
8574: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8575: }
8576: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8577: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8578: }
8579: /* set operators */
8580: MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8581: MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8582: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8583: if (pcbddc->dbg_flag) {
8584: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8585: }
8586: }
8587: MatDestroy(&coarseG);
8588: PetscFree(isarray);
8589: #if 0
8590: {
8591: PetscViewer viewer;
8592: char filename[256];
8593: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8594: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8595: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8596: MatView(coarse_mat,viewer);
8597: PetscViewerPopFormat(viewer);
8598: PetscViewerDestroy(&viewer);
8599: }
8600: #endif
8602: if (corners) {
8603: Vec gv;
8604: IS is;
8605: const PetscInt *idxs;
8606: PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8607: PetscScalar *coords;
8610: VecGetSize(pcbddc->coarse_vec,&N);
8611: VecGetLocalSize(pcbddc->coarse_vec,&n);
8612: VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8613: VecSetBlockSize(gv,cdim);
8614: VecSetSizes(gv,n*cdim,N*cdim);
8615: VecSetType(gv,VECSTANDARD);
8616: VecSetFromOptions(gv);
8617: VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */
8619: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8620: ISGetLocalSize(is,&n);
8621: ISGetIndices(is,&idxs);
8622: PetscMalloc1(n*cdim,&coords);
8623: for (i=0;i<n;i++) {
8624: for (d=0;d<cdim;d++) {
8625: coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8626: }
8627: }
8628: ISRestoreIndices(is,&idxs);
8629: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8631: ISGetLocalSize(corners,&n);
8632: ISGetIndices(corners,&idxs);
8633: VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8634: ISRestoreIndices(corners,&idxs);
8635: PetscFree(coords);
8636: VecAssemblyBegin(gv);
8637: VecAssemblyEnd(gv);
8638: VecGetArray(gv,&coords);
8639: if (pcbddc->coarse_ksp) {
8640: PC coarse_pc;
8641: PetscBool isbddc;
8643: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8644: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8645: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8646: PetscReal *realcoords;
8648: VecGetLocalSize(gv,&n);
8649: #if defined(PETSC_USE_COMPLEX)
8650: PetscMalloc1(n,&realcoords);
8651: for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8652: #else
8653: realcoords = coords;
8654: #endif
8655: PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8656: #if defined(PETSC_USE_COMPLEX)
8657: PetscFree(realcoords);
8658: #endif
8659: }
8660: }
8661: VecRestoreArray(gv,&coords);
8662: VecDestroy(&gv);
8663: }
8664: ISDestroy(&corners);
8666: if (pcbddc->coarse_ksp) {
8667: Vec crhs,csol;
8669: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8670: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8671: if (!csol) {
8672: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8673: }
8674: if (!crhs) {
8675: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8676: }
8677: }
8678: MatDestroy(&coarsedivudotp);
8680: /* compute null space for coarse solver if the benign trick has been requested */
8681: if (pcbddc->benign_null) {
8683: VecSet(pcbddc->vec1_P,0.);
8684: for (i=0;i<pcbddc->benign_n;i++) {
8685: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8686: }
8687: VecAssemblyBegin(pcbddc->vec1_P);
8688: VecAssemblyEnd(pcbddc->vec1_P);
8689: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8690: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8691: if (coarse_mat) {
8692: Vec nullv;
8693: PetscScalar *array,*array2;
8694: PetscInt nl;
8696: MatCreateVecs(coarse_mat,&nullv,NULL);
8697: VecGetLocalSize(nullv,&nl);
8698: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8699: VecGetArray(nullv,&array2);
8700: PetscArraycpy(array2,array,nl);
8701: VecRestoreArray(nullv,&array2);
8702: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8703: VecNormalize(nullv,NULL);
8704: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8705: VecDestroy(&nullv);
8706: }
8707: }
8708: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8710: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8711: if (pcbddc->coarse_ksp) {
8712: PetscBool ispreonly;
8714: if (CoarseNullSpace) {
8715: PetscBool isnull;
8716: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8717: if (isnull) {
8718: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8719: }
8720: /* TODO: add local nullspaces (if any) */
8721: }
8722: /* setup coarse ksp */
8723: KSPSetUp(pcbddc->coarse_ksp);
8724: /* Check coarse problem if in debug mode or if solving with an iterative method */
8725: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8726: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8727: KSP check_ksp;
8728: KSPType check_ksp_type;
8729: PC check_pc;
8730: Vec check_vec,coarse_vec;
8731: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8732: PetscInt its;
8733: PetscBool compute_eigs;
8734: PetscReal *eigs_r,*eigs_c;
8735: PetscInt neigs;
8736: const char *prefix;
8738: /* Create ksp object suitable for estimation of extreme eigenvalues */
8739: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8740: PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8741: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8742: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8743: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8744: /* prevent from setup unneeded object */
8745: KSPGetPC(check_ksp,&check_pc);
8746: PCSetType(check_pc,PCNONE);
8747: if (ispreonly) {
8748: check_ksp_type = KSPPREONLY;
8749: compute_eigs = PETSC_FALSE;
8750: } else {
8751: check_ksp_type = KSPGMRES;
8752: compute_eigs = PETSC_TRUE;
8753: }
8754: KSPSetType(check_ksp,check_ksp_type);
8755: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8756: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8757: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8758: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8759: KSPSetOptionsPrefix(check_ksp,prefix);
8760: KSPAppendOptionsPrefix(check_ksp,"check_");
8761: KSPSetFromOptions(check_ksp);
8762: KSPSetUp(check_ksp);
8763: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8764: KSPSetPC(check_ksp,check_pc);
8765: /* create random vec */
8766: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8767: VecSetRandom(check_vec,NULL);
8768: MatMult(coarse_mat,check_vec,coarse_vec);
8769: /* solve coarse problem */
8770: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8771: KSPCheckSolve(check_ksp,pc,coarse_vec);
8772: /* set eigenvalue estimation if preonly has not been requested */
8773: if (compute_eigs) {
8774: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8775: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8776: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8777: if (neigs) {
8778: lambda_max = eigs_r[neigs-1];
8779: lambda_min = eigs_r[0];
8780: if (pcbddc->use_coarse_estimates) {
8781: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8782: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8783: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8784: }
8785: }
8786: }
8787: }
8789: /* check coarse problem residual error */
8790: if (pcbddc->dbg_flag) {
8791: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8792: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8793: VecAXPY(check_vec,-1.0,coarse_vec);
8794: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8795: MatMult(coarse_mat,check_vec,coarse_vec);
8796: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8797: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8798: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8799: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8800: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8801: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8802: if (CoarseNullSpace) {
8803: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8804: }
8805: if (compute_eigs) {
8806: PetscReal lambda_max_s,lambda_min_s;
8807: KSPConvergedReason reason;
8808: KSPGetType(check_ksp,&check_ksp_type);
8809: KSPGetIterationNumber(check_ksp,&its);
8810: KSPGetConvergedReason(check_ksp,&reason);
8811: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8812: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8813: for (i=0;i<neigs;i++) {
8814: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8815: }
8816: }
8817: PetscViewerFlush(dbg_viewer);
8818: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8819: }
8820: VecDestroy(&check_vec);
8821: VecDestroy(&coarse_vec);
8822: KSPDestroy(&check_ksp);
8823: if (compute_eigs) {
8824: PetscFree(eigs_r);
8825: PetscFree(eigs_c);
8826: }
8827: }
8828: }
8829: MatNullSpaceDestroy(&CoarseNullSpace);
8830: /* print additional info */
8831: if (pcbddc->dbg_flag) {
8832: /* waits until all processes reaches this point */
8833: PetscBarrier((PetscObject)pc);
8834: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8835: PetscViewerFlush(pcbddc->dbg_viewer);
8836: }
8838: /* free memory */
8839: MatDestroy(&coarse_mat);
8840: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8841: return 0;
8842: }
8844: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8845: {
8846: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8847: PC_IS* pcis = (PC_IS*)pc->data;
8848: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8849: IS subset,subset_mult,subset_n;
8850: PetscInt local_size,coarse_size=0;
8851: PetscInt *local_primal_indices=NULL;
8852: const PetscInt *t_local_primal_indices;
8854: /* Compute global number of coarse dofs */
8856: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8857: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8858: ISDestroy(&subset_n);
8859: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8860: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8861: ISDestroy(&subset);
8862: ISDestroy(&subset_mult);
8863: ISGetLocalSize(subset_n,&local_size);
8865: PetscMalloc1(local_size,&local_primal_indices);
8866: ISGetIndices(subset_n,&t_local_primal_indices);
8867: PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);
8868: ISRestoreIndices(subset_n,&t_local_primal_indices);
8869: ISDestroy(&subset_n);
8871: /* check numbering */
8872: if (pcbddc->dbg_flag) {
8873: PetscScalar coarsesum,*array,*array2;
8874: PetscInt i;
8875: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8877: PetscViewerFlush(pcbddc->dbg_viewer);
8878: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8879: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8880: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8881: /* counter */
8882: VecSet(pcis->vec1_global,0.0);
8883: VecSet(pcis->vec1_N,1.0);
8884: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8885: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8886: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8887: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8888: VecSet(pcis->vec1_N,0.0);
8889: for (i=0;i<pcbddc->local_primal_size;i++) {
8890: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8891: }
8892: VecAssemblyBegin(pcis->vec1_N);
8893: VecAssemblyEnd(pcis->vec1_N);
8894: VecSet(pcis->vec1_global,0.0);
8895: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8896: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8897: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8898: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8899: VecGetArray(pcis->vec1_N,&array);
8900: VecGetArray(pcis->vec2_N,&array2);
8901: for (i=0;i<pcis->n;i++) {
8902: if (array[i] != 0.0 && array[i] != array2[i]) {
8903: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8904: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8905: set_error = PETSC_TRUE;
8906: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8907: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8908: }
8909: }
8910: VecRestoreArray(pcis->vec2_N,&array2);
8911: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8912: PetscViewerFlush(pcbddc->dbg_viewer);
8913: for (i=0;i<pcis->n;i++) {
8914: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8915: }
8916: VecRestoreArray(pcis->vec1_N,&array);
8917: VecSet(pcis->vec1_global,0.0);
8918: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8919: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8920: VecSum(pcis->vec1_global,&coarsesum);
8921: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8922: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8923: PetscInt *gidxs;
8925: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8926: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8927: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8928: PetscViewerFlush(pcbddc->dbg_viewer);
8929: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8930: for (i=0;i<pcbddc->local_primal_size;i++) {
8931: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8932: }
8933: PetscViewerFlush(pcbddc->dbg_viewer);
8934: PetscFree(gidxs);
8935: }
8936: PetscViewerFlush(pcbddc->dbg_viewer);
8937: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8939: }
8941: /* get back data */
8942: *coarse_size_n = coarse_size;
8943: *local_primal_indices_n = local_primal_indices;
8944: return 0;
8945: }
8947: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8948: {
8949: IS localis_t;
8950: PetscInt i,lsize,*idxs,n;
8951: PetscScalar *vals;
8953: /* get indices in local ordering exploiting local to global map */
8954: ISGetLocalSize(globalis,&lsize);
8955: PetscMalloc1(lsize,&vals);
8956: for (i=0;i<lsize;i++) vals[i] = 1.0;
8957: ISGetIndices(globalis,(const PetscInt**)&idxs);
8958: VecSet(gwork,0.0);
8959: VecSet(lwork,0.0);
8960: if (idxs) { /* multilevel guard */
8961: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8962: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8963: }
8964: VecAssemblyBegin(gwork);
8965: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8966: PetscFree(vals);
8967: VecAssemblyEnd(gwork);
8968: /* now compute set in local ordering */
8969: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8970: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8971: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8972: VecGetSize(lwork,&n);
8973: for (i=0,lsize=0;i<n;i++) {
8974: if (PetscRealPart(vals[i]) > 0.5) {
8975: lsize++;
8976: }
8977: }
8978: PetscMalloc1(lsize,&idxs);
8979: for (i=0,lsize=0;i<n;i++) {
8980: if (PetscRealPart(vals[i]) > 0.5) {
8981: idxs[lsize++] = i;
8982: }
8983: }
8984: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8985: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8986: *localis = localis_t;
8987: return 0;
8988: }
8990: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8991: {
8992: PC_IS *pcis=(PC_IS*)pc->data;
8993: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8994: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
8995: Mat S_j;
8996: PetscInt *used_xadj,*used_adjncy;
8997: PetscBool free_used_adj;
8998: PetscErrorCode ierr;
9000: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9001: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9002: free_used_adj = PETSC_FALSE;
9003: if (pcbddc->sub_schurs_layers == -1) {
9004: used_xadj = NULL;
9005: used_adjncy = NULL;
9006: } else {
9007: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9008: used_xadj = pcbddc->mat_graph->xadj;
9009: used_adjncy = pcbddc->mat_graph->adjncy;
9010: } else if (pcbddc->computed_rowadj) {
9011: used_xadj = pcbddc->mat_graph->xadj;
9012: used_adjncy = pcbddc->mat_graph->adjncy;
9013: } else {
9014: PetscBool flg_row=PETSC_FALSE;
9015: const PetscInt *xadj,*adjncy;
9016: PetscInt nvtxs;
9018: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9019: if (flg_row) {
9020: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
9021: PetscArraycpy(used_xadj,xadj,nvtxs+1);
9022: PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);
9023: free_used_adj = PETSC_TRUE;
9024: } else {
9025: pcbddc->sub_schurs_layers = -1;
9026: used_xadj = NULL;
9027: used_adjncy = NULL;
9028: }
9029: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9030: }
9031: }
9033: /* setup sub_schurs data */
9034: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9035: if (!sub_schurs->schur_explicit) {
9036: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9037: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9038: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
9039: } else {
9040: Mat change = NULL;
9041: Vec scaling = NULL;
9042: IS change_primal = NULL, iP;
9043: PetscInt benign_n;
9044: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9045: PetscBool need_change = PETSC_FALSE;
9046: PetscBool discrete_harmonic = PETSC_FALSE;
9048: if (!pcbddc->use_vertices && reuse_solvers) {
9049: PetscInt n_vertices;
9051: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
9052: reuse_solvers = (PetscBool)!n_vertices;
9053: }
9054: if (!pcbddc->benign_change_explicit) {
9055: benign_n = pcbddc->benign_n;
9056: } else {
9057: benign_n = 0;
9058: }
9059: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9060: We need a global reduction to avoid possible deadlocks.
9061: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9062: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9063: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9064: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9065: need_change = (PetscBool)(!need_change);
9066: }
9067: /* If the user defines additional constraints, we import them here.
9068: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9069: if (need_change) {
9070: PC_IS *pcisf;
9071: PC_BDDC *pcbddcf;
9072: PC pcf;
9075: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9076: PCSetOperators(pcf,pc->mat,pc->pmat);
9077: PCSetType(pcf,PCBDDC);
9079: /* hacks */
9080: pcisf = (PC_IS*)pcf->data;
9081: pcisf->is_B_local = pcis->is_B_local;
9082: pcisf->vec1_N = pcis->vec1_N;
9083: pcisf->BtoNmap = pcis->BtoNmap;
9084: pcisf->n = pcis->n;
9085: pcisf->n_B = pcis->n_B;
9086: pcbddcf = (PC_BDDC*)pcf->data;
9087: PetscFree(pcbddcf->mat_graph);
9088: pcbddcf->mat_graph = pcbddc->mat_graph;
9089: pcbddcf->use_faces = PETSC_TRUE;
9090: pcbddcf->use_change_of_basis = PETSC_TRUE;
9091: pcbddcf->use_change_on_faces = PETSC_TRUE;
9092: pcbddcf->use_qr_single = PETSC_TRUE;
9093: pcbddcf->fake_change = PETSC_TRUE;
9095: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9096: PCBDDCConstraintsSetUp(pcf);
9097: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9098: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
9099: change = pcbddcf->ConstraintMatrix;
9100: pcbddcf->ConstraintMatrix = NULL;
9102: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9103: PetscFree(pcbddcf->sub_schurs);
9104: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9105: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9106: PetscFree(pcbddcf->primal_indices_local_idxs);
9107: PetscFree(pcbddcf->onearnullvecs_state);
9108: PetscFree(pcf->data);
9109: pcf->ops->destroy = NULL;
9110: pcf->ops->reset = NULL;
9111: PCDestroy(&pcf);
9112: }
9113: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9115: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9116: if (iP) {
9117: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9118: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9119: PetscOptionsEnd();
9120: }
9121: if (discrete_harmonic) {
9122: Mat A;
9123: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9124: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9125: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9126: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9127: MatDestroy(&A);
9128: } else {
9129: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9130: }
9131: MatDestroy(&change);
9132: ISDestroy(&change_primal);
9133: }
9134: MatDestroy(&S_j);
9136: /* free adjacency */
9137: if (free_used_adj) {
9138: PetscFree2(used_xadj,used_adjncy);
9139: }
9140: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9141: return 0;
9142: }
9144: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9145: {
9146: PC_IS *pcis=(PC_IS*)pc->data;
9147: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9148: PCBDDCGraph graph;
9150: /* attach interface graph for determining subsets */
9151: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9152: IS verticesIS,verticescomm;
9153: PetscInt vsize,*idxs;
9155: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9156: ISGetSize(verticesIS,&vsize);
9157: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9158: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9159: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9160: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9161: PCBDDCGraphCreate(&graph);
9162: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9163: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9164: ISDestroy(&verticescomm);
9165: PCBDDCGraphComputeConnectedComponents(graph);
9166: } else {
9167: graph = pcbddc->mat_graph;
9168: }
9169: /* print some info */
9170: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9171: IS vertices;
9172: PetscInt nv,nedges,nfaces;
9173: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9174: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9175: ISGetSize(vertices,&nv);
9176: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9177: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9178: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9179: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9180: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9181: PetscViewerFlush(pcbddc->dbg_viewer);
9182: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9183: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9184: }
9186: /* sub_schurs init */
9187: if (!pcbddc->sub_schurs) {
9188: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9189: }
9190: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
9192: /* free graph struct */
9193: if (pcbddc->sub_schurs_rebuild) {
9194: PCBDDCGraphDestroy(&graph);
9195: }
9196: return 0;
9197: }
9199: PetscErrorCode PCBDDCCheckOperator(PC pc)
9200: {
9201: PC_IS *pcis=(PC_IS*)pc->data;
9202: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9204: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9205: IS zerodiag = NULL;
9206: Mat S_j,B0_B=NULL;
9207: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
9208: PetscScalar *p0_check,*array,*array2;
9209: PetscReal norm;
9210: PetscInt i;
9212: /* B0 and B0_B */
9213: if (zerodiag) {
9214: IS dummy;
9216: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9217: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9218: MatCreateVecs(B0_B,NULL,&dummy_vec);
9219: ISDestroy(&dummy);
9220: }
9221: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9222: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9223: VecSet(pcbddc->vec1_P,1.0);
9224: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9225: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9226: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9227: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9228: VecReciprocal(vec_scale_P);
9229: /* S_j */
9230: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9231: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9233: /* mimic vector in \widetilde{W}_\Gamma */
9234: VecSetRandom(pcis->vec1_N,NULL);
9235: /* continuous in primal space */
9236: VecSetRandom(pcbddc->coarse_vec,NULL);
9237: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9238: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9239: VecGetArray(pcbddc->vec1_P,&array);
9240: PetscCalloc1(pcbddc->benign_n,&p0_check);
9241: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9242: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9243: VecRestoreArray(pcbddc->vec1_P,&array);
9244: VecAssemblyBegin(pcis->vec1_N);
9245: VecAssemblyEnd(pcis->vec1_N);
9246: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9247: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9248: VecDuplicate(pcis->vec2_B,&vec_check_B);
9249: VecCopy(pcis->vec2_B,vec_check_B);
9251: /* assemble rhs for coarse problem */
9252: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9253: /* local with Schur */
9254: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9255: if (zerodiag) {
9256: VecGetArray(dummy_vec,&array);
9257: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9258: VecRestoreArray(dummy_vec,&array);
9259: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9260: }
9261: /* sum on primal nodes the local contributions */
9262: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9263: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9264: VecGetArray(pcis->vec1_N,&array);
9265: VecGetArray(pcbddc->vec1_P,&array2);
9266: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9267: VecRestoreArray(pcbddc->vec1_P,&array2);
9268: VecRestoreArray(pcis->vec1_N,&array);
9269: VecSet(pcbddc->coarse_vec,0.);
9270: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9271: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9272: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9273: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9274: VecGetArray(pcbddc->vec1_P,&array);
9275: /* scale primal nodes (BDDC sums contibutions) */
9276: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9277: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9278: VecRestoreArray(pcbddc->vec1_P,&array);
9279: VecAssemblyBegin(pcis->vec1_N);
9280: VecAssemblyEnd(pcis->vec1_N);
9281: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9282: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9283: /* global: \widetilde{B0}_B w_\Gamma */
9284: if (zerodiag) {
9285: MatMult(B0_B,pcis->vec2_B,dummy_vec);
9286: VecGetArray(dummy_vec,&array);
9287: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9288: VecRestoreArray(dummy_vec,&array);
9289: }
9290: /* BDDC */
9291: VecSet(pcis->vec1_D,0.);
9292: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
9294: VecCopy(pcis->vec1_B,pcis->vec2_B);
9295: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9296: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9297: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9298: for (i=0;i<pcbddc->benign_n;i++) {
9299: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9300: }
9301: PetscFree(p0_check);
9302: VecDestroy(&vec_scale_P);
9303: VecDestroy(&vec_check_B);
9304: VecDestroy(&dummy_vec);
9305: MatDestroy(&S_j);
9306: MatDestroy(&B0_B);
9307: }
9308: return 0;
9309: }
9311: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9312: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9313: {
9314: Mat At;
9315: IS rows;
9316: PetscInt rst,ren;
9317: PetscLayout rmap;
9319: rst = ren = 0;
9320: if (ccomm != MPI_COMM_NULL) {
9321: PetscLayoutCreate(ccomm,&rmap);
9322: PetscLayoutSetSize(rmap,A->rmap->N);
9323: PetscLayoutSetBlockSize(rmap,1);
9324: PetscLayoutSetUp(rmap);
9325: PetscLayoutGetRange(rmap,&rst,&ren);
9326: }
9327: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9328: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9329: ISDestroy(&rows);
9331: if (ccomm != MPI_COMM_NULL) {
9332: Mat_MPIAIJ *a,*b;
9333: IS from,to;
9334: Vec gvec;
9335: PetscInt lsize;
9337: MatCreate(ccomm,B);
9338: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9339: MatSetType(*B,MATAIJ);
9340: PetscLayoutDestroy(&((*B)->rmap));
9341: PetscLayoutSetUp((*B)->cmap);
9342: a = (Mat_MPIAIJ*)At->data;
9343: b = (Mat_MPIAIJ*)(*B)->data;
9344: MPI_Comm_size(ccomm,&b->size);
9345: MPI_Comm_rank(ccomm,&b->rank);
9346: PetscObjectReference((PetscObject)a->A);
9347: PetscObjectReference((PetscObject)a->B);
9348: b->A = a->A;
9349: b->B = a->B;
9351: b->donotstash = a->donotstash;
9352: b->roworiented = a->roworiented;
9353: b->rowindices = NULL;
9354: b->rowvalues = NULL;
9355: b->getrowactive = PETSC_FALSE;
9357: (*B)->rmap = rmap;
9358: (*B)->factortype = A->factortype;
9359: (*B)->assembled = PETSC_TRUE;
9360: (*B)->insertmode = NOT_SET_VALUES;
9361: (*B)->preallocated = PETSC_TRUE;
9363: if (a->colmap) {
9364: #if defined(PETSC_USE_CTABLE)
9365: PetscTableCreateCopy(a->colmap,&b->colmap);
9366: #else
9367: PetscMalloc1(At->cmap->N,&b->colmap);
9368: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9369: PetscArraycpy(b->colmap,a->colmap,At->cmap->N);
9370: #endif
9371: } else b->colmap = NULL;
9372: if (a->garray) {
9373: PetscInt len;
9374: len = a->B->cmap->n;
9375: PetscMalloc1(len+1,&b->garray);
9376: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9377: if (len) PetscArraycpy(b->garray,a->garray,len);
9378: } else b->garray = NULL;
9380: PetscObjectReference((PetscObject)a->lvec);
9381: b->lvec = a->lvec;
9382: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9384: /* cannot use VecScatterCopy */
9385: VecGetLocalSize(b->lvec,&lsize);
9386: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9387: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9388: MatCreateVecs(*B,&gvec,NULL);
9389: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9390: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9391: ISDestroy(&from);
9392: ISDestroy(&to);
9393: VecDestroy(&gvec);
9394: }
9395: MatDestroy(&At);
9396: return 0;
9397: }