Actual source code: hem.c
#include <petsc/private/matimpl.h>
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>

/* linked list methods
 *
 * PetscCDCreate
 */
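/* Create a PetscCoarsenData object: an array of 'a_size' singly linked lists of
   global indices, backed by a chunked node pool (see PetscCDGetNewNode). */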
PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
{
  PetscCoarsenData *ail;

  /* allocate pool, partially */
  PetscNew(&ail);
  *a_out = ail;
  ail->pool_list.next  = NULL;
  ail->pool_list.array = NULL;
  ail->chk_sz          = 0;
  /* allocate array */
  ail->size = a_size;
  PetscCalloc1(a_size, &ail->array);
  ail->extra_nodes = NULL;
  ail->mat         = NULL;
  return 0;
}

/* PetscCDDestroy
 */
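/* Free the chained pool chunks, the root chunk array, the list-head array, and
   the PetscCoarsenData itself. Note that the cached Mat (ail->mat), if any, is
   not destroyed here. */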
PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
{
  PetscCDArrNd *n = &ail->pool_list;

  n = n->next;
  while (n) {
    PetscCDArrNd *lstn = n;
    n = n->next;
    PetscFree(lstn);
  }
  if (ail->pool_list.array) {
    PetscFree(ail->pool_list.array);
  }
  PetscFree(ail->array);
  /* delete this (+agg+pool array) */
  PetscFree(ail);
  return 0;
}

/* PetscCDSetChuckSize - set the chunk size of the node pool (note that "Chuck"
   is the spelling used in the API name)
 */
PetscErrorCode PetscCDSetChuckSize(PetscCoarsenData *ail, PetscInt a_sz)
{
  ail->chk_sz = a_sz;
  return 0;
}

/* PetscCDGetNewNode
 */
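/* Hand out a node for global id 'a_id': reuse a node from the 'extra_nodes'
   free list if one is available, otherwise carve one out of the current pool
   chunk, allocating a new chunk of 'chk_sz' nodes when the current chunk is
   exhausted. */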
PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
{
  *a_out = NULL; /* squelch -Wmaybe-uninitialized */
  if (ail->extra_nodes) {
    PetscCDIntNd *node = ail->extra_nodes;
    ail->extra_nodes = node->next;
    node->gid  = a_id;
    node->next = NULL;
    *a_out     = node;
  } else {
    if (!ail->pool_list.array) {
      if (!ail->chk_sz) ail->chk_sz = 10; /* use a chunk size of ail->size? */
      PetscMalloc1(ail->chk_sz, &ail->pool_list.array);
      ail->new_node       = ail->pool_list.array;
      ail->new_left       = ail->chk_sz;
      ail->new_node->next = NULL;
    } else if (!ail->new_left) {
      PetscCDArrNd *node;
      PetscMalloc(ail->chk_sz*sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node);
      node->array         = (PetscCDIntNd*)(node + 1);
      node->next          = ail->pool_list.next;
      ail->pool_list.next = node;
      ail->new_left       = ail->chk_sz;
      ail->new_node       = node->array;
    }
    ail->new_node->gid  = a_id;
    ail->new_node->next = NULL;
    *a_out = ail->new_node++; ail->new_left--;
  }
  return 0;
}

/* PetscCDIntNdSetID
 */
PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
{
  a_this->gid = a_id;
  return 0;
}

/* PetscCDIntNdGetID
 */
PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
{
  *a_gid = a_this->gid;
  return 0;
}

/* PetscCDGetHeadPos
 */
PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
{
  *pos = ail->array[a_idx];
  return 0;
}

/* PetscCDGetNextPos
 */
PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
{
  *pos = (*pos)->next;
  return 0;
}
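
/* Typical iteration over list 'a_idx' using the position API (a sketch; the
   same pattern is used verbatim in heavyEdgeMatchAgg below):

     PetscCDIntNd *pos;
     PetscInt      gid;
     PetscCDGetHeadPos(ail, a_idx, &pos);
     while (pos) {
       PetscCDIntNdGetID(pos, &gid);
       PetscCDGetNextPos(ail, a_idx, &pos);
       ... use gid ...
     }
*/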

/* PetscCDAppendID
 */
PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
{
  PetscCDIntNd *n,*n2;

  PetscCDGetNewNode(ail, &n, a_id);
  if (!(n2=ail->array[a_idx])) ail->array[a_idx] = n;
  else {
    do {
      if (!n2->next) {
        n2->next = n;
        break;
      }
      n2 = n2->next;
    } while (n2);
  }
  return 0;
}

/* PetscCDAppendNode
 */
PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
{
  PetscCDIntNd *n2;

  if (!(n2=ail->array[a_idx])) ail->array[a_idx] = a_n;
  else {
    do {
      if (!n2->next) {
        n2->next  = a_n;
        a_n->next = NULL;
        break;
      }
      n2 = n2->next;
    } while (n2);
  }
  return 0;
}

/* PetscCDRemoveNextNode: remove the node after 'a_last' (a_last->next) -- this
   exposes the singly linked list structure to the API
 */
PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
{
  PetscCDIntNd *del;

  del          = a_last->next;
  a_last->next = del->next;
  /* del->next = NULL; -- the removed node is still used by an iterator, so keep it intact; needs a doubly linked list to fix properly */
  /* could reuse n2 but PetscCDAppendNode sometimes uses it */
  return 0;
}

/* PetscCDPrint
 */
PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, MPI_Comm comm)
{
  PetscCDIntNd *n;
  PetscInt     ii,kk;
  PetscMPIInt  rank;

  MPI_Comm_rank(comm, &rank);
  for (ii=0; ii<ail->size; ii++) {
    kk = 0;
    n  = ail->array[ii];
    if (n) PetscPrintf(comm,"[%d]%s list %" PetscInt_FMT ":\n",rank,PETSC_FUNCTION_NAME,ii);
    while (n) {
      PetscPrintf(comm,"\t[%d] %" PetscInt_FMT ") id %" PetscInt_FMT "\n",rank,++kk,n->gid);
      n = n->next;
    }
  }
  return 0;
}

/* PetscCDAppendRemove
 */
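/* Append list 'a_srcidx' to the end of list 'a_destidx' and leave the source
   slot empty (the nodes themselves are moved, not copied). */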
PetscErrorCode PetscCDAppendRemove(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
{
  PetscCDIntNd *n;

  n = ail->array[a_destidx];
  if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
  else {
    do {
      if (!n->next) {
        n->next = ail->array[a_srcidx];
        break;
      }
      n = n->next;
    } while (1);
  }
  ail->array[a_srcidx] = NULL;
  return 0;
}

/* PetscCDRemoveAll
 */
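/* Detach list 'a_idx' and chain its nodes onto the 'extra_nodes' free list so
   that PetscCDGetNewNode can reuse them. */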
PetscErrorCode PetscCDRemoveAll(PetscCoarsenData *ail, PetscInt a_idx)
{
  PetscCDIntNd *rem,*n1;

  rem               = ail->array[a_idx];
  ail->array[a_idx] = NULL;
  if (!(n1=ail->extra_nodes)) ail->extra_nodes = rem;
  else {
    while (n1->next) n1 = n1->next;
    n1->next = rem;
  }
  return 0;
}

/* PetscCDSizeAt
 */
PetscErrorCode PetscCDSizeAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
{
  PetscCDIntNd *n1;
  PetscInt     sz = 0;

  n1 = ail->array[a_idx];
  while (n1) {
    n1 = n1->next;
    sz++;
  }
  *a_sz = sz;
  return 0;
}

/* PetscCDEmptyAt
 */
PetscErrorCode PetscCDEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
{
  *a_e = (PetscBool)(ail->array[a_idx]==NULL);
  return 0;
}

/* PetscCDGetMIS
 */
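/* Build an IS of the local indices whose lists are non-empty, i.e. the
   selected (root) vertices of the coarsening. */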
PetscErrorCode PetscCDGetMIS(PetscCoarsenData *ail, IS *a_mis)
{
  PetscCDIntNd *n;
  PetscInt     ii,kk;
  PetscInt     *permute;

  for (ii=kk=0; ii<ail->size; ii++) {
    n = ail->array[ii];
    if (n) kk++;
  }
  PetscMalloc1(kk, &permute);
  for (ii=kk=0; ii<ail->size; ii++) {
    n = ail->array[ii];
    if (n) permute[kk++] = ii;
  }
  ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis);
  return 0;
}

/* PetscCDGetMat
 */
PetscErrorCode PetscCDGetMat(const PetscCoarsenData *ail, Mat *a_mat)
{
  *a_mat = ail->mat;
  return 0;
}

/* PetscCDSetMat
 */
PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
{
  ail->mat = a_mat;
  return 0;
}

/* PetscCDGetASMBlocks
 */
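/* Create one IS per non-empty aggregate, with each local index expanded by the
   block size 'a_bs', plus (if present) one extra IS collecting the
   boundary-condition rows of 'mat' (rows with fewer than two nonzeros). */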
PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, Mat mat, PetscInt *a_sz, IS **a_local_is)
{
  PetscCDIntNd *n;
  PetscInt     lsz,ii,kk,*idxs,jj,s,e,gid;
  IS           *is_loc,is_bcs;

  for (ii=kk=0; ii<ail->size; ii++) {
    if (ail->array[ii]) kk++;
  }
  /* count BCs */
  MatGetOwnershipRange(mat, &s, &e);
  for (gid=s,lsz=0; gid<e; gid++) {
    MatGetRow(mat,gid,&jj,NULL,NULL);
    if (jj<2) lsz++;
    MatRestoreRow(mat,gid,&jj,NULL,NULL);
  }
  if (lsz) {
    PetscMalloc1(a_bs*lsz, &idxs);
    for (gid=s,lsz=0; gid<e; gid++) {
      MatGetRow(mat,gid,&jj,NULL,NULL);
      if (jj<2) {
        for (jj=0; jj<a_bs; lsz++,jj++) idxs[lsz] = a_bs*gid + jj;
      }
      MatRestoreRow(mat,gid,&jj,NULL,NULL);
    }
    ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_bcs);
    *a_sz = kk + 1; /* out */
  } else {
    is_bcs = NULL;
    *a_sz  = kk; /* out */
  }
  PetscMalloc1(*a_sz, &is_loc);

  for (ii=kk=0; ii<ail->size; ii++) {
    for (lsz=0, n=ail->array[ii]; n; lsz++, n=n->next) /* void */;
    if (lsz) {
      PetscMalloc1(a_bs*lsz, &idxs);
      for (lsz = 0, n=ail->array[ii]; n; n = n->next) {
        PetscCDIntNdGetID(n, &gid);
        for (jj=0; jj<a_bs; lsz++,jj++) idxs[lsz] = a_bs*gid + jj;
      }
      ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]);
    }
  }
  if (is_bcs) {
    is_loc[kk++] = is_bcs;
  }
  *a_local_is = is_loc; /* out */

  return 0;
}

/* ********************************************************************** */
/* edge for priority queue */
typedef struct edge_tag {
  PetscReal weight;
  PetscInt  lid0,gid1,cpid1;
} Edge;
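/* 'weight' is the edge weight, 'lid0' the local id of the first vertex, 'gid1'
   the global id of the second vertex, and 'cpid1' its index in the compressed
   (ghost) columns, or -1 if the second vertex is also local. */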

static int gamg_hem_compare(const void *a, const void *b)
{
  PetscReal va = ((Edge*)a)->weight, vb = ((Edge*)b)->weight;
  return (va < vb) ? 1 : (va == vb) ? 0 : -1; /* sort in decreasing weight order; 0 for equal */
}

/* -------------------------------------------------------------------------- */
/*
   heavyEdgeMatchAgg - parallel heavy edge matching (HEM). MatAIJ specific!!!

   Input Parameters:
 . perm - permutation
 . a_Gmat - global matrix of the graph (data not defined)

   Output Parameter:
 . a_locals_llist - array of lists of local nodes rooted at local nodes
*/
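/* Outline of one outer iteration (a summary of the code below, not a separate
   specification):
   1. compute, for every local vertex, its heaviest incident edge weight
      (locMaxEdge) and the largest owning rank among its ghost neighbors
      (locMaxPE), and scatter both to the ghost copies;
   2. build the list of local and local-to-ghost edges and sort it by
      decreasing weight;
   3. greedily match vertex pairs over the sorted edges, skipping an edge when
      a heavier edge elsewhere (possibly on a larger rank) should claim one of
      its endpoints;
   4. exchange the lists of ghost vertices deleted by the matching so their
      aggregate members move to the owning rank;
   5. accumulate the matching into a projection matrix P and replace the graph
      with the rescaled coarse graph P^T * cMat * P for the next iteration. */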
static PetscErrorCode heavyEdgeMatchAgg(IS perm,Mat a_Gmat,PetscCoarsenData **a_locals_llist)
{
  PetscBool        isMPI;
  MPI_Comm         comm;
  PetscInt         sub_it,kk,n,ix,*idx,*ii,iter,Iend,my0;
  PetscMPIInt      rank,size;
  const PetscInt   nloc = a_Gmat->rmap->n,n_iter=6; /* need to figure out how to stop this */
  PetscInt         *lid_cprowID,*lid_gid;
  PetscBool        *lid_matched;
  Mat_SeqAIJ       *matA, *matB=NULL;
  Mat_MPIAIJ       *mpimat=NULL;
  PetscScalar      one=1.;
  PetscCoarsenData *agg_llists = NULL,*deleted_list = NULL;
  Mat              cMat,tMat,P;
  MatScalar        *ap;
  PetscMPIInt      tag1,tag2;

  PetscObjectGetComm((PetscObject)a_Gmat,&comm);
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  MatGetOwnershipRange(a_Gmat, &my0, &Iend);
  PetscCommGetNewTag(comm, &tag1);
  PetscCommGetNewTag(comm, &tag2);

  PetscMalloc1(nloc, &lid_gid); /* explicit array needed */
  PetscMalloc1(nloc, &lid_cprowID);
  PetscMalloc1(nloc, &lid_matched);

  PetscCDCreate(nloc, &agg_llists);
  /* PetscCDSetChuckSize(agg_llists, nloc+1); */
  *a_locals_llist = agg_llists;
  PetscCDCreate(size, &deleted_list);
  PetscCDSetChuckSize(deleted_list, 100);
  /* setup 'lid_gid' for scatters and add self to all lists */
  for (kk=0; kk<nloc; kk++) {
    lid_gid[kk] = kk + my0;
    PetscCDAppendID(agg_llists, kk, my0+kk);
  }

  /* make a copy of the graph; this copy gets destroyed in the iterations */
  MatDuplicate(a_Gmat,MAT_COPY_VALUES,&cMat);
  PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI);
  iter = 0;
  while (iter++ < n_iter) {
    PetscScalar    *cpcol_gid,*cpcol_max_ew,*cpcol_max_pe,*lid_max_ew;
    PetscBool      *cpcol_matched;
    PetscMPIInt    *cpcol_pe,proc;
    Vec            locMaxEdge,locMaxPE,ghostMaxEdge,ghostMaxPE;
    PetscInt       nEdges,n_nz_row,jj;
    Edge           *Edges;
    PetscInt       gid;
    const PetscInt *perm_ix, n_sub_its = 120;

    /* get submatrices of cMat */
    if (isMPI) {
      mpimat = (Mat_MPIAIJ*)cMat->data;
      matA   = (Mat_SeqAIJ*)mpimat->A->data;
      matB   = (Mat_SeqAIJ*)mpimat->B->data;
      if (!matB->compressedrow.use) {
        /* force construction of compressed row data structure since code below requires it */
        MatCheckCompressedRow(mpimat->B,matB->nonzerorowcnt,&matB->compressedrow,matB->i,mpimat->B->rmap->n,-1.0);
      }
    } else {
      matA = (Mat_SeqAIJ*)cMat->data;
    }

    /* set max edge on nodes */
    MatCreateVecs(cMat, &locMaxEdge, NULL);
    MatCreateVecs(cMat, &locMaxPE, NULL);

    /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
    if (mpimat) {
      Vec         vec;
      PetscScalar vval;

      MatCreateVecs(cMat, &vec, NULL);
      /* cpcol_pe */
      vval = (PetscScalar)(rank);
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
      }
      VecAssemblyBegin(vec);
      VecAssemblyEnd(vec);
      VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
      VecGetArray(mpimat->lvec, &cpcol_gid); /* get proc IDs in 'cpcol_gid' (temporarily) */
      VecGetLocalSize(mpimat->lvec, &n);
      PetscMalloc1(n, &cpcol_pe);
      for (kk=0; kk<n; kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
      VecRestoreArray(mpimat->lvec, &cpcol_gid);

      /* cpcol_gid */
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        vval = (PetscScalar)(gid);
        VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
      }
      VecAssemblyBegin(vec);
      VecAssemblyEnd(vec);
      VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
      VecDestroy(&vec);
      VecGetArray(mpimat->lvec, &cpcol_gid); /* get ghost GIDs in 'cpcol_gid' */

      /* cpcol_matched */
      VecGetLocalSize(mpimat->lvec, &n);
      PetscMalloc1(n, &cpcol_matched);
      for (kk=0; kk<n; kk++) cpcol_matched[kk] = PETSC_FALSE;
    }

    /* need an inverse map - locals */
    for (kk=0; kk<nloc; kk++) lid_cprowID[kk] = -1;
    /* set index into compressed row 'lid_cprowID' */
    if (matB) {
      for (ix=0; ix<matB->compressedrow.nrows; ix++) {
        lid_cprowID[matB->compressedrow.rindex[ix]] = ix;
      }
    }

    /* compute 'locMaxEdge' & 'locMaxPE', and count the edges */
    for (nEdges=0,kk=0,gid=my0; kk<nloc; kk++,gid++) {
      PetscReal   max_e = 0., tt;
      PetscScalar vval;
      PetscInt    lid = kk;
      PetscMPIInt max_pe=rank,pe;

      ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
        if (lidj > lid) nEdges++;
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap  = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        for (jj=0; jj<n; jj++) {
          if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
          nEdges++;
          if ((pe=cpcol_pe[idx[jj]]) > max_pe) max_pe = pe;
        }
      }
      vval = max_e;
      VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);

      vval = (PetscScalar)max_pe;
      VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);
    }
    VecAssemblyBegin(locMaxEdge);
    VecAssemblyEnd(locMaxEdge);
    VecAssemblyBegin(locMaxPE);
    VecAssemblyEnd(locMaxPE);

    /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
    if (mpimat) {
      VecDuplicate(mpimat->lvec, &ghostMaxEdge);
      VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
      VecGetArray(ghostMaxEdge, &cpcol_max_ew);

      VecDuplicate(mpimat->lvec, &ghostMaxPE);
      VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
      VecGetArray(ghostMaxPE, &cpcol_max_pe);
    }

    /* setup sorted list of edges */
    PetscMalloc1(nEdges, &Edges);
    ISGetIndices(perm, &perm_ix);
    for (nEdges=n_nz_row=kk=0; kk<nloc; kk++) {
      PetscInt nn, lid = perm_ix[kk];
      ii = matA->i; nn = n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj > lid) {
          Edges[nEdges].lid0   = lid;
          Edges[nEdges].gid1   = lidj + my0;
          Edges[nEdges].cpid1  = -1;
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap  = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        nn += n;
        for (jj=0; jj<n; jj++) {
          Edges[nEdges].lid0   = lid;
          Edges[nEdges].gid1   = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
          Edges[nEdges].cpid1  = idx[jj];
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if (nn > 1) n_nz_row++;
      else if (iter == 1) {
        /* should select this because it is technically in the MIS, but let's not */
        PetscCDRemoveAll(agg_llists, lid);
      }
    }
    ISRestoreIndices(perm,&perm_ix);

    qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);

    /* projection matrix */
    MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, NULL, 1, NULL, &P);

    /* clear matched flags */
    for (kk=0; kk<nloc; kk++) lid_matched[kk] = PETSC_FALSE;
    /* process - communicate - process */
    for (sub_it=0; sub_it<n_sub_its; sub_it++) {
      PetscInt nactive_edges;

      VecGetArray(locMaxEdge, &lid_max_ew);
      for (kk=nactive_edges=0; kk<nEdges; kk++) {
        /* HEM */
        const Edge     *e = &Edges[kk];
        const PetscInt lid0=e->lid0,gid1=e->gid1,cpid1=e->cpid1,gid0=lid0+my0,lid1=gid1-my0;
        PetscBool      isOK = PETSC_TRUE;

        /* skip if either (local) vertex is done already */
        if (lid_matched[lid0] || (gid1>=my0 && gid1<Iend && lid_matched[gid1-my0])) continue;

        /* skip if ghost vertex is done */
        if (cpid1 != -1 && cpcol_matched[cpid1]) continue;

        nactive_edges++;
        /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
        if (PetscRealPart(lid_max_ew[lid0]) > e->weight + PETSC_SMALL) continue;

        if (cpid1 == -1) {
          if (PetscRealPart(lid_max_ew[lid1]) > e->weight + PETSC_SMALL) continue;
        } else {
          /* see if edge might get matched on other proc */
          PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
          if (g_max_e > e->weight + PETSC_SMALL) continue;
          else if (e->weight > g_max_e - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) {
            /* max_e is (essentially) equal to this edge's weight and a larger rank will deal with it */
            continue;
          }
        }

        /* check ghost for v0 */
        if (isOK) {
          PetscReal max_e,ew;
          if ((ix=lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0; jj<n && isOK; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* max_e is (essentially) equal to this edge's weight and a larger rank will deal with it */
              if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid0]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
                isOK = PETSC_FALSE;
              }
            }
          }

          /* for v1 */
          if (cpid1 == -1 && isOK) {
            if ((ix=lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
              ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
              ap  = matB->a + ii[ix];
              idx = matB->j + ii[ix];
              for (jj=0; jj<n && isOK; jj++) {
                PetscInt lidj = idx[jj];
                if (cpcol_matched[lidj]) continue;
                ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
                /* max_e is (essentially) equal to this edge's weight and a larger rank will deal with it */
                if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid1]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
                  isOK = PETSC_FALSE;
                }
              }
            }
          }
        }

        /* do it */
        if (isOK) {
          if (cpid1 == -1) {
            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            PetscCDAppendRemove(agg_llists, lid0, lid1);
          } else if (sub_it != n_sub_its-1) {
            /* add gid1 to list of ghosts deleted by me -- I need their children */
            proc = cpcol_pe[cpid1];

            cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? */

            PetscCDAppendID(deleted_list, proc, cpid1); /* cache to send messages */
            PetscCDAppendID(deleted_list, proc, lid0);
          } else continue;

          lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
          /* set projection */
          MatSetValues(P,1,&gid0,1,&gid0,&one,INSERT_VALUES);
          MatSetValues(P,1,&gid1,1,&gid0,&one,INSERT_VALUES);
        } /* matched */
      } /* edge loop */

      /* deal with deleted ghosts on first pass */
      if (size>1 && sub_it != n_sub_its-1) {
#define REQ_BF_SIZE 100
        PetscCDIntNd *pos;
        PetscBool    ise = PETSC_FALSE;
        PetscInt     nSend1, **sbuffs1,nSend2;
        MPI_Request  *sreqs2[REQ_BF_SIZE],*rreqs2[REQ_BF_SIZE];
        MPI_Status   status;

        /* send requests */
        for (proc=0,nSend1=0; proc<size; proc++) {
          PetscCDEmptyAt(deleted_list,proc,&ise);
          if (!ise) nSend1++;
        }
        PetscMalloc1(nSend1, &sbuffs1);
        for (proc=0,nSend1=0; proc<size; proc++) {
          /* count ghosts */
          PetscCDSizeAt(deleted_list,proc,&n);
          if (n>0) {
#define CHUNCK_SIZE 100
            PetscInt    *sbuff,*pt;
            MPI_Request *request;
            n /= 2;
            PetscMalloc1(2 + 2*n + n*CHUNCK_SIZE + 2, &sbuff);
            /* save requests */
            sbuffs1[nSend1] = sbuff;
            request         = (MPI_Request*)sbuff;
            sbuff = pt = (PetscInt*)(request+1);
            *pt++ = n; *pt++ = rank;

            PetscCDGetHeadPos(deleted_list,proc,&pos);
            while (pos) {
              PetscInt lid0, cpid, gid;
              PetscCDIntNdGetID(pos, &cpid);
              gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
              PetscCDGetNextPos(deleted_list,proc,&pos);
              PetscCDIntNdGetID(pos, &lid0);
              PetscCDGetNextPos(deleted_list,proc,&pos);
              *pt++ = gid; *pt++ = lid0;
            }
            /* send request tag1 [n, proc, n*[gid1,lid0] ] */
            MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, comm, request);
            /* post receive */
            request        = (MPI_Request*)pt;
            rreqs2[nSend1] = request; /* cache recv request */
            pt             = (PetscInt*)(request+1);
            MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, comm, request);
            /* clear list */
            PetscCDRemoveAll(deleted_list, proc);
            nSend1++;
          }
        }
        /* receive requests, send responses, clear lists */
        kk = nactive_edges;
        MPIU_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,comm); /* not the correct synchronization point, and it is a global operation */
        nSend2 = 0;
        while (1) {
#define BF_SZ 10000
          PetscMPIInt flag,count;
          PetscInt    rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3;
          MPI_Request *request;

          MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &status);
          if (!flag) break;
          MPI_Get_count(&status, MPIU_INT, &count);

          proc = status.MPI_SOURCE;
          /* receive request tag1 [n, proc, n*[gid1,lid0] ] */
          MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, comm, &status);
          /* count sends */
          pt = rbuff; count3 = count2 = 0;
          n = *pt++; kk = *pt++;
          while (n--) {
            PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++;

            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            PetscCDSizeAt(agg_llists, lid1, &kk);
            count2 += kk + 2;
            count3++; /* number of verts requested (n) */
          }

          /* send tag2 *[lid0, n, n*[gid] ] */
          PetscMalloc(count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);
          request          = (MPI_Request*)sbuff;
          sreqs2[nSend2++] = request; /* cache request */

          pt2 = sbuff = (PetscInt*)(request+1);
          pt  = rbuff;
          n = *pt++; kk = *pt++;
          while (n--) {
            /* read [n, proc, n*[gid1,lid0] */
            PetscInt gid1=*pt++, lid1=gid1-my0, lid0=*pt++;
            /* write [lid0, n, n*[gid] ] */
            *pt2++ = lid0;
            pt3    = pt2++; /* save pointer for later */
            PetscCDGetHeadPos(agg_llists,lid1,&pos);
            while (pos) {
              PetscInt gid;
              PetscCDIntNdGetID(pos, &gid);
              PetscCDGetNextPos(agg_llists,lid1,&pos);
              *pt2++ = gid;
            }
            *pt3 = (pt2-pt3)-1;
            /* clear list */
            PetscCDRemoveAll(agg_llists, lid1);
          }
          /* send requested data tag2 *[lid0, n, n*[gid1] ] */
          MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, comm, request);
        }

        /* receive tag2 *[lid0, n, n*[gid] ] */
        for (kk=0; kk<nSend1; kk++) {
          PetscMPIInt count;
          MPI_Request *request;
          PetscInt    *pt, *pt2;

          request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
          MPI_Wait(request, &status);
          MPI_Get_count(&status, MPIU_INT, &count);
          pt = pt2 = (PetscInt*)(request+1);
          while (pt-pt2 < count) {
            PetscInt lid0 = *pt++, n = *pt++;
            while (n--) {
              PetscInt gid1 = *pt++;
              PetscCDAppendID(agg_llists, lid0, gid1);
            }
          }
        }

        /* wait for tag1 isends */
        while (nSend1--) {
          MPI_Request *request;
          request = (MPI_Request*)sbuffs1[nSend1];
          MPI_Wait(request, &status);
          PetscFree(request);
        }
        PetscFree(sbuffs1);

        /* wait for tag2 isends */
        while (nSend2--) {
          MPI_Request *request = sreqs2[nSend2];
          MPI_Wait(request, &status);
          PetscFree(request);
        }

        VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
        VecRestoreArray(ghostMaxPE, &cpcol_max_pe);

        /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */
        for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
          PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
          VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
        }
        VecAssemblyBegin(locMaxPE);
        VecAssemblyEnd(locMaxPE);
        VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
        VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
        VecGetArray(ghostMaxEdge, &cpcol_max_ew);
        VecGetLocalSize(mpimat->lvec, &n);
        for (kk=0; kk<n; kk++) {
          cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
        }
        VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
      } /* size > 1 */

      /* compute 'locMaxEdge' */
      VecRestoreArray(locMaxEdge, &lid_max_ew);
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        PetscReal   max_e = 0.,tt;
        PetscScalar vval;
        PetscInt    lid = kk;

        if (lid_matched[lid]) vval = 0.;
        else {
          ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
          ap = matA->a + ii[lid];
          for (jj=0; jj<n; jj++) {
            PetscInt lidj = idx[jj];
            if (lid_matched[lidj]) continue; /* this is new - can change local max */
            if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
          }
          if (lid_cprowID && (ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0; jj<n; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
            }
          }
        }
        vval = (PetscScalar)max_e;
        VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
      }
      VecAssemblyBegin(locMaxEdge);
      VecAssemblyEnd(locMaxEdge);

      if (size>1 && sub_it != n_sub_its-1) {
        /* compute 'cpcol_max_ew' */
        VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
        VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
        VecGetArray(ghostMaxEdge, &cpcol_max_ew);
        VecGetArray(locMaxEdge, &lid_max_ew);

        /* compute 'cpcol_max_pe' */
        for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
          PetscInt    lid = kk;
          PetscReal   ew,v1_max_e,v0_max_e=PetscRealPart(lid_max_ew[lid]);
          PetscScalar vval;
          PetscMPIInt max_pe=rank,pe;

          if (lid_matched[lid]) vval = (PetscScalar)rank;
          else if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0; jj<n; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* get max pe that has a max_e == to this edge w */
              if ((pe=cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - PETSC_SMALL && ew > v0_max_e - PETSC_SMALL) max_pe = pe;
            }
            vval = (PetscScalar)max_pe;
          }
          VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);
        }
        VecAssemblyBegin(locMaxPE);
        VecAssemblyEnd(locMaxPE);

        VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
        VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
        VecGetArray(ghostMaxPE, &cpcol_max_pe);
        VecRestoreArray(locMaxEdge, &lid_max_ew);
      } /* deal with deleted ghosts */
      PetscInfo(a_Gmat,"\t %" PetscInt_FMT ".%" PetscInt_FMT ": %" PetscInt_FMT " active edges.\n",iter,sub_it,nactive_edges);
      if (!nactive_edges) break;
    } /* sub_it loop */

    /* clean up iteration */
    PetscFree(Edges);
    if (mpimat) {
      VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
      VecDestroy(&ghostMaxEdge);
      VecRestoreArray(ghostMaxPE, &cpcol_max_pe);
      VecDestroy(&ghostMaxPE);
      PetscFree(cpcol_pe);
      PetscFree(cpcol_matched);
    }

    VecDestroy(&locMaxEdge);
    VecDestroy(&locMaxPE);

    if (mpimat) {
      VecRestoreArray(mpimat->lvec, &cpcol_gid);
    }

    /* create next G if needed */
    if (iter == n_iter) { /* hard-wired test - need to look at fully surrounded nodes or something */
      MatDestroy(&P);
      MatDestroy(&cMat);
      break;
    } else {
      Vec diag;
      /* add identity for unmatched vertices so they stay alive */
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        if (!lid_matched[kk]) {
          gid = kk+my0;
          MatGetRow(cMat,gid,&n,NULL,NULL);
          if (n>1) {
            MatSetValues(P,1,&gid,1,&gid,&one,INSERT_VALUES);
          }
          MatRestoreRow(cMat,gid,&n,NULL,NULL);
        }
      }
      MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY);

      /* project to make the new graph with collapsed edges */
      MatPtAP(cMat,P,MAT_INITIAL_MATRIX,1.0,&tMat);
      MatDestroy(&P);
      MatDestroy(&cMat);
      cMat = tMat;
      MatCreateVecs(cMat, &diag, NULL);
      MatGetDiagonal(cMat, diag); /* effectively PCJACOBI */
      VecReciprocal(diag);
      VecSqrtAbs(diag);
      MatDiagonalScale(cMat, diag, diag);
      VecDestroy(&diag);
    }
  } /* coarsen iterator */

  /* make a 'fake' matrix that records, for each local aggregate root, its non-local members */
  if (size>1) {
    Mat          mat;
    PetscCDIntNd *pos;
    PetscInt     gid, NN, MM, jj = 0, mxsz = 0;

    for (kk=0; kk<nloc; kk++) {
      PetscCDSizeAt(agg_llists, kk, &jj);
      if (jj > mxsz) mxsz = jj;
    }
    MatGetSize(a_Gmat, &MM, &NN);
    if (mxsz > MM-nloc) mxsz = MM-nloc;

    MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat);

    for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
      /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)) { */
      PetscCDGetHeadPos(agg_llists,kk,&pos);
      while (pos) {
        PetscInt gid1;
        PetscCDIntNdGetID(pos, &gid1);
        PetscCDGetNextPos(agg_llists,kk,&pos);

        if (gid1 < my0 || gid1 >= my0+nloc) {
          MatSetValues(mat,1,&gid,1,&gid1,&one,ADD_VALUES);
        }
      }
    }
    MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
    PetscCDSetMat(agg_llists, mat);
  }

  PetscFree(lid_cprowID);
  PetscFree(lid_gid);
  PetscFree(lid_matched);
  PetscCDDestroy(deleted_list);
  return 0;
}

/*
   HEM coarsen, simple greedy.
*/
static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
{
  Mat mat = coarse->graph;

  if (!coarse->perm) {
    IS       perm;
    PetscInt n,m;

    MatGetLocalSize(mat, &m, &n);
    ISCreateStride(PetscObjectComm((PetscObject)mat), m, 0, 1, &perm);
    heavyEdgeMatchAgg(perm, mat, &coarse->agg_lists);
    ISDestroy(&perm);
  } else {
    heavyEdgeMatchAgg(coarse->perm, mat, &coarse->agg_lists);
  }
  return 0;
}

static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse,PetscViewer viewer)
{
  PetscMPIInt rank;
  PetscBool   iascii;

  MPI_Comm_rank(PetscObjectComm((PetscObject)coarse),&rank);
  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPushSynchronized(viewer);
    PetscViewerASCIISynchronizedPrintf(viewer," [%d] HEM aggregator\n",rank);
    PetscViewerFlush(viewer);
    PetscViewerASCIIPopSynchronized(viewer);
  }
  return 0;
}

/*MC
   MATCOARSENHEM - A coarsener that uses heavy edge matching (HEM), a simple greedy coarsener

   Level: beginner

.seealso: MatCoarsenSetType(), MatCoarsenType, MatCoarsenCreate()
M*/
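
/* Minimal usage sketch (not part of this file; assumes the standard MatCoarsen
   interface, with 'A' an assembled adjacency/graph Mat):

     MatCoarsen crs;
     MatCoarsenCreate(PetscObjectComm((PetscObject)A), &crs);
     MatCoarsenSetType(crs, MATCOARSENHEM);
     MatCoarsenSetAdjacency(crs, A);
     MatCoarsenApply(crs);        (calls MatCoarsenApply_HEM above)
     MatCoarsenDestroy(&crs);
*/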

PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
{
  coarse->ops->apply = MatCoarsenApply_HEM;
  coarse->ops->view  = MatCoarsenView_HEM;
  return 0;
}