Actual source code: ispai.c
1: /*
2: 3/99 Modified by Stephen Barnard to support SPAI version 3.0
3: */
5: /*
6: Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
7: Code written by Stephen Barnard.
9: Note: there is some BAD memory bleeding below!
11: This code needs work
13: 1) get rid of all memory bleeding
14: 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
15: rather than having the sp flag for PC_SPAI
16: 3) fix to set the block size based on the matrix block size
18: */
19: #if !defined(PETSC_SKIP_COMPLEX)
20: #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
21: #endif
23: #include <petsc/private/pcimpl.h>
24: #include <../src/ksp/pc/impls/spai/petscspai.h>
26: /*
27: These are the SPAI include files
28: */
29: EXTERN_C_BEGIN
30: #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
31: #include <spai.h>
32: #include <matrix.h>
33: EXTERN_C_END
35: static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
36: static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
38: typedef struct {
39: matrix *B; /* matrix in SPAI format */
40: matrix *BT; /* transpose of matrix in SPAI format */
41: matrix *M; /* the approximate inverse in SPAI format */
43: Mat PM; /* the approximate inverse PETSc format */
45: double epsilon; /* tolerance */
46: int nbsteps; /* max number of "improvement" steps per line */
47: int max; /* max dimensions of is_I, q, etc. */
48: int maxnew; /* max number of new entries per step */
49: int block_size; /* constant block size */
50: int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
51: int verbose; /* SPAI prints timing and statistics */
53: int sp; /* symmetric nonzero pattern */
54: MPI_Comm comm_spai; /* communicator to be used with spai */
55: } PC_SPAI;
57: static PetscErrorCode PCSetUp_SPAI(PC pc)
58: {
59: PC_SPAI *ispai = (PC_SPAI *)pc->data;
60: Mat AT;
62: PetscFunctionBegin;
63: init_SPAI();
65: if (ispai->sp) {
66: PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
67: } else {
68: /* Use the transpose to get the column nonzero structure. */
69: PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
70: PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
71: PetscCall(MatDestroy(&AT));
72: }
74: /* Destroy the transpose */
75: /* Don't know how to do it. PETSc developers? */
77: /* construct SPAI preconditioner */
78: /* FILE *messages */ /* file for warning messages */
79: /* double epsilon */ /* tolerance */
80: /* int nbsteps */ /* max number of "improvement" steps per line */
81: /* int max */ /* max dimensions of is_I, q, etc. */
82: /* int maxnew */ /* max number of new entries per step */
83: /* int block_size */ /* block_size == 1 specifies scalar elements
84: block_size == n specifies nxn constant-block elements
85: block_size == 0 specifies variable-block elements */
86: /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
87: /* int verbose */ /* verbose == 0 specifies that SPAI is silent
88: verbose == 1 prints timing and matrix statistics */
90: PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);
92: PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));
94: /* free the SPAI matrices */
95: sp_free_matrix(ispai->B);
96: sp_free_matrix(ispai->M);
97: PetscFunctionReturn(PETSC_SUCCESS);
98: }
100: static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
101: {
102: PC_SPAI *ispai = (PC_SPAI *)pc->data;
104: PetscFunctionBegin;
105: /* Now using PETSc's multiply */
106: PetscCall(MatMult(ispai->PM, xx, y));
107: PetscFunctionReturn(PETSC_SUCCESS);
108: }
110: static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
111: {
112: PC_SPAI *ispai = (PC_SPAI *)pc->data;
114: PetscFunctionBegin;
115: /* Now using PETSc's multiply */
116: PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
117: PetscFunctionReturn(PETSC_SUCCESS);
118: }
120: static PetscErrorCode PCDestroy_SPAI(PC pc)
121: {
122: PC_SPAI *ispai = (PC_SPAI *)pc->data;
124: PetscFunctionBegin;
125: PetscCall(MatDestroy(&ispai->PM));
126: PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
127: PetscCall(PetscFree(pc->data));
128: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
129: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
130: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
131: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
132: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
133: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
134: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
135: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
136: PetscFunctionReturn(PETSC_SUCCESS);
137: }
139: static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
140: {
141: PC_SPAI *ispai = (PC_SPAI *)pc->data;
142: PetscBool iascii;
144: PetscFunctionBegin;
145: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
146: if (iascii) {
147: PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon));
148: PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps));
149: PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max));
150: PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew));
151: PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size));
152: PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size));
153: PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose));
154: PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp));
155: }
156: PetscFunctionReturn(PETSC_SUCCESS);
157: }
159: static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
160: {
161: PC_SPAI *ispai = (PC_SPAI *)pc->data;
163: PetscFunctionBegin;
164: ispai->epsilon = (double)epsilon1;
165: PetscFunctionReturn(PETSC_SUCCESS);
166: }
168: static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
169: {
170: PC_SPAI *ispai = (PC_SPAI *)pc->data;
172: PetscFunctionBegin;
173: ispai->nbsteps = (int)nbsteps1;
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
177: /* added 1/7/99 g.h. */
178: static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
179: {
180: PC_SPAI *ispai = (PC_SPAI *)pc->data;
182: PetscFunctionBegin;
183: ispai->max = (int)max1;
184: PetscFunctionReturn(PETSC_SUCCESS);
185: }
187: static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
188: {
189: PC_SPAI *ispai = (PC_SPAI *)pc->data;
191: PetscFunctionBegin;
192: ispai->maxnew = (int)maxnew1;
193: PetscFunctionReturn(PETSC_SUCCESS);
194: }
196: static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
197: {
198: PC_SPAI *ispai = (PC_SPAI *)pc->data;
200: PetscFunctionBegin;
201: ispai->block_size = (int)block_size1;
202: PetscFunctionReturn(PETSC_SUCCESS);
203: }
205: static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
206: {
207: PC_SPAI *ispai = (PC_SPAI *)pc->data;
209: PetscFunctionBegin;
210: ispai->cache_size = (int)cache_size;
211: PetscFunctionReturn(PETSC_SUCCESS);
212: }
214: static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
215: {
216: PC_SPAI *ispai = (PC_SPAI *)pc->data;
218: PetscFunctionBegin;
219: ispai->verbose = (int)verbose;
220: PetscFunctionReturn(PETSC_SUCCESS);
221: }
223: static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
224: {
225: PC_SPAI *ispai = (PC_SPAI *)pc->data;
227: PetscFunctionBegin;
228: ispai->sp = (int)sp;
229: PetscFunctionReturn(PETSC_SUCCESS);
230: }
232: /*@
233: PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner
235: Input Parameters:
236: + pc - the preconditioner
237: - epsilon1 - epsilon (default .4)
239: Note:
240: Espilon must be between 0 and 1. It controls the
241: quality of the approximation of M to the inverse of
242: A. Higher values of epsilon lead to more work, more
243: fill, and usually better preconditioners. In many
244: cases the best choice of epsilon is the one that
245: divides the total solution time equally between the
246: preconditioner and the solver.
248: Level: intermediate
250: .seealso: `PCSPAI`, `PCSetType()`
251: @*/
252: PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
253: {
254: PetscFunctionBegin;
255: PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
256: PetscFunctionReturn(PETSC_SUCCESS);
257: }
259: /*@
260: PCSPAISetNBSteps - set maximum number of improvement steps per row in
261: the `PCSPAI` preconditioner
263: Input Parameters:
264: + pc - the preconditioner
265: - nbsteps1 - number of steps (default 5)
267: Note:
268: `PCSPAI` constructs to approximation to every column of
269: the exact inverse of A in a series of improvement
270: steps. The quality of the approximation is determined
271: by epsilon. If an approximation achieving an accuracy
272: of epsilon is not obtained after ns steps, SPAI simply
273: uses the best approximation constructed so far.
275: Level: intermediate
277: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
278: @*/
279: PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
280: {
281: PetscFunctionBegin;
282: PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
283: PetscFunctionReturn(PETSC_SUCCESS);
284: }
286: /* added 1/7/99 g.h. */
287: /*@
288: PCSPAISetMax - set the size of various working buffers in
289: the `PCSPAI` preconditioner
291: Input Parameters:
292: + pc - the preconditioner
293: - max1 - size (default is 5000)
295: Level: intermediate
297: .seealso: `PCSPAI`, `PCSetType()`
298: @*/
299: PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
300: {
301: PetscFunctionBegin;
302: PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
303: PetscFunctionReturn(PETSC_SUCCESS);
304: }
306: /*@
307: PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
308: in `PCSPAI` preconditioner
310: Input Parameters:
311: + pc - the preconditioner
312: - maxnew1 - maximum number (default 5)
314: Level: intermediate
316: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
317: @*/
318: PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
319: {
320: PetscFunctionBegin;
321: PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
322: PetscFunctionReturn(PETSC_SUCCESS);
323: }
325: /*@
326: PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner
328: Input Parameters:
329: + pc - the preconditioner
330: - block_size1 - block size (default 1)
332: Notes:
333: A block
334: size of 1 treats A as a matrix of scalar elements. A
335: block size of s > 1 treats A as a matrix of sxs
336: blocks. A block size of 0 treats A as a matrix with
337: variable sized blocks, which are determined by
338: searching for dense square diagonal blocks in A.
339: This can be very effective for finite-element
340: matrices.
342: SPAI will convert A to block form, use a block
343: version of the preconditioner algorithm, and then
344: convert the result back to scalar form.
346: In many cases the a block-size parameter other than 1
347: can lead to very significant improvement in
348: performance.
350: Level: intermediate
352: .seealso: `PCSPAI`, `PCSetType()`
353: @*/
354: PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
355: {
356: PetscFunctionBegin;
357: PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
361: /*@
362: PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner
364: Input Parameters:
365: + pc - the preconditioner
366: - cache_size - cache size {0,1,2,3,4,5} (default 5)
368: Note:
369: `PCSPAI` uses a hash table to cache messages and avoid
370: redundant communication. If suggest always using
371: 5. This parameter is irrelevant in the serial
372: version.
374: Level: intermediate
376: .seealso: `PCSPAI`, `PCSetType()`
377: @*/
378: PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
379: {
380: PetscFunctionBegin;
381: PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
382: PetscFunctionReturn(PETSC_SUCCESS);
383: }
385: /*@
386: PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner
388: Input Parameters:
389: + pc - the preconditioner
390: - verbose - level (default 1)
392: Note:
393: print parameters, timings and matrix statistics
395: Level: intermediate
397: .seealso: `PCSPAI`, `PCSetType()`
398: @*/
399: PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
400: {
401: PetscFunctionBegin;
402: PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
403: PetscFunctionReturn(PETSC_SUCCESS);
404: }
406: /*@
407: PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner
409: Input Parameters:
410: + pc - the preconditioner
411: - sp - 0 or 1
413: Note:
414: If A has a symmetric nonzero pattern use -sp 1 to
415: improve performance by eliminating some communication
416: in the parallel version. Even if A does not have a
417: symmetric nonzero pattern -sp 1 may well lead to good
418: results, but the code will not follow the published
419: SPAI algorithm exactly.
421: Level: intermediate
423: .seealso: `PCSPAI`, `PCSetType()`
424: @*/
425: PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
426: {
427: PetscFunctionBegin;
428: PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
429: PetscFunctionReturn(PETSC_SUCCESS);
430: }
432: static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
433: {
434: PC_SPAI *ispai = (PC_SPAI *)pc->data;
435: int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
436: double epsilon1;
437: PetscBool flg;
439: PetscFunctionBegin;
440: PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
441: PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
442: if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
443: PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
444: if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
445: /* added 1/7/99 g.h. */
446: PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
447: if (flg) PetscCall(PCSPAISetMax(pc, max1));
448: PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
449: if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
450: PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
451: if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
452: PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
453: if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
454: PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
455: if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
456: PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
457: if (flg) PetscCall(PCSPAISetSp(pc, sp));
458: PetscOptionsHeadEnd();
459: PetscFunctionReturn(PETSC_SUCCESS);
460: }
462: /*MC
463: PCSPAI - Use the Sparse Approximate Inverse method
465: Options Database Keys:
466: + -pc_spai_epsilon <eps> - set tolerance
467: . -pc_spai_nbstep <n> - set nbsteps
468: . -pc_spai_max <m> - set max
469: . -pc_spai_max_new <m> - set maxnew
470: . -pc_spai_block_size <n> - set block size
471: . -pc_spai_cache_size <n> - set cache size
472: . -pc_spai_sp <m> - set sp
473: - -pc_spai_set_verbose <true,false> - verbose output
475: Level: beginner
477: Note:
478: This only works with `MATAIJ` matrices.
480: References:
481: . * - Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)
483: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
484: `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
485: `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
486: M*/
488: PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
489: {
490: PC_SPAI *ispai;
492: PetscFunctionBegin;
493: PetscCall(PetscNew(&ispai));
494: pc->data = ispai;
496: pc->ops->destroy = PCDestroy_SPAI;
497: pc->ops->apply = PCApply_SPAI;
498: pc->ops->matapply = PCMatApply_SPAI;
499: pc->ops->applyrichardson = 0;
500: pc->ops->setup = PCSetUp_SPAI;
501: pc->ops->view = PCView_SPAI;
502: pc->ops->setfromoptions = PCSetFromOptions_SPAI;
504: ispai->epsilon = .4;
505: ispai->nbsteps = 5;
506: ispai->max = 5000;
507: ispai->maxnew = 5;
508: ispai->block_size = 1;
509: ispai->cache_size = 5;
510: ispai->verbose = 0;
512: ispai->sp = 1;
513: PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));
515: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
516: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
517: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
518: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
519: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
520: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
521: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
522: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
523: PetscFunctionReturn(PETSC_SUCCESS);
524: }
526: /*
527: Converts from a PETSc matrix to an SPAI matrix
528: */
529: static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
530: {
531: matrix *M;
532: int i, j, col;
533: int row_indx;
534: int len, pe, local_indx, start_indx;
535: int *mapping;
536: const int *cols;
537: const double *vals;
538: int n, mnl, nnl, nz, rstart, rend;
539: PetscMPIInt size, rank;
540: struct compressed_lines *rows;
542: PetscFunctionBegin;
543: PetscCallMPI(MPI_Comm_size(comm, &size));
544: PetscCallMPI(MPI_Comm_rank(comm, &rank));
545: PetscCall(MatGetSize(A, &n, &n));
546: PetscCall(MatGetLocalSize(A, &mnl, &nnl));
548: /*
549: not sure why a barrier is required. commenting out
550: PetscCallMPI(MPI_Barrier(comm));
551: */
553: M = new_matrix((SPAI_Comm)comm);
555: M->n = n;
556: M->bs = 1;
557: M->max_block_size = 1;
559: M->mnls = (int *)malloc(sizeof(int) * size);
560: M->start_indices = (int *)malloc(sizeof(int) * size);
561: M->pe = (int *)malloc(sizeof(int) * n);
562: M->block_sizes = (int *)malloc(sizeof(int) * n);
563: for (i = 0; i < n; i++) M->block_sizes[i] = 1;
565: PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));
567: M->start_indices[0] = 0;
568: for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];
570: M->mnl = M->mnls[M->myid];
571: M->my_start_index = M->start_indices[M->myid];
573: for (i = 0; i < size; i++) {
574: start_indx = M->start_indices[i];
575: for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
576: }
578: if (AT) {
579: M->lines = new_compressed_lines(M->mnls[rank], 1);
580: } else {
581: M->lines = new_compressed_lines(M->mnls[rank], 0);
582: }
584: rows = M->lines;
586: /* Determine the mapping from global indices to pointers */
587: PetscCall(PetscMalloc1(M->n, &mapping));
588: pe = 0;
589: local_indx = 0;
590: for (i = 0; i < M->n; i++) {
591: if (local_indx >= M->mnls[pe]) {
592: pe++;
593: local_indx = 0;
594: }
595: mapping[i] = local_indx + M->start_indices[pe];
596: local_indx++;
597: }
599: /************** Set up the row structure *****************/
601: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
602: for (i = rstart; i < rend; i++) {
603: row_indx = i - rstart;
604: PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
605: /* allocate buffers */
606: rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
607: rows->A[row_indx] = (double *)malloc(nz * sizeof(double));
608: /* copy the matrix */
609: for (j = 0; j < nz; j++) {
610: col = cols[j];
611: len = rows->len[row_indx]++;
613: rows->ptrs[row_indx][len] = mapping[col];
614: rows->A[row_indx][len] = vals[j];
615: }
616: rows->slen[row_indx] = rows->len[row_indx];
618: PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
619: }
621: /************** Set up the column structure *****************/
623: if (AT) {
624: for (i = rstart; i < rend; i++) {
625: row_indx = i - rstart;
626: PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
627: /* allocate buffers */
628: rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
629: /* copy the matrix (i.e., the structure) */
630: for (j = 0; j < nz; j++) {
631: col = cols[j];
632: len = rows->rlen[row_indx]++;
634: rows->rptrs[row_indx][len] = mapping[col];
635: }
636: PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
637: }
638: }
640: PetscCall(PetscFree(mapping));
642: order_pointers(M);
643: M->maxnz = calc_maxnz(M);
644: *B = M;
645: PetscFunctionReturn(PETSC_SUCCESS);
646: }
648: /*
649: Converts from an SPAI matrix B to a PETSc matrix PB.
650: This assumes that the SPAI matrix B is stored in
651: COMPRESSED-ROW format.
652: */
653: static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
654: {
655: PetscMPIInt size, rank;
656: int m, n, M, N;
657: int d_nz, o_nz;
658: int *d_nnz, *o_nnz;
659: int i, k, global_row, global_col, first_diag_col, last_diag_col;
660: PetscScalar val;
662: PetscFunctionBegin;
663: PetscCallMPI(MPI_Comm_size(comm, &size));
664: PetscCallMPI(MPI_Comm_rank(comm, &rank));
666: m = n = B->mnls[rank];
667: d_nz = o_nz = 0;
669: /* Determine preallocation for MatCreateAIJ */
670: PetscCall(PetscMalloc1(m, &d_nnz));
671: PetscCall(PetscMalloc1(m, &o_nnz));
672: for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
673: first_diag_col = B->start_indices[rank];
674: last_diag_col = first_diag_col + B->mnls[rank];
675: for (i = 0; i < B->mnls[rank]; i++) {
676: for (k = 0; k < B->lines->len[i]; k++) {
677: global_col = B->lines->ptrs[i][k];
678: if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
679: else o_nnz[i]++;
680: }
681: }
683: M = N = B->n;
684: /* Here we only know how to create AIJ format */
685: PetscCall(MatCreate(comm, PB));
686: PetscCall(MatSetSizes(*PB, m, n, M, N));
687: PetscCall(MatSetType(*PB, MATAIJ));
688: PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
689: PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));
691: for (i = 0; i < B->mnls[rank]; i++) {
692: global_row = B->start_indices[rank] + i;
693: for (k = 0; k < B->lines->len[i]; k++) {
694: global_col = B->lines->ptrs[i][k];
696: val = B->lines->A[i][k];
697: PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
698: }
699: }
701: PetscCall(PetscFree(d_nnz));
702: PetscCall(PetscFree(o_nnz));
704: PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
705: PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
706: PetscFunctionReturn(PETSC_SUCCESS);
707: }