Actual source code: superlu_dist.c
1: /*
2: Provides an interface to the SuperLU_DIST sparse solver
3: */
5: #include <../src/mat/impls/aij/seq/aij.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <petscpkg_version.h>
9: EXTERN_C_BEGIN
10: #if defined(PETSC_USE_COMPLEX)
11: #define CASTDOUBLECOMPLEX (doublecomplex*)
12: #include <superlu_zdefs.h>
13: #define LUstructInit zLUstructInit
14: #define ScalePermstructInit zScalePermstructInit
15: #define ScalePermstructFree zScalePermstructFree
16: #define LUstructFree zLUstructFree
17: #define Destroy_LU zDestroy_LU
18: #define ScalePermstruct_t zScalePermstruct_t
19: #define LUstruct_t zLUstruct_t
20: #define SOLVEstruct_t zSOLVEstruct_t
21: #define SolveFinalize zSolveFinalize
22: #define pgssvx pzgssvx
23: #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
24: #define pGetDiagU pzGetDiagU
25: #define allocateA_dist zallocateA_dist
26: #define SLU_ZD SLU_Z
27: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
28: #define DeAllocLlu_3d zDeAllocLlu_3d
29: #define DeAllocGlu_3d zDeAllocGlu_3d
30: #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
31: #define pgssvx3d pzgssvx3d
32: #endif
33: #else
34: #define CASTDOUBLECOMPLEX
35: #include <superlu_ddefs.h>
36: #define LUstructInit dLUstructInit
37: #define ScalePermstructInit dScalePermstructInit
38: #define ScalePermstructFree dScalePermstructFree
39: #define LUstructFree dLUstructFree
40: #define Destroy_LU dDestroy_LU
41: #define ScalePermstruct_t dScalePermstruct_t
42: #define LUstruct_t dLUstruct_t
43: #define SOLVEstruct_t dSOLVEstruct_t
44: #define SolveFinalize dSolveFinalize
45: #define pgssvx pdgssvx
46: #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
47: #define pGetDiagU pdGetDiagU
48: #define allocateA_dist dallocateA_dist
49: #define SLU_ZD SLU_D
50: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
51: #define DeAllocLlu_3d dDeAllocLlu_3d
52: #define DeAllocGlu_3d dDeAllocGlu_3d
53: #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
54: #define pgssvx3d pdgssvx3d
55: #endif
56: #endif
57: EXTERN_C_END
59: typedef struct {
60: int_t nprow,npcol,*row,*col;
61: gridinfo_t grid;
62: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
63: PetscBool use3d;
64: int_t npdep; /* replication factor, must be power of two */
65: gridinfo3d_t grid3d;
66: #endif
67: superlu_dist_options_t options;
68: SuperMatrix A_sup;
69: ScalePermstruct_t ScalePermstruct;
70: LUstruct_t LUstruct;
71: int StatPrint;
72: SOLVEstruct_t SOLVEstruct;
73: fact_t FactPattern;
74: MPI_Comm comm_superlu;
75: #if defined(PETSC_USE_COMPLEX)
76: doublecomplex *val;
77: #else
78: double *val;
79: #endif
80: PetscBool matsolve_iscalled,matmatsolve_iscalled;
81: PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
82: } Mat_SuperLU_DIST;
84: PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU)
85: {
86: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
88: PetscStackCall("SuperLU_DIST:pGetDiagU",pGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,CASTDOUBLECOMPLEX diagU));
89: return 0;
90: }
92: PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU)
93: {
95: PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));
96: return 0;
97: }
99: /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
100: typedef struct {
101: MPI_Comm comm;
102: PetscBool busy;
103: gridinfo_t grid;
104: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
105: PetscBool use3d;
106: gridinfo3d_t grid3d;
107: #endif
108: } PetscSuperLU_DIST;
109: static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
111: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *attr_val,void *extra_state)
112: {
113: PetscSuperLU_DIST *context = (PetscSuperLU_DIST *) attr_val;
115: if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Unexpected keyval");
116: PetscInfo(NULL,"Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n");
117: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
118: if (context->use3d) {
119: PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&context->grid3d));
120: } else
121: #endif
122: PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&context->grid));
123: MPI_Comm_free(&context->comm);
124: PetscFree(context);
125: return MPI_SUCCESS;
126: }
128: /*
129: Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
130: users who do not destroy all PETSc objects before PetscFinalize().
132: The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
133: can still check that the keyval associated with the MPI communicator is correct when the MPI
134: communicator is destroyed.
136: This is called in PetscFinalize()
137: */
138: static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
139: {
140: PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
142: PetscInfo(NULL,"Freeing Petsc_Superlu_dist_keyval\n");
143: MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp);
144: return 0;
145: }
147: static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
148: {
149: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;
151: if (lu->CleanUpSuperLU_Dist) {
152: /* Deallocate SuperLU_DIST storage */
153: PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
154: if (lu->options.SolveInitialized) {
155: PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
156: }
157: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
158: if (lu->use3d) {
159: if (lu->grid3d.zscp.Iam == 0) {
160: PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
161: } else {
162: PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
163: PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct));
164: }
165: PetscStackCall("SuperLU_DIST:Destroy_A3d_gathered_on_2d",Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
166: } else
167: #endif
168: PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
169: PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct));
170: PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct));
172: /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
173: if (lu->comm_superlu) {
174: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
175: if (lu->use3d) {
176: PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&lu->grid3d));
177: } else
178: #endif
179: PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid));
180: }
181: }
182: /*
183: * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
184: * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
185: * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
186: * is off, and the communicator was not released or marked as "not busy " in the old code.
187: * Here we try to release comm regardless.
188: */
189: if (lu->comm_superlu) {
190: PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);
191: } else {
192: PetscSuperLU_DIST *context;
193: MPI_Comm comm;
194: PetscMPIInt flg;
196: PetscObjectGetComm((PetscObject)A,&comm);
197: MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);
199: context->busy = PETSC_FALSE;
200: }
202: PetscFree(A->data);
203: /* clear composed functions */
204: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
205: PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);
207: return 0;
208: }
210: static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
211: {
212: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;
213: PetscInt m=A->rmap->n;
214: SuperLUStat_t stat;
215: double berr[1];
216: PetscScalar *bptr=NULL;
217: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
218: static PetscBool cite = PETSC_FALSE;
221: PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);
223: if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
224: /* see comments in MatMatSolve() */
225: PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
226: lu->options.SolveInitialized = NO;
227: }
228: VecCopy(b_mpi,x);
229: VecGetArray(x,&bptr);
231: PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
232: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE)
233: if (lu->use3d)
234: PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
235: else
236: #endif
237: PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
240: if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
241: PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
243: VecRestoreArray(x,&bptr);
244: lu->matsolve_iscalled = PETSC_TRUE;
245: lu->matmatsolve_iscalled = PETSC_FALSE;
246: return 0;
247: }
249: static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
250: {
251: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;
252: PetscInt m=A->rmap->n,nrhs;
253: SuperLUStat_t stat;
254: double berr[1];
255: PetscScalar *bptr;
256: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
257: PetscBool flg;
260: PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
262: if (X != B_mpi) {
263: PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
265: }
267: if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
268: /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
269: thus destroy it and create a new SOLVEstruct.
270: Otherwise it may result in memory corruption or incorrect solution
271: See src/mat/tests/ex125.c */
272: PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
273: lu->options.SolveInitialized = NO;
274: }
275: if (X != B_mpi) {
276: MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);
277: }
279: MatGetSize(B_mpi,NULL,&nrhs);
281: PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
282: MatDenseGetArray(X,&bptr);
284: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE)
285: if (lu->use3d)
286: PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
287: else
288: #endif
289: PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
292: MatDenseRestoreArray(X,&bptr);
294: if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
295: PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
296: lu->matsolve_iscalled = PETSC_FALSE;
297: lu->matmatsolve_iscalled = PETSC_TRUE;
298: return 0;
299: }
301: /*
302: input:
303: F: numeric Cholesky factor
304: output:
305: nneg: total number of negative pivots
306: nzero: total number of zero pivots
307: npos: (global dimension of F) - nneg - nzero
308: */
309: static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
310: {
311: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
312: PetscScalar *diagU=NULL;
313: PetscInt M,i,neg=0,zero=0,pos=0;
314: PetscReal r;
318: MatGetSize(F,&M,NULL);
319: PetscMalloc1(M,&diagU);
320: MatSuperluDistGetDiagU(F,diagU);
321: for (i=0; i<M; i++) {
322: #if defined(PETSC_USE_COMPLEX)
323: r = PetscImaginaryPart(diagU[i])/10.0;
325: r = PetscRealPart(diagU[i]);
326: #else
327: r = diagU[i];
328: #endif
329: if (r > 0) {
330: pos++;
331: } else if (r < 0) {
332: neg++;
333: } else zero++;
334: }
336: PetscFree(diagU);
337: if (nneg) *nneg = neg;
338: if (nzero) *nzero = zero;
339: if (npos) *npos = pos;
340: return 0;
341: }
343: static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
344: {
345: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
346: Mat Aloc;
347: const PetscScalar *av;
348: const PetscInt *ai=NULL,*aj=NULL;
349: PetscInt nz,dummy;
350: int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
351: SuperLUStat_t stat;
352: double *berr=0;
353: PetscBool ismpiaij,isseqaij,flg;
355: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
356: PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij);
357: if (ismpiaij) {
358: MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
359: } else if (isseqaij) {
360: PetscObjectReference((PetscObject)A);
361: Aloc = A;
362: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name);
364: MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);
366: MatSeqAIJGetArrayRead(Aloc,&av);
367: nz = ai[Aloc->rmap->n];
369: /* Allocations for A_sup */
370: if (lu->options.Fact == DOFACT) { /* first numeric factorization */
371: PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row));
372: } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
373: if (lu->FactPattern == SamePattern_SameRowPerm) {
374: lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
375: } else if (lu->FactPattern == SamePattern) {
376: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
377: if (lu->use3d) {
378: if (lu->grid3d.zscp.Iam == 0) {
379: PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
380: PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
381: } else {
382: PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
383: PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct));
384: }
385: } else
386: #endif
387: PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
388: lu->options.Fact = SamePattern;
389: } else if (lu->FactPattern == DOFACT) {
390: PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
391: PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
392: lu->options.Fact = DOFACT;
393: PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row));
394: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
395: }
397: /* Copy AIJ matrix to superlu_dist matrix */
398: PetscArraycpy(lu->row,ai,Aloc->rmap->n+1);
399: PetscArraycpy(lu->col,aj,nz);
400: PetscArraycpy(lu->val,av,nz);
401: MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);
403: MatSeqAIJRestoreArrayRead(Aloc,&av);
404: MatDestroy(&Aloc);
406: /* Create and setup A_sup */
407: if (lu->options.Fact == DOFACT) {
408: PetscStackCall("SuperLU_DIST:Create_CompRowLoc_Matrix_dist",Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_ZD, SLU_GE));
409: }
411: /* Factor the matrix. */
412: PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
413: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) && !PetscDefined(MISSING_GETLINE)
414: if (lu->use3d) {
415: PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
416: } else
417: #endif
418: PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
419: if (sinfo > 0) {
421: else {
422: if (sinfo <= lu->A_sup.ncol) {
423: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
424: PetscInfo(F,"U(i,i) is exactly zero, i= %d\n",sinfo);
425: } else if (sinfo > lu->A_sup.ncol) {
426: /*
427: number of bytes allocated when memory allocation
428: failure occurred, plus A->ncol.
429: */
430: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
431: PetscInfo(F,"Number of bytes allocated when memory allocation fails %d\n",sinfo);
432: }
433: }
436: if (lu->options.PrintStat) {
437: PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
438: }
439: PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
440: F->assembled = PETSC_TRUE;
441: F->preallocated = PETSC_TRUE;
442: lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
443: return 0;
444: }
446: /* Note the Petsc r and c permutations are ignored */
447: static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
448: {
449: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
450: PetscInt M = A->rmap->N,N=A->cmap->N;
452: /* Initialize ScalePermstruct and LUstruct. */
453: PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct));
454: PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct));
455: F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
456: F->ops->solve = MatSolve_SuperLU_DIST;
457: F->ops->matsolve = MatMatSolve_SuperLU_DIST;
458: F->ops->getinertia = NULL;
460: if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
461: lu->CleanUpSuperLU_Dist = PETSC_TRUE;
462: return 0;
463: }
465: static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info)
466: {
467: MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);
468: F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
469: return 0;
470: }
472: static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type)
473: {
474: *type = MATSOLVERSUPERLU_DIST;
475: return 0;
476: }
478: static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer)
479: {
480: Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data;
481: superlu_dist_options_t options;
483: /* check if matrix is superlu_dist type */
484: if (A->ops->solve != MatSolve_SuperLU_DIST) return 0;
486: options = lu->options;
487: PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
488: /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
489: * format spec for int64_t is set to %d for whatever reason */
490: PetscViewerASCIIPrintf(viewer," Process grid nprow %lld x npcol %lld \n",(long long)lu->nprow,(long long)lu->npcol);
491: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
492: if (lu->use3d) {
493: PetscViewerASCIIPrintf(viewer," Using 3d decomposition with npdep %lld \n",(long long)lu->npdep);
494: }
495: #endif
497: PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);
498: PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);
499: PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);
500: PetscViewerASCIIPrintf(viewer," Processors in row %lld col partition %lld \n",(long long)lu->nprow,(long long)lu->npcol);
502: switch (options.RowPerm) {
503: case NOROWPERM:
504: PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n");
505: break;
506: case LargeDiag_MC64:
507: PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n");
508: break;
509: case LargeDiag_AWPM:
510: PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n");
511: break;
512: case MY_PERMR:
513: PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n");
514: break;
515: default:
516: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
517: }
519: switch (options.ColPerm) {
520: case NATURAL:
521: PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");
522: break;
523: case MMD_AT_PLUS_A:
524: PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");
525: break;
526: case MMD_ATA:
527: PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");
528: break;
529: /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
530: case METIS_AT_PLUS_A:
531: PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");
532: break;
533: case PARMETIS:
534: PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");
535: break;
536: default:
537: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
538: }
540: PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);
542: if (lu->FactPattern == SamePattern) {
543: PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");
544: } else if (lu->FactPattern == SamePattern_SameRowPerm) {
545: PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");
546: } else if (lu->FactPattern == DOFACT) {
547: PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n");
548: } else {
549: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern");
550: }
551: return 0;
552: }
554: static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
555: {
556: PetscBool iascii;
557: PetscViewerFormat format;
559: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
560: if (iascii) {
561: PetscViewerGetFormat(viewer,&format);
562: if (format == PETSC_VIEWER_ASCII_INFO) {
563: MatView_Info_SuperLU_DIST(A,viewer);
564: }
565: }
566: return 0;
567: }
569: static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
570: {
571: Mat B;
572: Mat_SuperLU_DIST *lu;
573: PetscErrorCode ierr;
574: PetscInt M=A->rmap->N,N=A->cmap->N,indx;
575: PetscMPIInt size;
576: superlu_dist_options_t options;
577: PetscBool flg;
578: const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"};
579: const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"};
580: const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"};
581: PetscBool set;
583: /* Create the factorization matrix */
584: MatCreate(PetscObjectComm((PetscObject)A),&B);
585: MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
586: PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);
587: MatSetUp(B);
588: B->ops->getinfo = MatGetInfo_External;
589: B->ops->view = MatView_SuperLU_DIST;
590: B->ops->destroy = MatDestroy_SuperLU_DIST;
592: /* Set the default input options:
593: options.Fact = DOFACT;
594: options.Equil = YES;
595: options.ParSymbFact = NO;
596: options.ColPerm = METIS_AT_PLUS_A;
597: options.RowPerm = LargeDiag_MC64;
598: options.ReplaceTinyPivot = YES;
599: options.IterRefine = DOUBLE;
600: options.Trans = NOTRANS;
601: options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
602: options.RefineInitialized = NO;
603: options.PrintStat = YES;
604: options.SymPattern = NO;
605: */
606: set_default_options_dist(&options);
608: B->trivialsymbolic = PETSC_TRUE;
609: if (ftype == MAT_FACTOR_LU) {
610: B->factortype = MAT_FACTOR_LU;
611: B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
612: } else {
613: B->factortype = MAT_FACTOR_CHOLESKY;
614: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
615: options.SymPattern = YES;
616: }
618: /* set solvertype */
619: PetscFree(B->solvertype);
620: PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);
622: PetscNewLog(B,&lu);
623: B->data = lu;
624: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
626: {
627: PetscMPIInt flg;
628: MPI_Comm comm;
629: PetscSuperLU_DIST *context = NULL;
631: PetscObjectGetComm((PetscObject)A,&comm);
632: if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
633: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_Superlu_dist_keyval_Delete_Fn,&Petsc_Superlu_dist_keyval,(void*)0);
634: PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free);
635: }
636: MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);
637: if (!flg || context->busy) {
638: if (!flg) {
639: PetscNew(&context);
640: context->busy = PETSC_TRUE;
641: MPI_Comm_dup(comm,&context->comm);
642: MPI_Comm_set_attr(comm,Petsc_Superlu_dist_keyval,context);
643: } else {
644: PetscCommGetComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);
645: }
647: /* Default number of process columns and rows */
648: lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size));
649: if (!lu->nprow) lu->nprow = 1;
650: while (lu->nprow > 0) {
651: lu->npcol = (int_t) (size/lu->nprow);
652: if (size == lu->nprow * lu->npcol) break;
653: lu->nprow--;
654: }
655: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
656: lu->use3d = PETSC_FALSE;
657: lu->npdep = 1;
658: #endif
659: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
660: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
661: PetscOptionsBool("-mat_superlu_dist_3d","Use SuperLU_DIST 3D distribution","None",lu->use3d,&lu->use3d,NULL);
663: if (lu->use3d) {
664: PetscInt t;
665: PetscOptionsInt("-mat_superlu_dist_d","Number of z entries in processor partition","None",lu->npdep,(PetscInt*)&lu->npdep,NULL);
666: t = (PetscInt) PetscLog2Real((PetscReal)lu->npdep);
668: if (lu->npdep > 1) {
669: lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)(size/lu->npdep)));
670: if (!lu->nprow) lu->nprow = 1;
671: while (lu->nprow > 0) {
672: lu->npcol = (int_t) (size/(lu->npdep*lu->nprow));
673: if (size == lu->nprow * lu->npcol * lu->npdep) break;
674: lu->nprow--;
675: }
676: }
677: }
678: #endif
679: PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);
680: PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);
681: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
683: #else
685: #endif
686: PetscOptionsEnd();
687: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
688: if (lu->use3d) {
689: PetscStackCall("SuperLU_DIST:superlu_gridinit3d",superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol,lu->npdep, &lu->grid3d));
690: if (context) {context->grid3d = lu->grid3d; context->use3d = lu->use3d;}
691: } else {
692: #endif
693: PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
694: if (context) context->grid = lu->grid;
695: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
696: }
697: #endif
698: PetscInfo(NULL,"Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n");
699: if (flg) {
700: PetscInfo(NULL,"Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n");
701: } else {
702: PetscInfo(NULL,"Storing communicator and SuperLU_DIST grid in communicator attribute\n");
703: }
704: } else {
705: PetscInfo(NULL,"Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.");
706: context->busy = PETSC_TRUE;
707: lu->grid = context->grid;
708: }
709: }
711: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
712: PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);
713: if (set && !flg) options.Equil = NO;
715: PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);
716: if (flg) {
717: switch (indx) {
718: case 0:
719: options.RowPerm = NOROWPERM;
720: break;
721: case 1:
722: options.RowPerm = LargeDiag_MC64;
723: break;
724: case 2:
725: options.RowPerm = LargeDiag_AWPM;
726: break;
727: case 3:
728: options.RowPerm = MY_PERMR;
729: break;
730: default:
731: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation");
732: }
733: }
735: PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);
736: if (flg) {
737: switch (indx) {
738: case 0:
739: options.ColPerm = NATURAL;
740: break;
741: case 1:
742: options.ColPerm = MMD_AT_PLUS_A;
743: break;
744: case 2:
745: options.ColPerm = MMD_ATA;
746: break;
747: case 3:
748: options.ColPerm = METIS_AT_PLUS_A;
749: break;
750: case 4:
751: options.ColPerm = PARMETIS; /* only works for np>1 */
752: break;
753: default:
754: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
755: }
756: }
758: options.ReplaceTinyPivot = NO;
759: PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);
760: if (set && flg) options.ReplaceTinyPivot = YES;
762: options.ParSymbFact = NO;
763: PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);
764: if (set && flg && size>1) {
765: #if defined(PETSC_HAVE_PARMETIS)
766: options.ParSymbFact = YES;
767: options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
768: #else
769: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS");
770: #endif
771: }
773: lu->FactPattern = SamePattern;
774: PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);
775: if (flg) {
776: switch (indx) {
777: case 0:
778: lu->FactPattern = SamePattern;
779: break;
780: case 1:
781: lu->FactPattern = SamePattern_SameRowPerm;
782: break;
783: case 2:
784: lu->FactPattern = DOFACT;
785: break;
786: }
787: }
789: options.IterRefine = NOREFINE;
790: PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE ,&flg,&set);
791: if (set) {
792: if (flg) options.IterRefine = SLU_DOUBLE;
793: else options.IterRefine = NOREFINE;
794: }
796: if (PetscLogPrintInfo) options.PrintStat = YES;
797: else options.PrintStat = NO;
798: PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);
799: PetscOptionsEnd();
801: lu->options = options;
802: lu->options.Fact = DOFACT;
803: lu->matsolve_iscalled = PETSC_FALSE;
804: lu->matmatsolve_iscalled = PETSC_FALSE;
806: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);
807: PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);
809: *F = B;
810: return 0;
811: }
813: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
814: {
815: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);
816: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);
817: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);
818: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);
819: return 0;
820: }
822: /*MC
823: MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
825: Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST
827: Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver
829: Works with AIJ matrices
831: Options Database Keys:
832: + -mat_superlu_dist_r <n> - number of rows in processor partition
833: . -mat_superlu_dist_c <n> - number of columns in processor partition
834: . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
835: . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if -mat_superlu_dist_3d) is provided
836: . -mat_superlu_dist_equil - equilibrate the matrix
837: . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
838: . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
839: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
840: . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT
841: . -mat_superlu_dist_iterrefine - use iterative refinement
842: - -mat_superlu_dist_statprint - print factorization information
844: Notes:
845: If PETSc was configured with --with-cuda than this solver will automatically use the GPUs.
847: Level: beginner
849: .seealso: PCLU
851: .seealso: PCFactorSetMatSolverType(), MatSolverType
853: M*/