Actual source code: sbaijfact.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petsc/private/kernels/blockinvert.h>
4: #include <petscis.h>
6: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
7: {
8: Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9: MatScalar *dd = fact->a;
10: PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
12: PetscFunctionBegin;
13: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
15: nneg_tmp = 0;
16: npos_tmp = 0;
17: for (i = 0; i < mbs; i++) {
18: if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
19: else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
20: fi++;
21: }
22: if (nneg) *nneg = nneg_tmp;
23: if (npos) *npos = npos_tmp;
24: if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
25: PetscFunctionReturn(PETSC_SUCCESS);
26: }
28: /*
29: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
30: Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
31: */
32: static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
33: {
34: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b;
35: const PetscInt *rip, *ai, *aj;
36: PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
37: PetscInt m, reallocs = 0, prow;
38: PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
39: PetscReal f = info->fill;
40: PetscBool perm_identity;
42: PetscFunctionBegin;
43: /* check whether perm is the identity mapping */
44: PetscCall(ISIdentity(perm, &perm_identity));
45: PetscCall(ISGetIndices(perm, &rip));
47: if (perm_identity) { /* without permutation */
48: a->permute = PETSC_FALSE;
50: ai = a->i;
51: aj = a->j;
52: } else { /* non-trivial permutation */
53: a->permute = PETSC_TRUE;
55: PetscCall(MatReorderingSeqSBAIJ(A, perm));
57: ai = a->inew;
58: aj = a->jnew;
59: }
61: /* initialization */
62: PetscCall(PetscMalloc1(mbs + 1, &iu));
63: umax = (PetscInt)(f * ai[mbs] + 1);
64: umax += mbs + 1;
65: PetscCall(PetscMalloc1(umax, &ju));
66: iu[0] = mbs + 1;
67: juidx = mbs + 1; /* index for ju */
68: /* jl linked list for pivot row -- linked list for col index */
69: PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
70: for (i = 0; i < mbs; i++) {
71: jl[i] = mbs;
72: q[i] = 0;
73: }
75: /* for each row k */
76: for (k = 0; k < mbs; k++) {
77: for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
78: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
79: q[k] = mbs;
80: /* initialize nonzero structure of k-th row to row rip[k] of A */
81: jmin = ai[rip[k]] + 1; /* exclude diag[k] */
82: jmax = ai[rip[k] + 1];
83: for (j = jmin; j < jmax; j++) {
84: vj = rip[aj[j]]; /* col. value */
85: if (vj > k) {
86: qm = k;
87: do {
88: m = qm;
89: qm = q[m];
90: } while (qm < vj);
91: PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
92: nzk++;
93: q[m] = vj;
94: q[vj] = qm;
95: } /* if (vj > k) */
96: } /* for (j=jmin; j<jmax; j++) */
98: /* modify nonzero structure of k-th row by computing fill-in
99: for each row i to be merged in */
100: prow = k;
101: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
103: while (prow < k) {
104: /* merge row prow into k-th row */
105: jmin = iu[prow] + 1;
106: jmax = iu[prow + 1];
107: qm = k;
108: for (j = jmin; j < jmax; j++) {
109: vj = ju[j];
110: do {
111: m = qm;
112: qm = q[m];
113: } while (qm < vj);
114: if (qm != vj) {
115: nzk++;
116: q[m] = vj;
117: q[vj] = qm;
118: qm = vj;
119: }
120: }
121: prow = jl[prow]; /* next pivot row */
122: }
124: /* add k to row list for first nonzero element in k-th row */
125: if (nzk > 0) {
126: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
127: jl[k] = jl[i];
128: jl[i] = k;
129: }
130: iu[k + 1] = iu[k] + nzk;
132: /* allocate more space to ju if needed */
133: if (iu[k + 1] > umax) {
134: /* estimate how much additional space we will need */
135: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
136: /* just double the memory each time */
137: maxadd = umax;
138: if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
139: umax += maxadd;
141: /* allocate a longer ju */
142: PetscCall(PetscMalloc1(umax, &jutmp));
143: PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
144: PetscCall(PetscFree(ju));
145: ju = jutmp;
146: reallocs++; /* count how many times we realloc */
147: }
149: /* save nonzero structure of k-th row in ju */
150: i = k;
151: while (nzk--) {
152: i = q[i];
153: ju[juidx++] = i;
154: }
155: }
157: #if defined(PETSC_USE_INFO)
158: if (ai[mbs] != 0) {
159: PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
160: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
161: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
162: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
163: PetscCall(PetscInfo(A, "for best performance.\n"));
164: } else {
165: PetscCall(PetscInfo(A, "Empty matrix\n"));
166: }
167: #endif
169: PetscCall(ISRestoreIndices(perm, &rip));
170: PetscCall(PetscFree2(jl, q));
172: /* put together the new matrix */
173: PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
175: b = (Mat_SeqSBAIJ *)(F)->data;
176: b->singlemalloc = PETSC_FALSE;
177: b->free_a = PETSC_TRUE;
178: b->free_ij = PETSC_TRUE;
180: PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
181: b->j = ju;
182: b->i = iu;
183: b->diag = NULL;
184: b->ilen = NULL;
185: b->imax = NULL;
186: b->row = perm;
188: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
190: PetscCall(PetscObjectReference((PetscObject)perm));
192: b->icol = perm;
193: PetscCall(PetscObjectReference((PetscObject)perm));
194: PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
195: /* In b structure: Free imax, ilen, old a, old j.
196: Allocate idnew, solve_work, new a, new j */
197: b->maxnz = b->nz = iu[mbs];
199: (F)->info.factor_mallocs = reallocs;
200: (F)->info.fill_ratio_given = f;
201: if (ai[mbs] != 0) {
202: (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
203: } else {
204: (F)->info.fill_ratio_needed = 0.0;
205: }
206: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
207: PetscFunctionReturn(PETSC_SUCCESS);
208: }
209: /*
210: Symbolic U^T*D*U factorization for SBAIJ format.
211: See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
212: */
213: #include <petscbt.h>
214: #include <../src/mat/utils/freespace.h>
215: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
216: {
217: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
218: Mat_SeqSBAIJ *b;
219: PetscBool perm_identity, missing;
220: PetscReal fill = info->fill;
221: const PetscInt *rip, *ai = a->i, *aj = a->j;
222: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
223: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
224: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
225: PetscFreeSpaceList free_space = NULL, current_space = NULL;
226: PetscBT lnkbt;
228: PetscFunctionBegin;
229: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
230: PetscCall(MatMissingDiagonal(A, &missing, &i));
231: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
232: if (bs > 1) {
233: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
237: /* check whether perm is the identity mapping */
238: PetscCall(ISIdentity(perm, &perm_identity));
239: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
240: a->permute = PETSC_FALSE;
241: PetscCall(ISGetIndices(perm, &rip));
243: /* initialization */
244: PetscCall(PetscMalloc1(mbs + 1, &ui));
245: PetscCall(PetscMalloc1(mbs + 1, &udiag));
246: ui[0] = 0;
248: /* jl: linked list for storing indices of the pivot rows
249: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
250: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
251: for (i = 0; i < mbs; i++) {
252: jl[i] = mbs;
253: il[i] = 0;
254: }
256: /* create and initialize a linked list for storing column indices of the active row k */
257: nlnk = mbs + 1;
258: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
260: /* initial FreeSpace size is fill*(ai[mbs]+1) */
261: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
262: current_space = free_space;
264: for (k = 0; k < mbs; k++) { /* for each active row k */
265: /* initialize lnk by the column indices of row rip[k] of A */
266: nzk = 0;
267: ncols = ai[k + 1] - ai[k];
268: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
269: for (j = 0; j < ncols; j++) {
270: i = *(aj + ai[k] + j);
271: cols[j] = i;
272: }
273: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
274: nzk += nlnk;
276: /* update lnk by computing fill-in for each pivot row to be merged in */
277: prow = jl[k]; /* 1st pivot row */
279: while (prow < k) {
280: nextprow = jl[prow];
281: /* merge prow into k-th row */
282: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
283: jmax = ui[prow + 1];
284: ncols = jmax - jmin;
285: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
286: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
287: nzk += nlnk;
289: /* update il and jl for prow */
290: if (jmin < jmax) {
291: il[prow] = jmin;
292: j = *uj_ptr;
293: jl[prow] = jl[j];
294: jl[j] = prow;
295: }
296: prow = nextprow;
297: }
299: /* if free space is not available, make more free space */
300: if (current_space->local_remaining < nzk) {
301: i = mbs - k + 1; /* num of unfactored rows */
302: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
303: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
304: reallocs++;
305: }
307: /* copy data into free space, then initialize lnk */
308: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
310: /* add the k-th row into il and jl */
311: if (nzk > 1) {
312: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
313: jl[k] = jl[i];
314: jl[i] = k;
315: il[k] = ui[k] + 1;
316: }
317: ui_ptr[k] = current_space->array;
319: current_space->array += nzk;
320: current_space->local_used += nzk;
321: current_space->local_remaining -= nzk;
323: ui[k + 1] = ui[k] + nzk;
324: }
326: PetscCall(ISRestoreIndices(perm, &rip));
327: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
329: /* destroy list of free space and other temporary array(s) */
330: PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
331: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
332: PetscCall(PetscLLDestroy(lnk, lnkbt));
334: /* put together the new matrix in MATSEQSBAIJ format */
335: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
337: b = (Mat_SeqSBAIJ *)fact->data;
338: b->singlemalloc = PETSC_FALSE;
339: b->free_a = PETSC_TRUE;
340: b->free_ij = PETSC_TRUE;
342: PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
344: b->j = uj;
345: b->i = ui;
346: b->diag = udiag;
347: b->free_diag = PETSC_TRUE;
348: b->ilen = NULL;
349: b->imax = NULL;
350: b->row = perm;
351: b->icol = perm;
353: PetscCall(PetscObjectReference((PetscObject)perm));
354: PetscCall(PetscObjectReference((PetscObject)perm));
356: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
358: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
360: b->maxnz = b->nz = ui[mbs];
362: fact->info.factor_mallocs = reallocs;
363: fact->info.fill_ratio_given = fill;
364: if (ai[mbs] != 0) {
365: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
366: } else {
367: fact->info.fill_ratio_needed = 0.0;
368: }
369: #if defined(PETSC_USE_INFO)
370: if (ai[mbs] != 0) {
371: PetscReal af = fact->info.fill_ratio_needed;
372: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
373: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
374: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
375: } else {
376: PetscCall(PetscInfo(A, "Empty matrix\n"));
377: }
378: #endif
379: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
380: PetscFunctionReturn(PETSC_SUCCESS);
381: }
383: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
384: {
385: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
386: Mat_SeqSBAIJ *b;
387: PetscBool perm_identity, missing;
388: PetscReal fill = info->fill;
389: const PetscInt *rip, *ai, *aj;
390: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
391: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
392: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
393: PetscFreeSpaceList free_space = NULL, current_space = NULL;
394: PetscBT lnkbt;
396: PetscFunctionBegin;
397: PetscCall(MatMissingDiagonal(A, &missing, &d));
398: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
400: /*
401: This code originally uses Modified Sparse Row (MSR) storage
402: (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
403: Then it is rewritten so the factor B takes seqsbaij format. However the associated
404: MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
405: thus the original code in MSR format is still used for these cases.
406: The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
407: MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
408: */
409: if (bs > 1) {
410: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
411: PetscFunctionReturn(PETSC_SUCCESS);
412: }
414: /* check whether perm is the identity mapping */
415: PetscCall(ISIdentity(perm, &perm_identity));
416: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
417: a->permute = PETSC_FALSE;
418: ai = a->i;
419: aj = a->j;
420: PetscCall(ISGetIndices(perm, &rip));
422: /* initialization */
423: PetscCall(PetscMalloc1(mbs + 1, &ui));
424: ui[0] = 0;
426: /* jl: linked list for storing indices of the pivot rows
427: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
428: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
429: for (i = 0; i < mbs; i++) {
430: jl[i] = mbs;
431: il[i] = 0;
432: }
434: /* create and initialize a linked list for storing column indices of the active row k */
435: nlnk = mbs + 1;
436: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
438: /* initial FreeSpace size is fill*(ai[mbs]+1) */
439: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
440: current_space = free_space;
442: for (k = 0; k < mbs; k++) { /* for each active row k */
443: /* initialize lnk by the column indices of row rip[k] of A */
444: nzk = 0;
445: ncols = ai[rip[k] + 1] - ai[rip[k]];
446: for (j = 0; j < ncols; j++) {
447: i = *(aj + ai[rip[k]] + j);
448: cols[j] = rip[i];
449: }
450: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
451: nzk += nlnk;
453: /* update lnk by computing fill-in for each pivot row to be merged in */
454: prow = jl[k]; /* 1st pivot row */
456: while (prow < k) {
457: nextprow = jl[prow];
458: /* merge prow into k-th row */
459: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
460: jmax = ui[prow + 1];
461: ncols = jmax - jmin;
462: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
463: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
464: nzk += nlnk;
466: /* update il and jl for prow */
467: if (jmin < jmax) {
468: il[prow] = jmin;
470: j = *uj_ptr;
471: jl[prow] = jl[j];
472: jl[j] = prow;
473: }
474: prow = nextprow;
475: }
477: /* if free space is not available, make more free space */
478: if (current_space->local_remaining < nzk) {
479: i = mbs - k + 1; /* num of unfactored rows */
480: i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
481: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
482: reallocs++;
483: }
485: /* copy data into free space, then initialize lnk */
486: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
488: /* add the k-th row into il and jl */
489: if (nzk - 1 > 0) {
490: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
491: jl[k] = jl[i];
492: jl[i] = k;
493: il[k] = ui[k] + 1;
494: }
495: ui_ptr[k] = current_space->array;
497: current_space->array += nzk;
498: current_space->local_used += nzk;
499: current_space->local_remaining -= nzk;
501: ui[k + 1] = ui[k] + nzk;
502: }
504: PetscCall(ISRestoreIndices(perm, &rip));
505: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
507: /* destroy list of free space and other temporary array(s) */
508: PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
509: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
510: PetscCall(PetscLLDestroy(lnk, lnkbt));
512: /* put together the new matrix in MATSEQSBAIJ format */
513: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
515: b = (Mat_SeqSBAIJ *)fact->data;
516: b->singlemalloc = PETSC_FALSE;
517: b->free_a = PETSC_TRUE;
518: b->free_ij = PETSC_TRUE;
520: PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
522: b->j = uj;
523: b->i = ui;
524: b->diag = NULL;
525: b->ilen = NULL;
526: b->imax = NULL;
527: b->row = perm;
529: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
531: PetscCall(PetscObjectReference((PetscObject)perm));
532: b->icol = perm;
533: PetscCall(PetscObjectReference((PetscObject)perm));
534: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
535: b->maxnz = b->nz = ui[mbs];
537: fact->info.factor_mallocs = reallocs;
538: fact->info.fill_ratio_given = fill;
539: if (ai[mbs] != 0) {
540: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
541: } else {
542: fact->info.fill_ratio_needed = 0.0;
543: }
544: #if defined(PETSC_USE_INFO)
545: if (ai[mbs] != 0) {
546: PetscReal af = fact->info.fill_ratio_needed;
547: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
548: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
549: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
550: } else {
551: PetscCall(PetscInfo(A, "Empty matrix\n"));
552: }
553: #endif
554: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
555: PetscFunctionReturn(PETSC_SUCCESS);
556: }
558: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
559: {
560: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
561: IS perm = b->row;
562: const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
563: PetscInt i, j;
564: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
565: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
566: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
567: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
568: MatScalar *work;
569: PetscInt *pivots;
570: PetscBool allowzeropivot, zeropivotdetected;
572: PetscFunctionBegin;
573: /* initialization */
574: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
575: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
576: allowzeropivot = PetscNot(A->erroriffailure);
578: il[0] = 0;
579: for (i = 0; i < mbs; i++) jl[i] = mbs;
581: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
582: PetscCall(PetscMalloc1(bs, &pivots));
584: PetscCall(ISGetIndices(perm, &perm_ptr));
586: /* check permutation */
587: if (!a->permute) {
588: ai = a->i;
589: aj = a->j;
590: aa = a->a;
591: } else {
592: ai = a->inew;
593: aj = a->jnew;
594: PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
595: PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
596: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
597: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
599: for (i = 0; i < mbs; i++) {
600: jmin = ai[i];
601: jmax = ai[i + 1];
602: for (j = jmin; j < jmax; j++) {
603: while (a2anew[j] != j) {
604: k = a2anew[j];
605: a2anew[j] = a2anew[k];
606: a2anew[k] = k;
607: for (k1 = 0; k1 < bs2; k1++) {
608: dk[k1] = aa[k * bs2 + k1];
609: aa[k * bs2 + k1] = aa[j * bs2 + k1];
610: aa[j * bs2 + k1] = dk[k1];
611: }
612: }
613: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
614: if (i > aj[j]) {
615: ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */
616: for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
617: for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */
618: for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
619: }
620: }
621: }
622: }
623: PetscCall(PetscFree(a2anew));
624: }
626: /* for each row k */
627: for (k = 0; k < mbs; k++) {
628: /*initialize k-th row with elements nonzero in row perm(k) of A */
629: jmin = ai[perm_ptr[k]];
630: jmax = ai[perm_ptr[k] + 1];
632: ap = aa + jmin * bs2;
633: for (j = jmin; j < jmax; j++) {
634: vj = perm_ptr[aj[j]]; /* block col. index */
635: rtmp_ptr = rtmp + vj * bs2;
636: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
637: }
639: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
640: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
641: i = jl[k]; /* first row to be added to k_th row */
643: while (i < k) {
644: nexti = jl[i]; /* next row to be added to k_th row */
646: /* compute multiplier */
647: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
649: /* uik = -inv(Di)*U_bar(i,k) */
650: diag = ba + i * bs2;
651: u = ba + ili * bs2;
652: PetscCall(PetscArrayzero(uik, bs2));
653: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
655: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
656: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
657: PetscCall(PetscLogFlops(4.0 * bs * bs2));
659: /* update -U(i,k) */
660: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
662: /* add multiple of row i to k-th row ... */
663: jmin = ili + 1;
664: jmax = bi[i + 1];
665: if (jmin < jmax) {
666: for (j = jmin; j < jmax; j++) {
667: /* rtmp += -U(i,k)^T * U_bar(i,j) */
668: rtmp_ptr = rtmp + bj[j] * bs2;
669: u = ba + j * bs2;
670: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
671: }
672: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
674: /* ... add i to row list for next nonzero entry */
675: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
676: j = bj[jmin];
677: jl[i] = jl[j];
678: jl[j] = i; /* update jl */
679: }
680: i = nexti;
681: }
683: /* save nonzero entries in k-th row of U ... */
685: /* invert diagonal block */
686: diag = ba + k * bs2;
687: PetscCall(PetscArraycpy(diag, dk, bs2));
689: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
690: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
692: jmin = bi[k];
693: jmax = bi[k + 1];
694: if (jmin < jmax) {
695: for (j = jmin; j < jmax; j++) {
696: vj = bj[j]; /* block col. index of U */
697: u = ba + j * bs2;
698: rtmp_ptr = rtmp + vj * bs2;
699: for (k1 = 0; k1 < bs2; k1++) {
700: *u++ = *rtmp_ptr;
701: *rtmp_ptr++ = 0.0;
702: }
703: }
705: /* ... add k to row list for first nonzero entry in k-th row */
706: il[k] = jmin;
707: i = bj[jmin];
708: jl[k] = jl[i];
709: jl[i] = k;
710: }
711: }
713: PetscCall(PetscFree(rtmp));
714: PetscCall(PetscFree2(il, jl));
715: PetscCall(PetscFree3(dk, uik, work));
716: PetscCall(PetscFree(pivots));
717: if (a->permute) PetscCall(PetscFree(aa));
719: PetscCall(ISRestoreIndices(perm, &perm_ptr));
721: C->ops->solve = MatSolve_SeqSBAIJ_N_inplace;
722: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
723: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace;
724: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace;
726: C->assembled = PETSC_TRUE;
727: C->preallocated = PETSC_TRUE;
729: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
730: PetscFunctionReturn(PETSC_SUCCESS);
731: }
733: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
734: {
735: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
736: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
737: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
738: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
739: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
740: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
741: MatScalar *work;
742: PetscInt *pivots;
743: PetscBool allowzeropivot, zeropivotdetected;
745: PetscFunctionBegin;
746: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
747: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
748: il[0] = 0;
749: for (i = 0; i < mbs; i++) jl[i] = mbs;
751: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
752: PetscCall(PetscMalloc1(bs, &pivots));
753: allowzeropivot = PetscNot(A->erroriffailure);
755: ai = a->i;
756: aj = a->j;
757: aa = a->a;
759: /* for each row k */
760: for (k = 0; k < mbs; k++) {
761: /*initialize k-th row with elements nonzero in row k of A */
762: jmin = ai[k];
763: jmax = ai[k + 1];
764: ap = aa + jmin * bs2;
765: for (j = jmin; j < jmax; j++) {
766: vj = aj[j]; /* block col. index */
767: rtmp_ptr = rtmp + vj * bs2;
768: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
769: }
771: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
772: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
773: i = jl[k]; /* first row to be added to k_th row */
775: while (i < k) {
776: nexti = jl[i]; /* next row to be added to k_th row */
778: /* compute multiplier */
779: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
781: /* uik = -inv(Di)*U_bar(i,k) */
782: diag = ba + i * bs2;
783: u = ba + ili * bs2;
784: PetscCall(PetscArrayzero(uik, bs2));
785: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
787: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
788: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
789: PetscCall(PetscLogFlops(2.0 * bs * bs2));
791: /* update -U(i,k) */
792: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
794: /* add multiple of row i to k-th row ... */
795: jmin = ili + 1;
796: jmax = bi[i + 1];
797: if (jmin < jmax) {
798: for (j = jmin; j < jmax; j++) {
799: /* rtmp += -U(i,k)^T * U_bar(i,j) */
800: rtmp_ptr = rtmp + bj[j] * bs2;
801: u = ba + j * bs2;
802: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
803: }
804: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
806: /* ... add i to row list for next nonzero entry */
807: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
808: j = bj[jmin];
809: jl[i] = jl[j];
810: jl[j] = i; /* update jl */
811: }
812: i = nexti;
813: }
815: /* save nonzero entries in k-th row of U ... */
817: /* invert diagonal block */
818: diag = ba + k * bs2;
819: PetscCall(PetscArraycpy(diag, dk, bs2));
821: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
822: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
824: jmin = bi[k];
825: jmax = bi[k + 1];
826: if (jmin < jmax) {
827: for (j = jmin; j < jmax; j++) {
828: vj = bj[j]; /* block col. index of U */
829: u = ba + j * bs2;
830: rtmp_ptr = rtmp + vj * bs2;
831: for (k1 = 0; k1 < bs2; k1++) {
832: *u++ = *rtmp_ptr;
833: *rtmp_ptr++ = 0.0;
834: }
835: }
837: /* ... add k to row list for first nonzero entry in k-th row */
838: il[k] = jmin;
839: i = bj[jmin];
840: jl[k] = jl[i];
841: jl[i] = k;
842: }
843: }
845: PetscCall(PetscFree(rtmp));
846: PetscCall(PetscFree2(il, jl));
847: PetscCall(PetscFree3(dk, uik, work));
848: PetscCall(PetscFree(pivots));
850: C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854: C->assembled = PETSC_TRUE;
855: C->preallocated = PETSC_TRUE;
857: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
858: PetscFunctionReturn(PETSC_SUCCESS);
859: }
861: /*
862: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
863: Version for blocks 2 by 2.
864: */
865: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
866: {
867: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
868: IS perm = b->row;
869: const PetscInt *ai, *aj, *perm_ptr;
870: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
871: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
872: MatScalar *ba = b->a, *aa, *ap;
873: MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
874: PetscReal shift = info->shiftamount;
875: PetscBool allowzeropivot, zeropivotdetected;
877: PetscFunctionBegin;
878: allowzeropivot = PetscNot(A->erroriffailure);
880: /* initialization */
881: /* il and jl record the first nonzero element in each row of the accessing
882: window U(0:k, k:mbs-1).
883: jl: list of rows to be added to uneliminated rows
884: i>= k: jl(i) is the first row to be added to row i
885: i< k: jl(i) is the row following row i in some list of rows
886: jl(i) = mbs indicates the end of a list
887: il(i): points to the first nonzero element in columns k,...,mbs-1 of
888: row i of U */
889: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
890: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
891: il[0] = 0;
892: for (i = 0; i < mbs; i++) jl[i] = mbs;
894: PetscCall(ISGetIndices(perm, &perm_ptr));
896: /* check permutation */
897: if (!a->permute) {
898: ai = a->i;
899: aj = a->j;
900: aa = a->a;
901: } else {
902: ai = a->inew;
903: aj = a->jnew;
904: PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
905: PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
906: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
907: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
909: for (i = 0; i < mbs; i++) {
910: jmin = ai[i];
911: jmax = ai[i + 1];
912: for (j = jmin; j < jmax; j++) {
913: while (a2anew[j] != j) {
914: k = a2anew[j];
915: a2anew[j] = a2anew[k];
916: a2anew[k] = k;
917: for (k1 = 0; k1 < 4; k1++) {
918: dk[k1] = aa[k * 4 + k1];
919: aa[k * 4 + k1] = aa[j * 4 + k1];
920: aa[j * 4 + k1] = dk[k1];
921: }
922: }
923: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
924: if (i > aj[j]) {
925: ap = aa + j * 4; /* ptr to the beginning of the block */
926: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
927: ap[1] = ap[2];
928: ap[2] = dk[1];
929: }
930: }
931: }
932: PetscCall(PetscFree(a2anew));
933: }
935: /* for each row k */
936: for (k = 0; k < mbs; k++) {
937: /*initialize k-th row with elements nonzero in row perm(k) of A */
938: jmin = ai[perm_ptr[k]];
939: jmax = ai[perm_ptr[k] + 1];
940: ap = aa + jmin * 4;
941: for (j = jmin; j < jmax; j++) {
942: vj = perm_ptr[aj[j]]; /* block col. index */
943: rtmp_ptr = rtmp + vj * 4;
944: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
945: }
947: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
948: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
949: i = jl[k]; /* first row to be added to k_th row */
951: while (i < k) {
952: nexti = jl[i]; /* next row to be added to k_th row */
954: /* compute multiplier */
955: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
957: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
958: diag = ba + i * 4;
959: u = ba + ili * 4;
960: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
961: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
962: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
963: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
965: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
966: dk[0] += uik[0] * u[0] + uik[1] * u[1];
967: dk[1] += uik[2] * u[0] + uik[3] * u[1];
968: dk[2] += uik[0] * u[2] + uik[1] * u[3];
969: dk[3] += uik[2] * u[2] + uik[3] * u[3];
971: PetscCall(PetscLogFlops(16.0 * 2.0));
973: /* update -U(i,k): ba[ili] = uik */
974: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
976: /* add multiple of row i to k-th row ... */
977: jmin = ili + 1;
978: jmax = bi[i + 1];
979: if (jmin < jmax) {
980: for (j = jmin; j < jmax; j++) {
981: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
982: rtmp_ptr = rtmp + bj[j] * 4;
983: u = ba + j * 4;
984: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
985: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
986: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
987: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
988: }
989: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
991: /* ... add i to row list for next nonzero entry */
992: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
993: j = bj[jmin];
994: jl[i] = jl[j];
995: jl[j] = i; /* update jl */
996: }
997: i = nexti;
998: }
1000: /* save nonzero entries in k-th row of U ... */
1002: /* invert diagonal block */
1003: diag = ba + k * 4;
1004: PetscCall(PetscArraycpy(diag, dk, 4));
1005: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1006: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1008: jmin = bi[k];
1009: jmax = bi[k + 1];
1010: if (jmin < jmax) {
1011: for (j = jmin; j < jmax; j++) {
1012: vj = bj[j]; /* block col. index of U */
1013: u = ba + j * 4;
1014: rtmp_ptr = rtmp + vj * 4;
1015: for (k1 = 0; k1 < 4; k1++) {
1016: *u++ = *rtmp_ptr;
1017: *rtmp_ptr++ = 0.0;
1018: }
1019: }
1021: /* ... add k to row list for first nonzero entry in k-th row */
1022: il[k] = jmin;
1023: i = bj[jmin];
1024: jl[k] = jl[i];
1025: jl[i] = k;
1026: }
1027: }
1029: PetscCall(PetscFree(rtmp));
1030: PetscCall(PetscFree2(il, jl));
1031: if (a->permute) PetscCall(PetscFree(aa));
1032: PetscCall(ISRestoreIndices(perm, &perm_ptr));
1034: C->ops->solve = MatSolve_SeqSBAIJ_2_inplace;
1035: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1036: C->assembled = PETSC_TRUE;
1037: C->preallocated = PETSC_TRUE;
1039: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1040: PetscFunctionReturn(PETSC_SUCCESS);
1041: }
1043: /*
1044: Version for when blocks are 2 by 2 Using natural ordering
1045: */
1046: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1047: {
1048: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1049: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1050: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1051: MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8];
1052: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
1053: PetscReal shift = info->shiftamount;
1054: PetscBool allowzeropivot, zeropivotdetected;
1056: PetscFunctionBegin;
1057: allowzeropivot = PetscNot(A->erroriffailure);
1059: /* initialization */
1060: /* il and jl record the first nonzero element in each row of the accessing
1061: window U(0:k, k:mbs-1).
1062: jl: list of rows to be added to uneliminated rows
1063: i>= k: jl(i) is the first row to be added to row i
1064: i< k: jl(i) is the row following row i in some list of rows
1065: jl(i) = mbs indicates the end of a list
1066: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1067: row i of U */
1068: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1069: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1070: il[0] = 0;
1071: for (i = 0; i < mbs; i++) jl[i] = mbs;
1073: ai = a->i;
1074: aj = a->j;
1075: aa = a->a;
1077: /* for each row k */
1078: for (k = 0; k < mbs; k++) {
1079: /*initialize k-th row with elements nonzero in row k of A */
1080: jmin = ai[k];
1081: jmax = ai[k + 1];
1082: ap = aa + jmin * 4;
1083: for (j = jmin; j < jmax; j++) {
1084: vj = aj[j]; /* block col. index */
1085: rtmp_ptr = rtmp + vj * 4;
1086: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1087: }
1089: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1090: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1091: i = jl[k]; /* first row to be added to k_th row */
1093: while (i < k) {
1094: nexti = jl[i]; /* next row to be added to k_th row */
1096: /* compute multiplier */
1097: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1099: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1100: diag = ba + i * 4;
1101: u = ba + ili * 4;
1102: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1103: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1104: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1105: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1107: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1108: dk[0] += uik[0] * u[0] + uik[1] * u[1];
1109: dk[1] += uik[2] * u[0] + uik[3] * u[1];
1110: dk[2] += uik[0] * u[2] + uik[1] * u[3];
1111: dk[3] += uik[2] * u[2] + uik[3] * u[3];
1113: PetscCall(PetscLogFlops(16.0 * 2.0));
1115: /* update -U(i,k): ba[ili] = uik */
1116: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1118: /* add multiple of row i to k-th row ... */
1119: jmin = ili + 1;
1120: jmax = bi[i + 1];
1121: if (jmin < jmax) {
1122: for (j = jmin; j < jmax; j++) {
1123: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1124: rtmp_ptr = rtmp + bj[j] * 4;
1125: u = ba + j * 4;
1126: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1127: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1128: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1129: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1130: }
1131: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1133: /* ... add i to row list for next nonzero entry */
1134: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1135: j = bj[jmin];
1136: jl[i] = jl[j];
1137: jl[j] = i; /* update jl */
1138: }
1139: i = nexti;
1140: }
1142: /* save nonzero entries in k-th row of U ... */
1144: /* invert diagonal block */
1145: diag = ba + k * 4;
1146: PetscCall(PetscArraycpy(diag, dk, 4));
1147: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1148: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1150: jmin = bi[k];
1151: jmax = bi[k + 1];
1152: if (jmin < jmax) {
1153: for (j = jmin; j < jmax; j++) {
1154: vj = bj[j]; /* block col. index of U */
1155: u = ba + j * 4;
1156: rtmp_ptr = rtmp + vj * 4;
1157: for (k1 = 0; k1 < 4; k1++) {
1158: *u++ = *rtmp_ptr;
1159: *rtmp_ptr++ = 0.0;
1160: }
1161: }
1163: /* ... add k to row list for first nonzero entry in k-th row */
1164: il[k] = jmin;
1165: i = bj[jmin];
1166: jl[k] = jl[i];
1167: jl[i] = k;
1168: }
1169: }
1171: PetscCall(PetscFree(rtmp));
1172: PetscCall(PetscFree2(il, jl));
1174: C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178: C->assembled = PETSC_TRUE;
1179: C->preallocated = PETSC_TRUE;
1181: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1182: PetscFunctionReturn(PETSC_SUCCESS);
1183: }
1185: /*
1186: Numeric U^T*D*U factorization for SBAIJ format.
1187: Version for blocks are 1 by 1.
1188: */
1189: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1190: {
1191: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1192: IS ip = b->row;
1193: const PetscInt *ai, *aj, *rip;
1194: PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1195: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1196: MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1197: PetscReal rs;
1198: FactorShiftCtx sctx;
1200: PetscFunctionBegin;
1201: /* MatPivotSetUp(): initialize shift context sctx */
1202: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1204: PetscCall(ISGetIndices(ip, &rip));
1205: if (!a->permute) {
1206: ai = a->i;
1207: aj = a->j;
1208: aa = a->a;
1209: } else {
1210: ai = a->inew;
1211: aj = a->jnew;
1212: nz = ai[mbs];
1213: PetscCall(PetscMalloc1(nz, &aa));
1214: a2anew = a->a2anew;
1215: bval = a->a;
1216: for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1217: }
1219: /* initialization */
1220: /* il and jl record the first nonzero element in each row of the accessing
1221: window U(0:k, k:mbs-1).
1222: jl: list of rows to be added to uneliminated rows
1223: i>= k: jl(i) is the first row to be added to row i
1224: i< k: jl(i) is the row following row i in some list of rows
1225: jl(i) = mbs indicates the end of a list
1226: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1227: row i of U */
1228: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1230: do {
1231: sctx.newshift = PETSC_FALSE;
1232: il[0] = 0;
1233: for (i = 0; i < mbs; i++) {
1234: rtmp[i] = 0.0;
1235: jl[i] = mbs;
1236: }
1238: for (k = 0; k < mbs; k++) {
1239: /*initialize k-th row by the perm[k]-th row of A */
1240: jmin = ai[rip[k]];
1241: jmax = ai[rip[k] + 1];
1242: bval = ba + bi[k];
1243: for (j = jmin; j < jmax; j++) {
1244: col = rip[aj[j]];
1245: rtmp[col] = aa[j];
1246: *bval++ = 0.0; /* for in-place factorization */
1247: }
1249: /* shift the diagonal of the matrix */
1250: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1252: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1253: dk = rtmp[k];
1254: i = jl[k]; /* first row to be added to k_th row */
1256: while (i < k) {
1257: nexti = jl[i]; /* next row to be added to k_th row */
1259: /* compute multiplier, update diag(k) and U(i,k) */
1260: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1261: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1262: dk += uikdi * ba[ili];
1263: ba[ili] = uikdi; /* -U(i,k) */
1265: /* add multiple of row i to k-th row */
1266: jmin = ili + 1;
1267: jmax = bi[i + 1];
1268: if (jmin < jmax) {
1269: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1270: PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1272: /* update il and jl for row i */
1273: il[i] = jmin;
1274: j = bj[jmin];
1275: jl[i] = jl[j];
1276: jl[j] = i;
1277: }
1278: i = nexti;
1279: }
1281: /* shift the diagonals when zero pivot is detected */
1282: /* compute rs=sum of abs(off-diagonal) */
1283: rs = 0.0;
1284: jmin = bi[k] + 1;
1285: nz = bi[k + 1] - jmin;
1286: if (nz) {
1287: bcol = bj + jmin;
1288: while (nz--) {
1289: rs += PetscAbsScalar(rtmp[*bcol]);
1290: bcol++;
1291: }
1292: }
1294: sctx.rs = rs;
1295: sctx.pv = dk;
1296: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1297: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1298: dk = sctx.pv;
1300: /* copy data into U(k,:) */
1301: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1302: jmin = bi[k] + 1;
1303: jmax = bi[k + 1];
1304: if (jmin < jmax) {
1305: for (j = jmin; j < jmax; j++) {
1306: col = bj[j];
1307: ba[j] = rtmp[col];
1308: rtmp[col] = 0.0;
1309: }
1310: /* add the k-th row into il and jl */
1311: il[k] = jmin;
1312: i = bj[jmin];
1313: jl[k] = jl[i];
1314: jl[i] = k;
1315: }
1316: }
1317: } while (sctx.newshift);
1318: PetscCall(PetscFree3(rtmp, il, jl));
1319: if (a->permute) PetscCall(PetscFree(aa));
1321: PetscCall(ISRestoreIndices(ip, &rip));
1323: C->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
1324: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1325: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1326: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
1327: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
1328: C->assembled = PETSC_TRUE;
1329: C->preallocated = PETSC_TRUE;
1331: PetscCall(PetscLogFlops(C->rmap->N));
1332: if (sctx.nshift) {
1333: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1334: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1335: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1336: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1337: }
1338: }
1339: PetscFunctionReturn(PETSC_SUCCESS);
1340: }
1342: /*
1343: Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1344: Modified from MatCholeskyFactorNumeric_SeqAIJ()
1345: */
1346: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1347: {
1348: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1349: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data;
1350: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1351: PetscInt *ai = a->i, *aj = a->j, *ajtmp;
1352: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1353: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1354: FactorShiftCtx sctx;
1355: PetscReal rs;
1356: MatScalar d, *v;
1358: PetscFunctionBegin;
1359: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1361: /* MatPivotSetUp(): initialize shift context sctx */
1362: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1364: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1365: sctx.shift_top = info->zeropivot;
1367: PetscCall(PetscArrayzero(rtmp, mbs));
1369: for (i = 0; i < mbs; i++) {
1370: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1371: d = (aa)[a->diag[i]];
1372: rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1373: ajtmp = aj + ai[i] + 1; /* exclude diagonal */
1374: v = aa + ai[i] + 1;
1375: nz = ai[i + 1] - ai[i] - 1;
1376: for (j = 0; j < nz; j++) {
1377: rtmp[i] += PetscAbsScalar(v[j]);
1378: rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1379: }
1380: if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1381: }
1382: sctx.shift_top *= 1.1;
1383: sctx.nshift_max = 5;
1384: sctx.shift_lo = 0.;
1385: sctx.shift_hi = 1.;
1386: }
1388: /* allocate working arrays
1389: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1390: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1391: */
1392: do {
1393: sctx.newshift = PETSC_FALSE;
1395: for (i = 0; i < mbs; i++) c2r[i] = mbs;
1396: if (mbs) il[0] = 0;
1398: for (k = 0; k < mbs; k++) {
1399: /* zero rtmp */
1400: nz = bi[k + 1] - bi[k];
1401: bjtmp = bj + bi[k];
1402: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1404: /* load in initial unfactored row */
1405: bval = ba + bi[k];
1406: jmin = ai[k];
1407: jmax = ai[k + 1];
1408: for (j = jmin; j < jmax; j++) {
1409: col = aj[j];
1410: rtmp[col] = aa[j];
1411: *bval++ = 0.0; /* for in-place factorization */
1412: }
1413: /* shift the diagonal of the matrix: ZeropivotApply() */
1414: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1416: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1417: dk = rtmp[k];
1418: i = c2r[k]; /* first row to be added to k_th row */
1420: while (i < k) {
1421: nexti = c2r[i]; /* next row to be added to k_th row */
1423: /* compute multiplier, update diag(k) and U(i,k) */
1424: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1425: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1426: dk += uikdi * ba[ili]; /* update diag[k] */
1427: ba[ili] = uikdi; /* -U(i,k) */
1429: /* add multiple of row i to k-th row */
1430: jmin = ili + 1;
1431: jmax = bi[i + 1];
1432: if (jmin < jmax) {
1433: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1434: /* update il and c2r for row i */
1435: il[i] = jmin;
1436: j = bj[jmin];
1437: c2r[i] = c2r[j];
1438: c2r[j] = i;
1439: }
1440: i = nexti;
1441: }
1443: /* copy data into U(k,:) */
1444: rs = 0.0;
1445: jmin = bi[k];
1446: jmax = bi[k + 1] - 1;
1447: if (jmin < jmax) {
1448: for (j = jmin; j < jmax; j++) {
1449: col = bj[j];
1450: ba[j] = rtmp[col];
1451: rs += PetscAbsScalar(ba[j]);
1452: }
1453: /* add the k-th row into il and c2r */
1454: il[k] = jmin;
1455: i = bj[jmin];
1456: c2r[k] = c2r[i];
1457: c2r[i] = k;
1458: }
1460: sctx.rs = rs;
1461: sctx.pv = dk;
1462: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1463: if (sctx.newshift) break;
1464: dk = sctx.pv;
1466: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1467: }
1468: } while (sctx.newshift);
1470: PetscCall(PetscFree3(rtmp, il, c2r));
1472: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1473: B->ops->solves = MatSolves_SeqSBAIJ_1;
1474: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1475: B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1476: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1477: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1479: B->assembled = PETSC_TRUE;
1480: B->preallocated = PETSC_TRUE;
1482: PetscCall(PetscLogFlops(B->rmap->n));
1484: /* MatPivotView() */
1485: if (sctx.nshift) {
1486: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1487: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1488: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1489: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1490: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1491: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1492: }
1493: }
1494: PetscFunctionReturn(PETSC_SUCCESS);
1495: }
1497: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1498: {
1499: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1500: PetscInt i, j, mbs = a->mbs;
1501: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1502: PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1503: MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1504: PetscReal rs;
1505: FactorShiftCtx sctx;
1507: PetscFunctionBegin;
1508: /* MatPivotSetUp(): initialize shift context sctx */
1509: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1511: /* initialization */
1512: /* il and jl record the first nonzero element in each row of the accessing
1513: window U(0:k, k:mbs-1).
1514: jl: list of rows to be added to uneliminated rows
1515: i>= k: jl(i) is the first row to be added to row i
1516: i< k: jl(i) is the row following row i in some list of rows
1517: jl(i) = mbs indicates the end of a list
1518: il(i): points to the first nonzero element in U(i,k:mbs-1)
1519: */
1520: PetscCall(PetscMalloc1(mbs, &rtmp));
1521: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1523: do {
1524: sctx.newshift = PETSC_FALSE;
1525: il[0] = 0;
1526: for (i = 0; i < mbs; i++) {
1527: rtmp[i] = 0.0;
1528: jl[i] = mbs;
1529: }
1531: for (k = 0; k < mbs; k++) {
1532: /*initialize k-th row with elements nonzero in row perm(k) of A */
1533: nz = ai[k + 1] - ai[k];
1534: acol = aj + ai[k];
1535: aval = aa + ai[k];
1536: bval = ba + bi[k];
1537: while (nz--) {
1538: rtmp[*acol++] = *aval++;
1539: *bval++ = 0.0; /* for in-place factorization */
1540: }
1542: /* shift the diagonal of the matrix */
1543: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1545: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1546: dk = rtmp[k];
1547: i = jl[k]; /* first row to be added to k_th row */
1549: while (i < k) {
1550: nexti = jl[i]; /* next row to be added to k_th row */
1551: /* compute multiplier, update D(k) and U(i,k) */
1552: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1553: uikdi = -ba[ili] * ba[bi[i]];
1554: dk += uikdi * ba[ili];
1555: ba[ili] = uikdi; /* -U(i,k) */
1557: /* add multiple of row i to k-th row ... */
1558: jmin = ili + 1;
1559: nz = bi[i + 1] - jmin;
1560: if (nz > 0) {
1561: bcol = bj + jmin;
1562: bval = ba + jmin;
1563: PetscCall(PetscLogFlops(2.0 * nz));
1564: while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1566: /* update il and jl for i-th row */
1567: il[i] = jmin;
1568: j = bj[jmin];
1569: jl[i] = jl[j];
1570: jl[j] = i;
1571: }
1572: i = nexti;
1573: }
1575: /* shift the diagonals when zero pivot is detected */
1576: /* compute rs=sum of abs(off-diagonal) */
1577: rs = 0.0;
1578: jmin = bi[k] + 1;
1579: nz = bi[k + 1] - jmin;
1580: if (nz) {
1581: bcol = bj + jmin;
1582: while (nz--) {
1583: rs += PetscAbsScalar(rtmp[*bcol]);
1584: bcol++;
1585: }
1586: }
1588: sctx.rs = rs;
1589: sctx.pv = dk;
1590: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1591: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1592: dk = sctx.pv;
1594: /* copy data into U(k,:) */
1595: ba[bi[k]] = 1.0 / dk;
1596: jmin = bi[k] + 1;
1597: nz = bi[k + 1] - jmin;
1598: if (nz) {
1599: bcol = bj + jmin;
1600: bval = ba + jmin;
1601: while (nz--) {
1602: *bval++ = rtmp[*bcol];
1603: rtmp[*bcol++] = 0.0;
1604: }
1605: /* add k-th row into il and jl */
1606: il[k] = jmin;
1607: i = bj[jmin];
1608: jl[k] = jl[i];
1609: jl[i] = k;
1610: }
1611: } /* end of for (k = 0; k<mbs; k++) */
1612: } while (sctx.newshift);
1613: PetscCall(PetscFree(rtmp));
1614: PetscCall(PetscFree2(il, jl));
1616: C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1617: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1618: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1619: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1620: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1622: C->assembled = PETSC_TRUE;
1623: C->preallocated = PETSC_TRUE;
1625: PetscCall(PetscLogFlops(C->rmap->N));
1626: if (sctx.nshift) {
1627: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1628: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1629: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1630: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1631: }
1632: }
1633: PetscFunctionReturn(PETSC_SUCCESS);
1634: }
1636: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1637: {
1638: Mat C;
1640: PetscFunctionBegin;
1641: PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1642: PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1643: PetscCall(MatCholeskyFactorNumeric(C, A, info));
1645: A->ops->solve = C->ops->solve;
1646: A->ops->solvetranspose = C->ops->solvetranspose;
1648: PetscCall(MatHeaderMerge(A, &C));
1649: PetscFunctionReturn(PETSC_SUCCESS);
1650: }