Actual source code: brdn.c
1: #include <../src/ksp/ksp/utils/lmvm/brdn/brdn.h>
3: /*------------------------------------------------------------*/
5: /*
6: The solution method is the matrix-free implementation of the inverse Hessian
7: representation in page 312 of Griewank "Broyden Updating, The Good and The Bad!"
8: (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).
10: Q[i] = (B_i)^{-1}*S[i] terms are computed ahead of time whenever
11: the matrix is updated with a new (S[i], Y[i]) pair. This allows
12: repeated calls of MatSolve without incurring redundant computation.
14: dX <- J0^{-1} * F
16: for i=0,1,2,...,k
17: # Q[i] = (B_i)^{-1} * Y[i]
18: tau = (S[i]^T dX) / (S[i]^T Q[i])
19: dX <- dX + (tau * (S[i] - Q[i]))
20: end
21: */
23: static PetscErrorCode MatSolve_LMVMBrdn(Mat B, Vec F, Vec dX)
24: {
25: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
26: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
27: PetscInt i, j;
28: PetscScalar sjtqi, stx, stq;
30: VecCheckSameSize(F, 2, dX, 3);
31: VecCheckMatCompatible(B, dX, 3, F, 2);
33: if (lbrdn->needQ) {
34: /* Pre-compute (Q[i] = (B_i)^{-1} * Y[i]) */
35: for (i = 0; i <= lmvm->k; ++i) {
36: MatLMVMApplyJ0Inv(B, lmvm->Y[i], lbrdn->Q[i]);
37: for (j = 0; j <= i-1; ++j) {
38: VecDot(lmvm->S[j], lbrdn->Q[i], &sjtqi);
39: VecAXPBYPCZ(lbrdn->Q[i], PetscRealPart(sjtqi)/lbrdn->stq[j], -PetscRealPart(sjtqi)/lbrdn->stq[j], 1.0, lmvm->S[j], lbrdn->Q[j]);
40: }
41: VecDot(lmvm->S[i], lbrdn->Q[i], &stq);
42: lbrdn->stq[i] = PetscRealPart(stq);
43: }
44: lbrdn->needQ = PETSC_FALSE;
45: }
47: MatLMVMApplyJ0Inv(B, F, dX);
48: for (i = 0; i <= lmvm->k; ++i) {
49: VecDot(lmvm->S[i], dX, &stx);
50: VecAXPBYPCZ(dX, PetscRealPart(stx)/lbrdn->stq[i], -PetscRealPart(stx)/lbrdn->stq[i], 1.0, lmvm->S[i], lbrdn->Q[i]);
51: }
52: return 0;
53: }
55: /*------------------------------------------------------------*/
57: /*
58: The forward product is the matrix-free implementation of Equation 2 in
59: page 302 of Griewank "Broyden Updating, The Good and The Bad!"
60: (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).
62: P[i] = (B_i)*S[i] terms are computed ahead of time whenever
63: the matrix is updated with a new (S[i], Y[i]) pair. This allows
64: repeated calls of MatMult inside KSP solvers without unnecessarily
65: recomputing P[i] terms in expensive nested-loops.
67: Z <- J0 * X
69: for i=0,1,2,...,k
70: # P[i] = B_i * S[i]
71: tau = (S[i]^T X) / (S[i]^T S[i])
72: dX <- dX + (tau * (Y[i] - P[i]))
73: end
74: */
76: static PetscErrorCode MatMult_LMVMBrdn(Mat B, Vec X, Vec Z)
77: {
78: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
79: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
80: PetscInt i, j;
81: PetscScalar sjtsi, stx;
83: VecCheckSameSize(X, 2, Z, 3);
84: VecCheckMatCompatible(B, X, 2, Z, 3);
86: if (lbrdn->needP) {
87: /* Pre-compute (P[i] = (B_i) * S[i]) */
88: for (i = 0; i <= lmvm->k; ++i) {
89: MatLMVMApplyJ0Fwd(B, lmvm->S[i], lbrdn->P[i]);
90: for (j = 0; j <= i-1; ++j) {
91: VecDot(lmvm->S[j], lmvm->S[i], &sjtsi);
92: VecAXPBYPCZ(lbrdn->P[i], PetscRealPart(sjtsi)/lbrdn->sts[j], -PetscRealPart(sjtsi)/lbrdn->sts[j], 1.0, lmvm->Y[j], lbrdn->P[j]);
93: }
94: }
95: lbrdn->needP = PETSC_FALSE;
96: }
98: MatLMVMApplyJ0Fwd(B, X, Z);
99: for (i = 0; i <= lmvm->k; ++i) {
100: VecDot(lmvm->S[i], X, &stx);
101: VecAXPBYPCZ(Z, PetscRealPart(stx)/lbrdn->sts[i], -PetscRealPart(stx)/lbrdn->sts[i], 1.0, lmvm->Y[i], lbrdn->P[i]);
102: }
103: return 0;
104: }
106: /*------------------------------------------------------------*/
108: static PetscErrorCode MatUpdate_LMVMBrdn(Mat B, Vec X, Vec F)
109: {
110: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
111: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
112: PetscInt old_k, i;
113: PetscScalar sts;
115: if (!lmvm->m) return 0;
116: if (lmvm->prev_set) {
117: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
118: VecAYPX(lmvm->Xprev, -1.0, X);
119: VecAYPX(lmvm->Fprev, -1.0, F);
120: /* Accept the update */
121: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
122: old_k = lmvm->k;
123: MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev);
124: /* If we hit the memory limit, shift the sts array */
125: if (old_k == lmvm->k) {
126: for (i = 0; i <= lmvm->k-1; ++i) {
127: lbrdn->sts[i] = lbrdn->sts[i+1];
128: }
129: }
130: VecDot(lmvm->S[lmvm->k], lmvm->S[lmvm->k], &sts);
131: lbrdn->sts[lmvm->k] = PetscRealPart(sts);
132: }
133: /* Save the solution and function to be used in the next update */
134: VecCopy(X, lmvm->Xprev);
135: VecCopy(F, lmvm->Fprev);
136: lmvm->prev_set = PETSC_TRUE;
137: return 0;
138: }
140: /*------------------------------------------------------------*/
142: static PetscErrorCode MatCopy_LMVMBrdn(Mat B, Mat M, MatStructure str)
143: {
144: Mat_LMVM *bdata = (Mat_LMVM*)B->data;
145: Mat_Brdn *bctx = (Mat_Brdn*)bdata->ctx;
146: Mat_LMVM *mdata = (Mat_LMVM*)M->data;
147: Mat_Brdn *mctx = (Mat_Brdn*)mdata->ctx;
148: PetscInt i;
150: mctx->needP = bctx->needP;
151: mctx->needQ = bctx->needQ;
152: for (i=0; i<=bdata->k; ++i) {
153: mctx->sts[i] = bctx->sts[i];
154: mctx->stq[i] = bctx->stq[i];
155: VecCopy(bctx->P[i], mctx->P[i]);
156: VecCopy(bctx->Q[i], mctx->Q[i]);
157: }
158: return 0;
159: }
161: /*------------------------------------------------------------*/
163: static PetscErrorCode MatReset_LMVMBrdn(Mat B, PetscBool destructive)
164: {
165: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
166: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
168: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
169: if (destructive && lbrdn->allocated) {
170: PetscFree2(lbrdn->sts, lbrdn->stq);
171: VecDestroyVecs(lmvm->m, &lbrdn->P);
172: VecDestroyVecs(lmvm->m, &lbrdn->Q);
173: lbrdn->allocated = PETSC_FALSE;
174: }
175: MatReset_LMVM(B, destructive);
176: return 0;
177: }
179: /*------------------------------------------------------------*/
181: static PetscErrorCode MatAllocate_LMVMBrdn(Mat B, Vec X, Vec F)
182: {
183: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
184: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
186: MatAllocate_LMVM(B, X, F);
187: if (!lbrdn->allocated) {
188: PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq);
189: if (lmvm->m > 0) {
190: VecDuplicateVecs(X, lmvm->m, &lbrdn->P);
191: VecDuplicateVecs(X, lmvm->m, &lbrdn->Q);
192: }
193: lbrdn->allocated = PETSC_TRUE;
194: }
195: return 0;
196: }
198: /*------------------------------------------------------------*/
200: static PetscErrorCode MatDestroy_LMVMBrdn(Mat B)
201: {
202: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
203: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
205: if (lbrdn->allocated) {
206: PetscFree2(lbrdn->sts, lbrdn->stq);
207: VecDestroyVecs(lmvm->m, &lbrdn->P);
208: VecDestroyVecs(lmvm->m, &lbrdn->Q);
209: lbrdn->allocated = PETSC_FALSE;
210: }
211: PetscFree(lmvm->ctx);
212: MatDestroy_LMVM(B);
213: return 0;
214: }
216: /*------------------------------------------------------------*/
218: static PetscErrorCode MatSetUp_LMVMBrdn(Mat B)
219: {
220: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
221: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
223: MatSetUp_LMVM(B);
224: if (!lbrdn->allocated) {
225: PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq);
226: if (lmvm->m > 0) {
227: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->P);
228: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->Q);
229: }
230: lbrdn->allocated = PETSC_TRUE;
231: }
232: return 0;
233: }
235: /*------------------------------------------------------------*/
237: PetscErrorCode MatCreate_LMVMBrdn(Mat B)
238: {
239: Mat_LMVM *lmvm;
240: Mat_Brdn *lbrdn;
242: MatCreate_LMVM(B);
243: PetscObjectChangeTypeName((PetscObject)B, MATLMVMBROYDEN);
244: B->ops->setup = MatSetUp_LMVMBrdn;
245: B->ops->destroy = MatDestroy_LMVMBrdn;
246: B->ops->solve = MatSolve_LMVMBrdn;
248: lmvm = (Mat_LMVM*)B->data;
249: lmvm->square = PETSC_TRUE;
250: lmvm->ops->allocate = MatAllocate_LMVMBrdn;
251: lmvm->ops->reset = MatReset_LMVMBrdn;
252: lmvm->ops->mult = MatMult_LMVMBrdn;
253: lmvm->ops->update = MatUpdate_LMVMBrdn;
254: lmvm->ops->copy = MatCopy_LMVMBrdn;
256: PetscNewLog(B, &lbrdn);
257: lmvm->ctx = (void*)lbrdn;
258: lbrdn->allocated = PETSC_FALSE;
259: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
260: return 0;
261: }
263: /*------------------------------------------------------------*/
265: /*@
266: MatCreateLMVMBroyden - Creates a limited-memory "good" Broyden-type approximation
267: matrix used for a Jacobian. L-Brdn is not guaranteed to be symmetric or
268: positive-definite.
270: The provided local and global sizes must match the solution and function vectors
271: used with MatLMVMUpdate() and MatSolve(). The resulting L-Brdn matrix will have
272: storage vectors allocated with VecCreateSeq() in serial and VecCreateMPI() in
273: parallel. To use the L-Brdn matrix with other vector types, the matrix must be
274: created using MatCreate() and MatSetType(), followed by MatLMVMAllocate().
275: This ensures that the internal storage and work vectors are duplicated from the
276: correct type of vector.
278: Collective
280: Input Parameters:
281: + comm - MPI communicator, set to PETSC_COMM_SELF
282: . n - number of local rows for storage vectors
283: - N - global size of the storage vectors
285: Output Parameter:
286: . B - the matrix
288: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
289: paradigm instead of this routine directly.
291: Options Database Keys:
292: . -mat_lmvm_num_vecs - maximum number of correction vectors (i.e.: updates) stored
294: Level: intermediate
296: .seealso: MatCreate(), MATLMVM, MATLMVMBRDN, MatCreateLMVMDFP(), MatCreateLMVMSR1(),
297: MatCreateLMVMBFGS(), MatCreateLMVMBadBrdn(), MatCreateLMVMSymBrdn()
298: @*/
299: PetscErrorCode MatCreateLMVMBroyden(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
300: {
301: MatCreate(comm, B);
302: MatSetSizes(*B, n, n, N, N);
303: MatSetType(*B, MATLMVMBROYDEN);
304: MatSetUp(*B);
305: return 0;
306: }