Actual source code: dgmres.c

  1: /*
  2:     Implements deflated GMRES.
  3: */

  5: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>

  7: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;

  9: #define GMRES_DELTA_DIRECTIONS 10
 10: #define GMRES_DEFAULT_MAXK     30
 11: static PetscErrorCode    KSPDGMRESGetNewVectors(KSP,PetscInt);
 12: static PetscErrorCode    KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
 13: static PetscErrorCode    KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);

 15: PetscErrorCode  KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
 16: {
 17:   PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
 18:   return 0;
 19: }
 20: PetscErrorCode  KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
 21: {
 22:   PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
 23:   return 0;
 24: }
 25: PetscErrorCode  KSPDGMRESForce(KSP ksp,PetscBool force)
 26: {
 27:   PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscBool),(ksp,force));
 28:   return 0;
 29: }
 30: PetscErrorCode  KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
 31: {
 32:   PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
 33:   return 0;
 34: }
 35: PetscErrorCode  KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
 36: {
 37:   PetscUseMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
 38:   return 0;
 39: }
 40: PetscErrorCode  KSPDGMRESComputeDeflationData(KSP ksp,PetscInt *curneigh)
 41: {
 42:   PetscUseMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP,PetscInt*),(ksp,curneigh));
 43:   return 0;
 44: }
 45: PetscErrorCode  KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
 46: {
 47:   PetscUseMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
 48:   return 0;
 49: }

 51: PetscErrorCode  KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
 52: {
 53:   PetscUseMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
 54:   return 0;
 55: }

 57: PetscErrorCode  KSPSetUp_DGMRES(KSP ksp)
 58: {
 59:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
 60:   PetscInt       neig    = dgmres->neig+EIG_OFFSET;
 61:   PetscInt       max_k   = dgmres->max_k+1;

 63:   KSPSetUp_GMRES(ksp);
 64:   if (!dgmres->neig) return 0;

 66:   /* Allocate workspace for the Schur vectors*/
 67:   PetscMalloc1(neig*max_k, &SR);
 68:   dgmres->wr    = NULL;
 69:   dgmres->wi    = NULL;
 70:   dgmres->perm  = NULL;
 71:   dgmres->modul = NULL;
 72:   dgmres->Q     = NULL;
 73:   dgmres->Z     = NULL;

 75:   UU   = NULL;
 76:   XX   = NULL;
 77:   MX   = NULL;
 78:   AUU  = NULL;
 79:   XMX  = NULL;
 80:   XMU  = NULL;
 81:   UMX  = NULL;
 82:   AUAU = NULL;
 83:   TT   = NULL;
 84:   TTF  = NULL;
 85:   INVP = NULL;
 86:   X1   = NULL;
 87:   X2   = NULL;
 88:   MU   = NULL;
 89:   return 0;
 90: }

 92: /*
 93:  Run GMRES, possibly with restart.  Return residual history if requested.
 94:  input parameters:

 96:  .       gmres  - structure containing parameters and work areas

 98:  output parameters:
 99:  .        nres    - residuals (from preconditioned system) at each step.
100:  If restarting, consider passing nres+it.  If null,
101:  ignored
102:  .        itcount - number of iterations used.  nres[0] to nres[itcount]
103:  are defined.  If null, ignored.

105:  Notes:
106:  On entry, the value in vector VEC_VV(0) should be the initial residual
107:  (this allows shortcuts where the initial preconditioned residual is 0).
108:  */
109: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
110: {
111:   KSP_DGMRES     *dgmres = (KSP_DGMRES*)(ksp->data);
112:   PetscReal      res_norm,res,hapbnd,tt;
113:   PetscInt       it     = 0;
114:   PetscInt       max_k  = dgmres->max_k;
115:   PetscBool      hapend = PETSC_FALSE;
116:   PetscReal      res_old;
117:   PetscInt       test = 0;

119:   VecNormalize(VEC_VV(0),&res_norm);
120:   KSPCheckNorm(ksp,res_norm);
121:   res     = res_norm;
122:   *GRS(0) = res_norm;

124:   /* check for the convergence */
125:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
126:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
127:   else ksp->rnorm = 0.0;
128:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
129:   dgmres->it = (it - 1);
130:   KSPLogResidualHistory(ksp,ksp->rnorm);
131:   KSPMonitor(ksp,ksp->its,ksp->rnorm);
132:   if (!res) {
133:     if (itcount) *itcount = 0;
134:     ksp->reason = KSP_CONVERGED_ATOL;
135:     PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
136:     return 0;
137:   }
138:   /* record the residual norm to test if deflation is needed */
139:   res_old = res;

141:   (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);
142:   while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
143:     if (it) {
144:       KSPLogResidualHistory(ksp,ksp->rnorm);
145:       KSPMonitor(ksp,ksp->its,ksp->rnorm);
146:     }
147:     dgmres->it = (it - 1);
148:     if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
149:       KSPDGMRESGetNewVectors(ksp,it+1);
150:     }
151:     if (dgmres->r > 0) {
152:       if (ksp->pc_side == PC_LEFT) {
153:         /* Apply the first preconditioner */
154:         KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
155:         /* Then apply Deflation as a preconditioner */
156:         KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
157:       } else if (ksp->pc_side == PC_RIGHT) {
158:         KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
159:         KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
160:       }
161:     } else {
162:       KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
163:     }
164:     dgmres->matvecs += 1;
165:     /* update hessenberg matrix and do Gram-Schmidt */
166:     (*dgmres->orthog)(ksp,it);

168:     /* vv(i+1) . vv(i+1) */
169:     VecNormalize(VEC_VV(it+1),&tt);
170:     /* save the magnitude */
171:     *HH(it+1,it)  = tt;
172:     *HES(it+1,it) = tt;

174:     /* check for the happy breakdown */
175:     hapbnd = PetscAbsScalar(tt / *GRS(it));
176:     if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
177:     if (tt < hapbnd) {
178:       PetscInfo(ksp,"Detected happy breakdown, current hapbnd = %g tt = %g\n",(double)hapbnd,(double)tt);
179:       hapend = PETSC_TRUE;
180:     }
181:     KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);

183:     it++;
184:     dgmres->it = (it-1);     /* For converged */
185:     ksp->its++;
186:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
187:     else ksp->rnorm = 0.0;
188:     if (ksp->reason) break;

190:     (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);

192:     /* Catch error in happy breakdown and signal convergence and break from loop */
193:     if (hapend) {
194:       if (!ksp->reason) {
196:         else {
197:           ksp->reason = KSP_DIVERGED_BREAKDOWN;
198:           break;
199:         }
200:       }
201:     }
202:   }

204:   /* Monitor if we know that we will not return for a restart */
205:   if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
206:     KSPLogResidualHistory(ksp,ksp->rnorm);
207:     KSPMonitor(ksp,ksp->its,ksp->rnorm);
208:   }
209:   if (itcount) *itcount = it;

211:   /*
212:    Down here we have to solve for the "best" coefficients of the Krylov
213:    columns, add the solution values together, and possibly unwind the
214:    preconditioning from the solution
215:    */
216:   /* Form the solution (or the solution so far) */
217:   KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);

219:   /* Compute data for the deflation to be used during the next restart */
220:   if (!ksp->reason && ksp->its < ksp->max_it) {
221:     test = max_k *PetscLogReal(ksp->rtol/res) /PetscLogReal(res/res_old);
222:     /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
223:     if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
224:       KSPDGMRESComputeDeflationData(ksp,NULL);
225:     }
226:   }
227:   return 0;
228: }

230: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
231: {
232:   PetscInt       i,its,itcount;
233:   KSP_DGMRES     *dgmres    = (KSP_DGMRES*) ksp->data;
234:   PetscBool      guess_zero = ksp->guess_zero;


238:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
239:   ksp->its        = 0;
240:   dgmres->matvecs = 0;
241:   PetscObjectSAWsGrantAccess((PetscObject)ksp);

243:   itcount     = 0;
244:   while (!ksp->reason) {
245:     KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
246:     if (ksp->pc_side == PC_LEFT) {
247:       dgmres->matvecs += 1;
248:       if (dgmres->r > 0) {
249:         KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
250:         VecCopy(VEC_TEMP, VEC_VV(0));
251:       }
252:     }

254:     KSPDGMRESCycle(&its,ksp);
255:     itcount += its;
256:     if (itcount >= ksp->max_it) {
257:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
258:       break;
259:     }
260:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
261:   }
262:   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */

264:   for (i = 0; i < dgmres->r; i++) {
265:     VecViewFromOptions(UU[i],(PetscObject)ksp,"-ksp_dgmres_view_deflation_vecs");
266:   }
267:   return 0;
268: }

270: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
271: {
272:   KSP_DGMRES     *dgmres  = (KSP_DGMRES*) ksp->data;
273:   PetscInt       neig1    = dgmres->neig+EIG_OFFSET;
274:   PetscInt       max_neig = dgmres->max_neig;

276:   if (dgmres->r) {
277:     VecDestroyVecs(max_neig, &UU);
278:     VecDestroyVecs(max_neig, &MU);
279:     if (XX) {
280:       VecDestroyVecs(neig1, &XX);
281:       VecDestroyVecs(neig1, &MX);
282:     }
283:     PetscFree(TT);
284:     PetscFree(TTF);
285:     PetscFree(INVP);
286:     PetscFree(XMX);
287:     PetscFree(UMX);
288:     PetscFree(XMU);
289:     PetscFree(X1);
290:     PetscFree(X2);
291:     PetscFree(dgmres->work);
292:     PetscFree(dgmres->iwork);
293:     PetscFree(dgmres->wr);
294:     PetscFree(dgmres->wi);
295:     PetscFree(dgmres->modul);
296:     PetscFree(dgmres->Q);
297:     PetscFree(ORTH);
298:     PetscFree(AUAU);
299:     PetscFree(AUU);
300:     PetscFree(SR2);
301:   }
302:   PetscFree(SR);
303:   KSPDestroy_GMRES(ksp);
304:   return 0;
305: }

307: /*
308:  KSPDGMRESBuildSoln - create the solution from the starting vector and the
309:  current iterates.

311:  Input parameters:
312:  nrs - work area of size it + 1.
313:  vs  - index of initial guess
314:  vdest - index of result.  Note that vs may == vdest (replace
315:  guess with the solution).

317:  This is an internal routine that knows about the GMRES internals.
318:  */
319: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
320: {
321:   PetscScalar    tt;
322:   PetscInt       ii,k,j;
323:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) (ksp->data);

325:   /* Solve for solution vector that minimizes the residual */

327:   /* If it is < 0, no gmres steps have been performed */
328:   if (it < 0) {
329:     VecCopy(vs,vdest);     /* VecCopy() is smart, exists immediately if vguess == vdest */
330:     return 0;
331:   }
333:   if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
334:   else nrs[it] = 0.0;

336:   for (ii=1; ii<=it; ii++) {
337:     k  = it - ii;
338:     tt = *GRS(k);
339:     for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
341:     nrs[k] = tt / *HH(k,k);
342:   }

344:   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
345:   VecSet(VEC_TEMP,0.0);
346:   VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));

348:   /* Apply deflation */
349:   if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
350:     KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
351:     VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
352:   }
353:   KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);

355:   /* add solution to previous solution */
356:   if (vdest != vs) {
357:     VecCopy(vs,vdest);
358:   }
359:   VecAXPY(vdest,1.0,VEC_TEMP);
360:   return 0;
361: }

363: /*
364:  Do the scalar work for the orthogonalization.  Return new residual norm.
365:  */
366: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
367: {
368:   PetscScalar *hh,*cc,*ss,tt;
369:   PetscInt    j;
370:   KSP_DGMRES  *dgmres = (KSP_DGMRES*) (ksp->data);

372:   hh = HH(0,it);
373:   cc = CC(0);
374:   ss = SS(0);

376:   /* Apply all the previously computed plane rotations to the new column
377:    of the Hessenberg matrix */
378:   for (j=1; j<=it; j++) {
379:     tt  = *hh;
380:     *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
381:     hh++;
382:     *hh = *cc++ * *hh -(*ss++ * tt);
383:   }

385:   /*
386:    compute the new plane rotation, and apply it to:
387:    1) the right-hand-side of the Hessenberg system
388:    2) the new column of the Hessenberg matrix
389:    thus obtaining the updated value of the residual
390:    */
391:   if (!hapend) {
392:     tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
393:     if (tt == 0.0) {
394:       ksp->reason = KSP_DIVERGED_NULL;
395:       return 0;
396:     }
397:     *cc        = *hh / tt;
398:     *ss        = *(hh+1) / tt;
399:     *GRS(it+1) = -(*ss * *GRS(it));
400:     *GRS(it)   = PetscConj(*cc) * *GRS(it);
401:     *hh        = PetscConj(*cc) * *hh + *ss * *(hh+1);
402:     *res       = PetscAbsScalar(*GRS(it+1));
403:   } else {
404:     /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
405:      another rotation matrix (so RH doesn't change).  The new residual is
406:      always the new sine term times the residual from last time (GRS(it)),
407:      but now the new sine rotation would be zero...so the residual should
408:      be zero...so we will multiply "zero" by the last residual.  This might
409:      not be exactly what we want to do here -could just return "zero". */

411:     *res = 0.0;
412:   }
413:   return 0;
414: }

416: /*
417:   Allocates more work vectors, starting from VEC_VV(it).
418:  */
419: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
420: {
421:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
422:   PetscInt       nwork = dgmres->nwork_alloc,k,nalloc;

424:   nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
425:   /* Adjust the number to allocate to make sure that we don't exceed the
426:    number of available slots */
427:   if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
428:     nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
429:   }
430:   if (!nalloc) return 0;

432:   dgmres->vv_allocated += nalloc;

434:   KSPCreateVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
435:   PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);

437:   dgmres->mwork_alloc[nwork] = nalloc;
438:   for (k=0; k<nalloc; k++) {
439:     dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
440:   }
441:   dgmres->nwork_alloc++;
442:   return 0;
443: }

445: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
446: {
447:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;

449:   if (!ptr) {
450:     if (!dgmres->sol_temp) {
451:       VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
452:       PetscLogObjectParent((PetscObject)ksp,(PetscObject)dgmres->sol_temp);
453:     }
454:     ptr = dgmres->sol_temp;
455:   }
456:   if (!dgmres->nrs) {
457:     /* allocate the work area */
458:     PetscMalloc1(dgmres->max_k,&dgmres->nrs);
459:     PetscLogObjectMemory((PetscObject)ksp,dgmres->max_k*sizeof(PetscScalar));
460:   }
461:   KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
462:   if (result) *result = ptr;
463:   return 0;
464: }

466: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
467: {
468:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
469:   PetscBool      iascii,isharmonic;

471:   KSPView_GMRES(ksp,viewer);
472:   PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
473:   if (iascii) {
474:     if (dgmres->force) PetscViewerASCIIPrintf(viewer, "    Adaptive strategy is used: FALSE\n");
475:     else PetscViewerASCIIPrintf(viewer, "    Adaptive strategy is used: TRUE\n");
476:     PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic);
477:     if (isharmonic) {
478:       PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
479:     } else {
480:       PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
481:     }
482:     PetscViewerASCIIPrintf(viewer, "   Total number of extracted eigenvalues = %D\n", dgmres->r);
483:     PetscViewerASCIIPrintf(viewer, "   Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
484:     PetscViewerASCIIPrintf(viewer, "   relaxation parameter for the adaptive strategy(smv)  = %g\n", dgmres->smv);
485:     PetscViewerASCIIPrintf(viewer, "   Number of matvecs : %D\n", dgmres->matvecs);
486:   }
487:   return 0;
488: }

490: PetscErrorCode  KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
491: {
492:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

495:   dgmres->neig=neig;
496:   return 0;
497: }

499: static PetscErrorCode  KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
500: {
501:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

504:   dgmres->max_neig=max_neig;
505:   return 0;
506: }

508: static PetscErrorCode  KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
509: {
510:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

513:   dgmres->smv=ratio;
514:   return 0;
515: }

517: static PetscErrorCode  KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)
518: {
519:   KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;

521:   dgmres->force = force;
522:   return 0;
523: }

525: PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
526: {
527:   PetscInt       neig;
528:   PetscInt       max_neig;
529:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
530:   PetscBool      flg;

532:   KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
533:   PetscOptionsHead(PetscOptionsObject,"KSP DGMRES Options");
534:   PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
535:   if (flg) {
536:     KSPDGMRESSetEigen(ksp, neig);
537:   }
538:   PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
539:   if (flg) {
540:     KSPDGMRESSetMaxEigen(ksp, max_neig);
541:   }
542:   PetscOptionsReal("-ksp_dgmres_ratio","Relaxation parameter for the smaller number of matrix-vectors product allowed","KSPDGMRESSetRatio",dgmres->smv,&dgmres->smv,NULL);
543:   PetscOptionsBool("-ksp_dgmres_improve","Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)",NULL,dgmres->improve,&dgmres->improve,NULL);
544:   PetscOptionsBool("-ksp_dgmres_force","Sets DGMRES always at restart active, i.e do not use the adaptive strategy","KSPDGMRESForce",dgmres->force,&dgmres->force,NULL);
545:   PetscOptionsTail();
546:   return 0;
547: }

549: PetscErrorCode  KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
550: {
551:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
552:   PetscInt       i,j, k;
553:   PetscBLASInt   nr, bmax;
554:   PetscInt       r = dgmres->r;
555:   PetscInt       neig;          /* number of eigenvalues to extract at each restart */
556:   PetscInt       neig1    = dgmres->neig + EIG_OFFSET;  /* max number of eig that can be extracted at each restart */
557:   PetscInt       max_neig = dgmres->max_neig;  /* Max number of eigenvalues to extract during the iterative process */
558:   PetscInt       N        = dgmres->max_k+1;
559:   PetscInt       n        = dgmres->it+1;
560:   PetscReal      alpha;

562:   PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
563:   if (dgmres->neig == 0 || (max_neig < (r+neig1) && !dgmres->improve)) {
564:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
565:     return 0;
566:   }

568:   KSPDGMRESComputeSchurForm(ksp, &neig);
569:   /* Form the extended Schur vectors X=VV*Sr */
570:   if (!XX) {
571:     VecDuplicateVecs(VEC_VV(0), neig1, &XX);
572:   }
573:   for (j = 0; j<neig; j++) {
574:     VecZeroEntries(XX[j]);
575:     VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
576:   }

578:   /* Orthogonalize X against U */
579:   if (!ORTH) {
580:     PetscMalloc1(max_neig, &ORTH);
581:   }
582:   if (r > 0) {
583:     /* modified Gram-Schmidt */
584:     for (j = 0; j<neig; j++) {
585:       for (i=0; i<r; i++) {
586:         /* First, compute U'*X[j] */
587:         VecDot(XX[j], UU[i], &alpha);
588:         /* Then, compute X(j)=X(j)-U*U'*X(j) */
589:         VecAXPY(XX[j], -alpha, UU[i]);
590:       }
591:     }
592:   }
593:   /* Compute MX = M^{-1}*A*X */
594:   if (!MX) {
595:     VecDuplicateVecs(VEC_VV(0), neig1, &MX);
596:   }
597:   for (j = 0; j<neig; j++) {
598:     KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
599:   }
600:   dgmres->matvecs += neig;

602:   if ((r+neig1) > max_neig && dgmres->improve) {    /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
603:     KSPDGMRESImproveEig(ksp, neig);
604:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
605:     return 0;   /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
606:   }

608:   /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
609:   if (!XMX) {
610:     PetscMalloc1(neig1*neig1, &XMX);
611:   }
612:   for (j = 0; j < neig; j++) {
613:     VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
614:   }

616:   if (r > 0) {
617:     /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
618:     if (!UMX) {
619:       PetscMalloc1(max_neig*neig1, &UMX);
620:     }
621:     for (j = 0; j < neig; j++) {
622:       VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
623:     }
624:     /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
625:     if (!XMU) {
626:       PetscMalloc1(max_neig*neig1, &XMU);
627:     }
628:     for (j = 0; j<r; j++) {
629:       VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
630:     }
631:   }

633:   /* Form the new matrix T = [T UMX; XMU XMX]; */
634:   if (!TT) {
635:     PetscMalloc1(max_neig*max_neig, &TT);
636:   }
637:   if (r > 0) {
638:     /* Add XMU to T */
639:     for (j = 0; j < r; j++) {
640:       PetscArraycpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig);
641:     }
642:     /* Add [UMX; XMX] to T */
643:     for (j = 0; j < neig; j++) {
644:       k = r+j;
645:       PetscArraycpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r);
646:       PetscArraycpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig);
647:     }
648:   } else { /* Add XMX to T */
649:     for (j = 0; j < neig; j++) {
650:       PetscArraycpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig);
651:     }
652:   }

654:   dgmres->r += neig;
655:   r          = dgmres->r;
656:   PetscBLASIntCast(r,&nr);
657:   /*LU Factorize T with Lapack xgetrf routine */

659:   PetscBLASIntCast(max_neig,&bmax);
660:   if (!TTF) {
661:     PetscMalloc1(bmax*bmax, &TTF);
662:   }
663:   PetscArraycpy(TTF, TT, bmax*r);
664:   if (!INVP) {
665:     PetscMalloc1(bmax, &INVP);
666:   }
667:   {
668:     PetscBLASInt info;
669:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
671:   }

673:   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
674:   if (!UU) {
675:     VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
676:     VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
677:   }
678:   for (j=0; j<neig; j++) {
679:     VecCopy(XX[j], UU[r-neig+j]);
680:     VecCopy(MX[j], MU[r-neig+j]);
681:   }
682:   PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
683:   return 0;
684: }

686: PetscErrorCode  KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
687: {
688:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
689:   PetscInt       N = dgmres->max_k + 1, n=dgmres->it+1;
690:   PetscBLASInt   bn;
691:   PetscReal      *A;
692:   PetscBLASInt   ihi;
693:   PetscBLASInt   ldA = 0;          /* leading dimension of A */
694:   PetscBLASInt   ldQ;              /* leading dimension of Q */
695:   PetscReal      *Q;               /*  orthogonal matrix of  (left) Schur vectors */
696:   PetscReal      *work;            /* working vector */
697:   PetscBLASInt   lwork;            /* size of the working vector */
698:   PetscInt       *perm;            /* Permutation vector to sort eigenvalues */
699:   PetscInt       i, j;
700:   PetscBLASInt   NbrEig;           /* Number of eigenvalues really extracted */
701:   PetscReal      *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A */
702:   PetscBLASInt   *select;
703:   PetscBLASInt   *iwork;
704:   PetscBLASInt   liwork;
705:   PetscScalar    *Ht;              /* Transpose of the Hessenberg matrix */
706:   PetscScalar    *t;               /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
707:   PetscBLASInt   *ipiv;            /* Permutation vector to be used in LAPACK */
708:   PetscBool      flag;             /* determine whether to use Ritz vectors or harmonic Ritz vectors */

710:   PetscBLASIntCast(n,&bn);
711:   PetscBLASIntCast(N,&ldA);
712:   ihi  = ldQ = bn;
713:   PetscBLASIntCast(5*N,&lwork);

715: #if defined(PETSC_USE_COMPLEX)
716:   SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "No support for complex numbers.");
717: #endif

719:   PetscMalloc1(ldA*ldA, &A);
720:   PetscMalloc1(ldQ*n, &Q);
721:   PetscMalloc1(lwork, &work);
722:   if (!dgmres->wr) {
723:     PetscMalloc1(n, &dgmres->wr);
724:     PetscMalloc1(n, &dgmres->wi);
725:   }
726:   wr   = dgmres->wr;
727:   wi   = dgmres->wi;
728:   PetscMalloc1(n,&modul);
729:   PetscMalloc1(n,&perm);
730:   /* copy the Hessenberg matrix to work space */
731:   PetscArraycpy(A, dgmres->hes_origin, ldA*ldA);
732:   PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag);
733:   if (flag) {
734:     /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
735:     /* Transpose the Hessenberg matrix */
736:     PetscMalloc1(bn*bn, &Ht);
737:     for (i = 0; i < bn; i++) {
738:       for (j = 0; j < bn; j++) {
739:         Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
740:       }
741:     }

743:     /* Solve the system H^T*t = h_{m+1,m}e_m */
744:     PetscCalloc1(bn, &t);
745:     t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
746:     PetscMalloc1(bn, &ipiv);
747:     /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
748:     {
749:       PetscBLASInt info;
750:       PetscBLASInt nrhs = 1;
751:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
753:     }
754:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
755:     for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
756:     PetscFree(t);
757:     PetscFree(Ht);
758:   }
759:   /* Compute eigenvalues with the Schur form */
760:   {
761:     PetscBLASInt info=0;
762:     PetscBLASInt ilo = 1;
763:     PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
765:   }
766:   PetscFree(work);

768:   /* sort the eigenvalues */
769:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
770:   for (i=0; i<n; i++) perm[i] = i;

772:   PetscSortRealWithPermutation(n, modul, perm);
773:   /* save the complex modulus of the largest eigenvalue in magnitude */
774:   if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
775:   /* count the number of extracted eigenvalues (with complex conjugates) */
776:   NbrEig = 0;
777:   while (NbrEig < dgmres->neig) {
778:     if (wi[perm[NbrEig]] != 0) NbrEig += 2;
779:     else NbrEig += 1;
780:   }
781:   /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

783:   PetscCalloc1(n, &select);

785:   if (!dgmres->GreatestEig) {
786:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
787:   } else {
788:     for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
789:   }
790:   /* call Lapack dtrsen */
791:   lwork  =  PetscMax(1, 4 * NbrEig *(bn-NbrEig));
792:   liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
793:   PetscMalloc1(lwork, &work);
794:   PetscMalloc1(liwork, &iwork);
795:   {
796:     PetscBLASInt info=0;
797:     PetscReal    CondEig;         /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
798:     PetscReal    CondSub;         /* estimated reciprocal condition number of the specified invariant subspace. */
799:     PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
801:   }
802:   PetscFree(select);

804:   /* Extract the Schur vectors */
805:   for (j = 0; j < NbrEig; j++) {
806:     PetscArraycpy(&SR[j*N], &(Q[j*ldQ]), n);
807:   }
808:   *neig = NbrEig;
809:   PetscFree(A);
810:   PetscFree(work);
811:   PetscFree(perm);
812:   PetscFree(work);
813:   PetscFree(iwork);
814:   PetscFree(modul);
815:   PetscFree(Q);
816:   return 0;
817: }

819: PetscErrorCode  KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
820: {
821:   KSP_DGMRES     *dgmres = (KSP_DGMRES*) ksp->data;
822:   PetscInt       i, r     = dgmres->r;
823:   PetscReal      alpha    = 1.0;
824:   PetscInt       max_neig = dgmres->max_neig;
825:   PetscBLASInt   br,bmax;
826:   PetscReal      lambda = dgmres->lambdaN;

828:   PetscBLASIntCast(r,&br);
829:   PetscBLASIntCast(max_neig,&bmax);
830:   PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
831:   if (!r) {
832:     VecCopy(x,y);
833:     return 0;
834:   }
835:   /* Compute U'*x */
836:   if (!X1) {
837:     PetscMalloc1(bmax, &X1);
838:     PetscMalloc1(bmax, &X2);
839:   }
840:   VecMDot(x, r, UU, X1);

842:   /* Solve T*X1=X2 for X1*/
843:   PetscArraycpy(X2, X1, br);
844:   {
845:     PetscBLASInt info;
846:     PetscBLASInt nrhs = 1;
847:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
849:   }
850:   /* Iterative refinement -- is it really necessary ?? */
851:   if (!WORK) {
852:     PetscMalloc1(3*bmax, &WORK);
853:     PetscMalloc1(bmax, &IWORK);
854:   }
855:   {
856:     PetscBLASInt info;
857:     PetscReal    berr, ferr;
858:     PetscBLASInt nrhs = 1;
859:     PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
861:   }

863:   for (i = 0; i < r; i++) X2[i] =  X1[i]/lambda - X2[i];

865:   /* Compute X2=U*X2 */
866:   VecZeroEntries(y);
867:   VecMAXPY(y, r, X2, UU);
868:   VecAXPY(y, alpha, x);

870:   PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
871:   return 0;
872: }

874: static PetscErrorCode  KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
875: {
876:   KSP_DGMRES   *dgmres = (KSP_DGMRES*) ksp->data;
877:   PetscInt     j,r_old, r = dgmres->r;
878:   PetscBLASInt i     = 0;
879:   PetscInt     neig1 = dgmres->neig + EIG_OFFSET;
880:   PetscInt     bmax  = dgmres->max_neig;
881:   PetscInt     aug   = r + neig;         /* actual size of the augmented invariant basis */
882:   PetscInt     aug1  = bmax+neig1;       /* maximum size of the augmented invariant basis */
883:   PetscBLASInt ldA;            /* leading dimension of AUAU and AUU*/
884:   PetscBLASInt N;              /* size of AUAU */
885:   PetscReal    *Q;             /*  orthogonal matrix of  (left) schur vectors */
886:   PetscReal    *Z;             /*  orthogonal matrix of  (right) schur vectors */
887:   PetscReal    *work;          /* working vector */
888:   PetscBLASInt lwork;          /* size of the working vector */
889:   PetscInt     *perm;          /* Permutation vector to sort eigenvalues */
890:   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
891:   PetscBLASInt NbrEig = 0,nr,bm;
892:   PetscBLASInt *select;
893:   PetscBLASInt liwork, *iwork;

895:   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
896:   if (!AUU) {
897:     PetscMalloc1(aug1*aug1, &AUU);
898:     PetscMalloc1(aug1*aug1, &AUAU);
899:   }
900:   /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
901:    * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
902:   /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
903:   for (j=0; j < r; j++) {
904:     VecMDot(UU[j], r, MU, &AUU[j*aug1]);
905:   }
906:   /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
907:   for (j = 0; j < neig; j++) {
908:     VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
909:   }
910:   /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
911:   for (j = 0; j < r; j++) {
912:     VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
913:   }
914:   /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
915:   for (j = 0; j < neig; j++) {
916:     VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
917:   }

919:   /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
920:   /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
921:   for (j=0; j < r; j++) {
922:     VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
923:   }
924:   /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
925:   for (j = 0; j < neig; j++) {
926:     VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
927:   }
928:   /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
929:   for (j = 0; j < r; j++) {
930:     VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
931:   }
932:   /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
933:   for (j = 0; j < neig; j++) {
934:     VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
935:   }

937:   /* Computation of the eigenvectors */
938:   PetscBLASIntCast(aug1,&ldA);
939:   PetscBLASIntCast(aug,&N);
940:   lwork = 8 * N + 20; /* sizeof the working space */
941:   PetscMalloc1(N, &wr);
942:   PetscMalloc1(N, &wi);
943:   PetscMalloc1(N, &beta);
944:   PetscMalloc1(N, &modul);
945:   PetscMalloc1(N, &perm);
946:   PetscMalloc1(N*N, &Q);
947:   PetscMalloc1(N*N, &Z);
948:   PetscMalloc1(lwork, &work);
949:   {
950:     PetscBLASInt info=0;
951:     PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
953:   }
954:   for (i=0; i<N; i++) {
955:     if (beta[i] !=0.0) {
956:       wr[i] /=beta[i];
957:       wi[i] /=beta[i];
958:     }
959:   }
960:   /* sort the eigenvalues */
961:   for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
962:   for (i=0; i<N; i++) perm[i] = i;
963:   PetscSortRealWithPermutation(N, modul, perm);
964:   /* Save the norm of the largest eigenvalue */
965:   if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
966:   /* Allocate space to extract the first r schur vectors   */
967:   if (!SR2) {
968:     PetscMalloc1(aug1*bmax, &SR2);
969:   }
970:   /* count the number of extracted eigenvalues (complex conjugates count as 2) */
971:   while (NbrEig < bmax) {
972:     if (wi[perm[NbrEig]] == 0) NbrEig += 1;
973:     else NbrEig += 2;
974:   }
975:   if (NbrEig > bmax) NbrEig = bmax - 1;
976:   r_old     = r; /* previous size of r */
977:   dgmres->r = r = NbrEig;

979:   /* Select the eigenvalues to reorder */
980:   PetscCalloc1(N, &select);
981:   if (!dgmres->GreatestEig) {
982:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
983:   } else {
984:     for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
985:   }
986:   /* Reorder and extract the new <r> schur vectors */
987:   lwork  = PetscMax(4 * N + 16,  2 * NbrEig *(N - NbrEig));
988:   liwork = PetscMax(N + 6,  2 * NbrEig *(N - NbrEig));
989:   PetscFree(work);
990:   PetscMalloc1(lwork, &work);
991:   PetscMalloc1(liwork, &iwork);
992:   {
993:     PetscBLASInt info=0;
994:     PetscReal    Dif[2];
995:     PetscBLASInt ijob  = 2;
996:     PetscBLASInt wantQ = 1, wantZ = 1;
997:     PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
999:   }
1000:   PetscFree(select);

1002:   for (j=0; j<r; j++) {
1003:     PetscArraycpy(&SR2[j*aug1], &(Z[j*N]), N);
1004:   }

1006:   /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
1007:    -- save it temporarily in MU */
1008:   for (j = 0; j < r; j++) {
1009:     VecZeroEntries(MU[j]);
1010:     VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1011:     VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1012:   }
1013:   /* Form T = U'*MU*U */
1014:   for (j = 0; j < r; j++) {
1015:     VecCopy(MU[j], UU[j]);
1016:     KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1017:   }
1018:   dgmres->matvecs += r;
1019:   for (j = 0; j < r; j++) {
1020:     VecMDot(MU[j], r, UU, &TT[j*bmax]);
1021:   }
1022:   /* Factorize T */
1023:   PetscArraycpy(TTF, TT, bmax*r);
1024:   PetscBLASIntCast(r,&nr);
1025:   PetscBLASIntCast(bmax,&bm);
1026:   {
1027:     PetscBLASInt info;
1028:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1030:   }
1031:   /* Free Memory */
1032:   PetscFree(wr);
1033:   PetscFree(wi);
1034:   PetscFree(beta);
1035:   PetscFree(modul);
1036:   PetscFree(perm);
1037:   PetscFree(Q);
1038:   PetscFree(Z);
1039:   PetscFree(work);
1040:   PetscFree(iwork);
1041:   return 0;
1042: }

1044: /*MC
1045:      KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1046:                  In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1047:                  stagnation occurs.

1049:    Options Database Keys:
1050:    GMRES Options (inherited):
1051: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1052: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1053: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1054:                              vectors are allocated as needed)
1055: .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1056: .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1057: .   -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
1058:                                    stability of the classical Gram-Schmidt  orthogonalization.
1059: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated

1061:    DGMRES Options Database Keys:
1062: +   -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1063: .   -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1064:                                        process
1065: .   -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1066: -   -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1067:                                                    parsed by PetscOptionsGetViewer().  If neig > 1, viewerspec should
1068:                                                    end with ":append".  No vectors will be viewed if the adaptive
1069:                                                    strategy chooses not to deflate, so -ksp_dgmres_force should also
1070:                                                    be given.
1071:                                                    The deflation vectors span a subspace that may be a good
1072:                                                    approximation of the subspace of smallest eigenvectors of the
1073:                                                    preconditioned operator, so this option can aid in understanding
1074:                                                    the performance of a preconditioner.

1076:  Level: beginner

1078:  Notes:
1079:     Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported

1081:  References:
1082: + * - J. Erhel, K. Burrage and B. Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1083: - * - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1084:    In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

1086:  Contributed by: Desire NUENTSA WAKAM,INRIA

1088:  .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1089:  KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1090:  KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1091:  KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()

1093:  M*/

1095: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1096: {
1097:   KSP_DGMRES     *dgmres;

1099:   PetscNewLog(ksp,&dgmres);
1100:   ksp->data = (void*) dgmres;

1102:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
1103:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
1104:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);

1106:   ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1107:   ksp->ops->setup                        = KSPSetUp_DGMRES;
1108:   ksp->ops->solve                        = KSPSolve_DGMRES;
1109:   ksp->ops->destroy                      = KSPDestroy_DGMRES;
1110:   ksp->ops->view                         = KSPView_DGMRES;
1111:   ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1112:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1113:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1115:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1116:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1117:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1118:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1119:   PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1120:   /* -- New functions defined in DGMRES -- */
1121:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1122:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1123:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1124:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1125:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1126:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1127:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1128:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);

1130:   PetscLogEventRegister("DGMRESCompDefl",  KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1131:   PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);

1133:   dgmres->haptol         = 1.0e-30;
1134:   dgmres->q_preallocate  = 0;
1135:   dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1136:   dgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
1137:   dgmres->nrs            = NULL;
1138:   dgmres->sol_temp       = NULL;
1139:   dgmres->max_k          = GMRES_DEFAULT_MAXK;
1140:   dgmres->Rsvd           = NULL;
1141:   dgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
1142:   dgmres->orthogwork     = NULL;

1144:   /* Default values for the deflation */
1145:   dgmres->r           = 0;
1146:   dgmres->neig        = DGMRES_DEFAULT_EIG;
1147:   dgmres->max_neig    = DGMRES_DEFAULT_MAXEIG-1;
1148:   dgmres->lambdaN     = 0.0;
1149:   dgmres->smv         = SMV;
1150:   dgmres->matvecs     = 0;
1151:   dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1152:   dgmres->HasSchur    = PETSC_FALSE;
1153:   return 0;
1154: }