Actual source code: fbcgsr.c


  2: /*
  3:     This file implements FBiCGStab-R.
  4:     Only allow right preconditioning.
  5:     FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
  6:       (1) There are fewer MPI_Allreduce calls.
  7:       (2) The convergence occasionally is much faster than that of FBiCGStab.
  8: */
  9: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
 10: #include <petsc/private/vecimpl.h>

 12: static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
 13: {
 14:   KSPSetWorkVecs(ksp,8);
 15:   return 0;
 16: }

 18: static PetscErrorCode  KSPSolve_FBCGSR(KSP ksp)
 19: {
 20:   PetscInt          i,j,N;
 21:   PetscScalar       tau,sigma,alpha,omega,beta;
 22:   PetscReal         rho;
 23:   PetscScalar       xi1,xi2,xi3,xi4;
 24:   Vec               X,B,P,P2,RP,R,V,S,T,S2;
 25:   PetscScalar       *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
 26:   PetscScalar       *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
 27:   PetscScalar       insums[4],outsums[4];
 28:   KSP_BCGS          *bcgs = (KSP_BCGS*)ksp->data;
 29:   PC                pc;
 30:   Mat               mat;

 33:   VecGetLocalSize(ksp->vec_sol,&N);

 35:   X  = ksp->vec_sol;
 36:   B  = ksp->vec_rhs;
 37:   P2 = ksp->work[0];

 39:   /* The followings are involved in modified inner product calculations and vector updates */
 40:   RP = ksp->work[1]; VecGetArray(RP,(PetscScalar**)&rp)); PetscCall(VecRestoreArray(RP,NULL);
 41:   R  = ksp->work[2]; VecGetArray(R,(PetscScalar**)&r));   PetscCall(VecRestoreArray(R,NULL);
 42:   P  = ksp->work[3]; VecGetArray(P,(PetscScalar**)&p));   PetscCall(VecRestoreArray(P,NULL);
 43:   V  = ksp->work[4]; VecGetArray(V,(PetscScalar**)&v));   PetscCall(VecRestoreArray(V,NULL);
 44:   S  = ksp->work[5]; VecGetArray(S,(PetscScalar**)&s));   PetscCall(VecRestoreArray(S,NULL);
 45:   T  = ksp->work[6]; VecGetArray(T,(PetscScalar**)&t));   PetscCall(VecRestoreArray(T,NULL);
 46:   S2 = ksp->work[7]; VecGetArray(S2,(PetscScalar**)&s2)); PetscCall(VecRestoreArray(S2,NULL);

 48:   /* Only supports right preconditioning */
 50:   if (!ksp->guess_zero) {
 51:     if (!bcgs->guess) {
 52:       VecDuplicate(X,&bcgs->guess);
 53:     }
 54:     VecCopy(X,bcgs->guess);
 55:   } else {
 56:     VecSet(X,0.0);
 57:   }

 59:   /* Compute initial residual */
 60:   KSPGetPC(ksp,&pc);
 61:   PCSetUp(pc);
 62:   PCGetOperators(pc,&mat,NULL);
 63:   if (!ksp->guess_zero) {
 64:     KSP_MatMult(ksp,mat,X,P2); /* P2 is used as temporary storage */
 65:     VecCopy(B,R);
 66:     VecAXPY(R,-1.0,P2);
 67:   } else {
 68:     VecCopy(B,R);
 69:   }

 71:   /* Test for nothing to do */
 72:   VecNorm(R,NORM_2,&rho);
 73:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
 74:   ksp->its = 0;
 75:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
 76:   else ksp->rnorm = 0;
 77:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
 78:   KSPLogResidualHistory(ksp,ksp->rnorm);
 79:   KSPMonitor(ksp,0,ksp->rnorm);
 80:   (*ksp->converged)(ksp,0,ksp->rnorm,&ksp->reason,ksp->cnvP);
 81:   if (ksp->reason) return 0;

 83:   /* Initialize iterates */
 84:   VecCopy(R,RP); /* rp <- r */
 85:   VecCopy(R,P); /* p <- r */

 87:   /* Big loop */
 88:   for (i=0; i<ksp->max_it; i++) {

 90:     /* matmult and pc */
 91:     KSP_PCApply(ksp,P,P2); /* p2 <- K p */
 92:     KSP_MatMult(ksp,mat,P2,V); /* v <- A p2 */

 94:     /* inner prodcuts */
 95:     if (i==0) {
 96:       tau  = rho*rho;
 97:       VecDot(V,RP,&sigma); /* sigma <- (v,rp) */
 98:     } else {
 99:       PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
100:       tau  = sigma = 0.0;
101:       for (j=0; j<N; j++) {
102:         tau   += r[j]*rp[j]; /* tau <- (r,rp) */
103:         sigma += v[j]*rp[j]; /* sigma <- (v,rp) */
104:       }
105:       PetscLogFlops(4.0*N);
106:       PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
107:       insums[0] = tau;
108:       insums[1] = sigma;
109:       PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
110:       MPIU_Allreduce(insums,outsums,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
111:       PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
112:       tau       = outsums[0];
113:       sigma     = outsums[1];
114:     }

116:     /* scalar update */
117:     alpha = tau / sigma;

119:     /* vector update */
120:     VecWAXPY(S,-alpha,V,R);  /* s <- r - alpha v */

122:     /* matmult and pc */
123:     KSP_PCApply(ksp,S,S2); /* s2 <- K s */
124:     KSP_MatMult(ksp,mat,S2,T); /* t <- A s2 */

126:     /* inner prodcuts */
127:     PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
128:     xi1  = xi2 = xi3 = xi4 = 0.0;
129:     for (j=0; j<N; j++) {
130:       xi1 += s[j]*s[j]; /* xi1 <- (s,s) */
131:       xi2 += t[j]*s[j]; /* xi2 <- (t,s) */
132:       xi3 += t[j]*t[j]; /* xi3 <- (t,t) */
133:       xi4 += t[j]*rp[j]; /* xi4 <- (t,rp) */
134:     }
135:     PetscLogFlops(8.0*N);
136:     PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);

138:     insums[0] = xi1;
139:     insums[1] = xi2;
140:     insums[2] = xi3;
141:     insums[3] = xi4;

143:     PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
144:     MPIU_Allreduce(insums,outsums,4,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
145:     PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
146:     xi1  = outsums[0];
147:     xi2  = outsums[1];
148:     xi3  = outsums[2];
149:     xi4  = outsums[3];

151:     /* test denominator */
152:     if ((xi3 == 0.0) || (sigma == 0.0)) {
154:       else ksp->reason = KSP_DIVERGED_BREAKDOWN;
155:       PetscInfo(ksp,"KSPSolve has failed due to zero inner product\n");
156:       break;
157:     }

159:     /* scalar updates */
160:     omega = xi2 / xi3;
161:     beta  = -xi4 / sigma;
162:     rho   = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */

164:     /* vector updates */
165:     VecAXPBYPCZ(X,alpha,omega,1.0,P2,S2); /* x <- alpha * p2 + omega * s2 + x */

167:     /* convergence test */
168:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
169:     ksp->its++;
170:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
171:     else ksp->rnorm = 0;
172:     PetscObjectSAWsGrantAccess((PetscObject)ksp);
173:     KSPLogResidualHistory(ksp,ksp->rnorm);
174:     KSPMonitor(ksp,i+1,ksp->rnorm);
175:     (*ksp->converged)(ksp,i+1,ksp->rnorm,&ksp->reason,ksp->cnvP);
176:     if (ksp->reason) break;

178:     /* vector updates */
179:     PetscLogEventBegin(VEC_Ops,0,0,0,0);
180:     for (j=0; j<N; j++) {
181:       r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
182:       p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
183:     }
184:     PetscLogFlops(6.0*N);
185:     PetscLogEventEnd(VEC_Ops,0,0,0,0);

187:   }

189:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
190:   return 0;
191: }

193: /*MC
194:      KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGSTab.

196:    Options Database Keys:
197:     see KSPSolve()

199:    Level: beginner

201:    Notes:
202:     Only allow right preconditioning

204: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGSL, KSPSetPCSide()
205: M*/
206: PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
207: {
208:   KSP_BCGS       *bcgs;

210:   PetscNewLog(ksp,&bcgs);

212:   ksp->data                = bcgs;
213:   ksp->ops->setup          = KSPSetUp_FBCGSR;
214:   ksp->ops->solve          = KSPSolve_FBCGSR;
215:   ksp->ops->destroy        = KSPDestroy_BCGS;
216:   ksp->ops->reset          = KSPReset_BCGS;
217:   ksp->ops->buildsolution  = KSPBuildSolution_BCGS;
218:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
219:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
220:   ksp->pc_side             = PC_RIGHT; /* set default PC side */

222:   KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
223:   KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
224:   KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);
225:   return 0;
226: }