Actual source code: mpiviennacl.cxx
/*
   This file contains routines for Parallel vector operations.
 */
#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

/*MC
   VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

   Options Database Keys:
.  -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()

   Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECSEQVIENNACL, VECMPIVIENNACL, VECSTANDARD, VecType, VecCreateMPI()
M*/

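/*
   Illustrative usage sketch (not part of the original file): obtaining a VECVIENNACL
   vector programmatically with VecSetType(). The helper name ExampleCreateVecViennaCL
   is hypothetical; it only combines standard Vec calls referenced in the manpage above.
*/
static PetscErrorCode ExampleCreateVecViennaCL(MPI_Comm comm,PetscInt n,Vec *x)
{
  VecCreate(comm,x);                /* empty vector on the given communicator */
  VecSetSizes(*x,n,PETSC_DECIDE);   /* local size n, global size computed */
  VecSetType(*x,VECVIENNACL);       /* resolves to VECSEQVIENNACL or VECMPIVIENNACL by communicator size */
  VecSet(*x,1.0);
  return 0;
}
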
PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
{
  try {
    if (v->spptr) {
      delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
      delete (Vec_ViennaCL*)v->spptr;
    }
  } catch(std::exception const & ex) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s",ex.what());
  }
  VecDestroy_MPI(v);
  return 0;
}

PetscErrorCode VecNorm_MPIViennaCL(Vec xin,NormType type,PetscReal *z)
{
  PetscReal sum,work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqViennaCL(xin,NORM_2,&work);
    work *= work;
    MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_SeqViennaCL(xin,NORM_1,&work);
    /* Find the global sum */
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqViennaCL(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqViennaCL(xin,NORM_1,temp);
    VecNorm_SeqViennaCL(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
    z[1] = PetscSqrtReal(z[1]);
  }
  return 0;
}

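/*
   Illustrative sketch (not part of the original file): from the caller's side, the
   NORM_1_AND_2 path above fills a two-entry array with a single Allreduce. The helper
   name ExampleNorms and the variable names are hypothetical.
*/
static PetscErrorCode ExampleNorms(Vec x)
{
  PetscReal norms[2];   /* norms[0] = 1-norm, norms[1] = 2-norm */

  VecNorm(x,NORM_1_AND_2,norms);
  PetscPrintf(PetscObjectComm((PetscObject)x),"1-norm %g  2-norm %g\n",(double)norms[0],(double)norms[1]);
  return 0;
}
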
PetscErrorCode VecDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar sum,work;

  VecDot_SeqViennaCL(xin,yin,&work);
  MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecTDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar sum,work;

  VecTDot_SeqViennaCL(xin,yin,&work);
  MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecMDot_MPIViennaCL(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc1(nv,&work);
  }
  VecMDot_SeqViennaCL(xin,nv,y,work);
  MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
  if (nv > 128) {
    PetscFree(work);
  }
  return 0;
}

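/*
   Illustrative sketch (not part of the original file): VecMDot() as seen by a caller.
   Each entry returned here is already a global dot product, since the routine above
   reduces all nv local results with a single MPIU_Allreduce. The helper name
   ExampleMDot is hypothetical.
*/
static PetscErrorCode ExampleMDot(Vec x,PetscInt nv,const Vec y[])
{
  PetscScalar *z;

  PetscMalloc1(nv,&z);
  VecMDot(x,nv,y,z);   /* nv global dot products in one reduction */
  PetscFree(z);
  return 0;
}
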
/*MC
   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

   Options Database Keys:
.  -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()

   Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI()
M*/

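/*
   Illustrative sketch (not part of the original file): selecting the type from the
   options database, e.g. by running with -vec_type mpiviennacl (or the generic
   -vec_type viennacl). The helper name ExampleCreateFromOptions is hypothetical.
*/
static PetscErrorCode ExampleCreateFromOptions(MPI_Comm comm,PetscInt n,Vec *x)
{
  VecCreate(comm,x);
  VecSetSizes(*x,n,PETSC_DECIDE);
  VecSetFromOptions(*x);   /* honors -vec_type mpiviennacl from the command line */
  return 0;
}
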
PetscErrorCode VecDuplicate_MPIViennaCL(Vec win,Vec *v)
{
  Vec_MPI     *vw,*w = (Vec_MPI*)win->data;
  PetscScalar *array;

  VecCreate(PetscObjectComm((PetscObject)win),v);
  PetscLayoutReference(win->map,&(*v)->map);

  VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);
  vw = (Vec_MPI*)(*v)->data;
  PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    VecGetArray(*v,&array);
    VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);
    PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));
    VecRestoreArray(*v,&array);
    PetscLogObjectParent((PetscObject)*v,(PetscObject)vw->localrep);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      PetscObjectReference((PetscObject)vw->localupdate);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  PetscObjectChangeTypeName((PetscObject)(*v),VECMPIVIENNACL);

  PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);
  PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  return 0;
}

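/*
   Illustrative sketch (not part of the original file): VecDuplicate() dispatches to the
   routine above for a VECMPIVIENNACL input, so the duplicate inherits the type, layout,
   block size, and stashing options, but not the numerical values; VecCopy() is needed
   for those. The helper name ExampleDuplicate is hypothetical.
*/
static PetscErrorCode ExampleDuplicate(Vec x,Vec *y)
{
  VecDuplicate(x,y);   /* same type and layout as x */
  VecCopy(x,*y);       /* values are copied only on request */
  return 0;
}
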
PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
{
  PetscScalar work[2],sum[2];

  VecDotNorm2_SeqViennaCL(s,t,work,work+1);
  MPIU_Allreduce((void*)&work,(void*)&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));
  *dp = sum[0];
  *nm = sum[1];
  return 0;
}

PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv,PetscBool bind)
{
  vv->boundtocpu = bind;

  if (bind) {
    VecViennaCLCopyFromGPU(vv);
    vv->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    vv->ops->dotnorm2               = NULL;
    vv->ops->waxpy                  = VecWAXPY_Seq;
    vv->ops->dot                    = VecDot_MPI;
    vv->ops->mdot                   = VecMDot_MPI;
    vv->ops->tdot                   = VecTDot_MPI;
    vv->ops->norm                   = VecNorm_MPI;
    vv->ops->scale                  = VecScale_Seq;
    vv->ops->copy                   = VecCopy_Seq;
    vv->ops->set                    = VecSet_Seq;
    vv->ops->swap                   = VecSwap_Seq;
    vv->ops->axpy                   = VecAXPY_Seq;
    vv->ops->axpby                  = VecAXPBY_Seq;
    vv->ops->maxpy                  = VecMAXPY_Seq;
    vv->ops->aypx                   = VecAYPX_Seq;
    vv->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    vv->ops->pointwisemult          = VecPointwiseMult_Seq;
    vv->ops->setrandom              = VecSetRandom_Seq;
    vv->ops->placearray             = VecPlaceArray_Seq;
    vv->ops->replacearray           = VecReplaceArray_Seq;
    vv->ops->resetarray             = VecResetArray_Seq;
    vv->ops->dot_local              = VecDot_Seq;
    vv->ops->tdot_local             = VecTDot_Seq;
    vv->ops->norm_local             = VecNorm_Seq;
    vv->ops->mdot_local             = VecMDot_Seq;
    vv->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    vv->ops->getlocalvector         = NULL;
    vv->ops->restorelocalvector     = NULL;
    vv->ops->getlocalvectorread     = NULL;
    vv->ops->restorelocalvectorread = NULL;
    vv->ops->getarraywrite          = NULL;
  } else {
    vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
    vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
    vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
    vv->ops->dot             = VecDot_MPIViennaCL;
    vv->ops->mdot            = VecMDot_MPIViennaCL;
    vv->ops->tdot            = VecTDot_MPIViennaCL;
    vv->ops->norm            = VecNorm_MPIViennaCL;
    vv->ops->scale           = VecScale_SeqViennaCL;
    vv->ops->copy            = VecCopy_SeqViennaCL;
    vv->ops->set             = VecSet_SeqViennaCL;
    vv->ops->swap            = VecSwap_SeqViennaCL;
    vv->ops->axpy            = VecAXPY_SeqViennaCL;
    vv->ops->axpby           = VecAXPBY_SeqViennaCL;
    vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
    vv->ops->aypx            = VecAYPX_SeqViennaCL;
    vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
    vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
    vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
    vv->ops->dot_local       = VecDot_SeqViennaCL;
    vv->ops->tdot_local      = VecTDot_SeqViennaCL;
    vv->ops->norm_local      = VecNorm_SeqViennaCL;
    vv->ops->mdot_local      = VecMDot_SeqViennaCL;
    vv->ops->destroy         = VecDestroy_MPIViennaCL;
    vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
    vv->ops->placearray      = VecPlaceArray_SeqViennaCL;
    vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
    vv->ops->resetarray      = VecResetArray_SeqViennaCL;
    vv->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
    vv->ops->getarray        = VecGetArray_SeqViennaCL;
    vv->ops->restorearray    = VecRestoreArray_SeqViennaCL;
  }
  return 0;
}

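/*
   Illustrative sketch (not part of the original file): VecBindToCPU() dispatches to the
   routine above for this type, swapping the operation table so subsequent operations run
   on the host until the vector is unbound again. The helper name ExampleBindPhase is
   hypothetical.
*/
static PetscErrorCode ExampleBindPhase(Vec x)
{
  VecBindToCPU(x,PETSC_TRUE);    /* host-only phase, e.g. setup code that touches entries on the CPU */
  VecSet(x,0.0);
  VecBindToCPU(x,PETSC_FALSE);   /* back to ViennaCL (device) operations */
  return 0;
}
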
PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
{
  PetscLayoutSetUp(vv->map);
  VecViennaCLAllocateCheck(vv);
  VecCreate_MPIViennaCL_Private(vv,PETSC_FALSE,0,((Vec_ViennaCL*)(vv->spptr))->GPUarray);
  VecViennaCLAllocateCheckHost(vv);
  VecSet(vv,0.0);
  VecSet_Seq(vv,0.0);
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  return 0;
}

PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
{
  PetscMPIInt size;

  MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);
  if (size == 1) {
    VecSetType(v,VECSEQVIENNACL);
  } else {
    VecSetType(v,VECMPIVIENNACL);
  }
  return 0;
}

/*@C
   VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
-  array - the user provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.

   Level: intermediate

.seealso: VecCreateSeqViennaCLWithArray(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
          VecCreate(), VecCreateMPI(), VecCreateGhostWithArray(), VecViennaCLPlaceArray()
@*/
PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const ViennaCLVector *array,Vec *vv)
{
  PetscSplitOwnership(comm,&n,&N);
  VecCreate(comm,vv);
  VecSetSizes(*vv,n,N);
  VecSetBlockSize(*vv,bs);
  VecCreate_MPIViennaCL_Private(*vv,PETSC_FALSE,0,array);
  return 0;
}

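/*
   Illustrative sketch (not part of the original file): per the Notes above, the device
   array may be passed as NULL at creation time and supplied later with
   VecViennaCLPlaceArray(), which is assumed here to take the vector and a pointer to the
   ViennaCL vector, as the .seealso entries suggest. The helper name ExampleWrapDeviceArray
   is hypothetical.
*/
static PetscErrorCode ExampleWrapDeviceArray(MPI_Comm comm,PetscInt n,const ViennaCLVector *gpuvec,Vec *x)
{
  VecCreateMPIViennaCLWithArray(comm,1,n,PETSC_DECIDE,NULL,x);
  VecViennaCLPlaceArray(*x,gpuvec);   /* attach the user-managed device storage */
  return 0;
}
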
/*@C
   VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
   where the user provides the CPU array and the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm        - the MPI communicator to use
.  bs          - block size, same meaning as VecSetBlockSize()
.  n           - local vector length, cannot be PETSC_DECIDE
.  N           - global vector length (or PETSC_DECIDE to have it calculated)
.  cpuarray    - the user provided CPU array to store the vector values
-  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device

   Output Parameter:
.  vv - the vector

   Notes:
   If both cpuarray and viennaclvec are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed.

   Level: intermediate

.seealso: VecCreateSeqViennaCLWithArrays(), VecCreateMPIWithArray(),
          VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecViennaCLPlaceArray(),
          VecPlaceArray(), VecCreateMPICUDAWithArrays(),
          VecViennaCLAllocateCheckHost()
@*/
PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const ViennaCLVector *viennaclvec,Vec *vv)
{
  VecCreateMPIViennaCLWithArray(comm,bs,n,N,viennaclvec,vv);

  if (cpuarray && viennaclvec) {
    Vec_MPI *s = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (viennaclvec) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  return 0;
}

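/*
   Illustrative sketch (not part of the original file): creating the vector from an
   existing host array only. The routine above then marks the vector as PETSC_OFFLOAD_CPU,
   so ViennaCL operations will copy the data to the device when they need it. The helper
   name ExampleWrapHostArray is hypothetical.
*/
static PetscErrorCode ExampleWrapHostArray(MPI_Comm comm,PetscInt n,const PetscScalar hostvals[],Vec *x)
{
  VecCreateMPIViennaCLWithArrays(comm,1,n,PETSC_DECIDE,hostvals,NULL,x);
  return 0;
}
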
PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv,PetscBool alloc,PetscInt nghost,const ViennaCLVector *array)
{
  Vec_ViennaCL *vecviennacl;

  VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);
  PetscObjectChangeTypeName((PetscObject)vv,VECMPIVIENNACL);

  VecBindToCPU_MPIViennaCL(vv,PETSC_FALSE);
  vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

  if (alloc && !array) {
    VecViennaCLAllocateCheck(vv);
    VecViennaCLAllocateCheckHost(vv);
    VecSet(vv,0.0);
    VecSet_Seq(vv,0.0);
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr) vv->spptr = new Vec_ViennaCL;
    vecviennacl                     = (Vec_ViennaCL*)vv->spptr;
    vecviennacl->GPUarray_allocated = 0;
    vecviennacl->GPUarray           = (ViennaCLVector*)array;
    vv->offloadmask                 = PETSC_OFFLOAD_UNALLOCATED;
  }

  return 0;
}