Actual source code: vechip.hip.cpp
1: /*
   Implementation of the sequential HIP vectors.

   This file contains the code that can be compiled with a C
   compiler.  The companion file vechip2.hip.cpp contains the code that
   must be compiled with the hipcc compiler.
7: */
9: #define PETSC_SKIP_SPINLOCK
11: #include <petscconf.h>
12: #include <petsc/private/vecimpl.h>
13: #include <../src/vec/vec/impls/dvecimpl.h>
14: #include <petsc/private/hipvecimpl.h>
16: PetscErrorCode VecHIPGetArrays_Private(Vec v,const PetscScalar** x,const PetscScalar** x_d,PetscOffloadMask* flg)
17: {
19: if (x) {
20: Vec_Seq *h = (Vec_Seq*)v->data;
22: *x = h->array;
23: }
24: if (x_d) {
25: Vec_HIP *d = (Vec_HIP*)v->spptr;
27: *x_d = d ? d->GPUarray : NULL;
28: }
29: if (flg) *flg = v->offloadmask;
30: return 0;
31: }
33: /*
34: Allocates space for the vector array on the Host if it does not exist.
35: Does NOT change the PetscHIPFlag for the vector
36: Does NOT zero the HIP array
37: */
38: PetscErrorCode VecHIPAllocateCheckHost(Vec v)
39: {
40: PetscScalar *array;
41: Vec_Seq *s = (Vec_Seq*)v->data;
42: PetscInt n = v->map->n;
44: if (!s) {
45: PetscNewLog((PetscObject)v,&s);
46: v->data = s;
47: }
48: if (!s->array) {
49: if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
50: PetscMallocSetHIPHost();
51: v->pinned_memory = PETSC_TRUE;
52: }
53: PetscMalloc1(n,&array);
54: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
55: s->array = array;
56: s->array_allocated = array;
57: if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
58: PetscMallocResetHIPHost();
59: }
60: if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
61: v->offloadmask = PETSC_OFFLOAD_CPU;
62: }
63: }
64: return 0;
65: }
67: PetscErrorCode VecCopy_SeqHIP_Private(Vec xin,Vec yin)
68: {
69: PetscScalar *ya;
70: const PetscScalar *xa;
72: VecHIPAllocateCheckHost(xin);
73: VecHIPAllocateCheckHost(yin);
74: if (xin != yin) {
75: VecGetArrayRead(xin,&xa);
76: VecGetArray(yin,&ya);
77: PetscArraycpy(ya,xa,xin->map->n);
78: VecRestoreArrayRead(xin,&xa);
79: VecRestoreArray(yin,&ya);
80: }
81: return 0;
82: }
84: PetscErrorCode VecSetRandom_SeqHIP(Vec xin,PetscRandom r)
85: {
86: PetscInt n = xin->map->n;
87: PetscScalar *xx;
89: VecGetArrayWrite(xin,&xx);
90: PetscRandomGetValues(r,n,xx);
91: VecRestoreArrayWrite(xin,&xx);
92: return 0;
93: }
95: PetscErrorCode VecDestroy_SeqHIP_Private(Vec v)
96: {
97: Vec_Seq *vs = (Vec_Seq*)v->data;
99: PetscObjectSAWsViewOff(v);
100: #if defined(PETSC_USE_LOG)
101: PetscLogObjectState((PetscObject)v,"Length=%" PetscInt_FMT,v->map->n);
102: #endif
103: if (vs) {
104: if (vs->array_allocated) {
105: if (v->pinned_memory) {
106: PetscMallocSetHIPHost();
107: }
108: PetscFree(vs->array_allocated);
109: if (v->pinned_memory) {
110: PetscMallocResetHIPHost();
111: v->pinned_memory = PETSC_FALSE;
112: }
113: }
114: PetscFree(vs);
115: }
116: return 0;
117: }
119: PetscErrorCode VecResetArray_SeqHIP_Private(Vec vin)
120: {
121: Vec_Seq *v = (Vec_Seq*)vin->data;
123: v->array = v->unplacedarray;
124: v->unplacedarray = 0;
125: return 0;
126: }
128: PetscErrorCode VecResetArray_SeqHIP(Vec vin)
129: {
130: VecHIPCopyFromGPU(vin);
131: VecResetArray_SeqHIP_Private(vin);
132: vin->offloadmask = PETSC_OFFLOAD_CPU;
133: return 0;
134: }
136: PetscErrorCode VecPlaceArray_SeqHIP(Vec vin,const PetscScalar *a)
137: {
138: VecHIPCopyFromGPU(vin);
139: VecPlaceArray_Seq(vin,a);
140: vin->offloadmask = PETSC_OFFLOAD_CPU;
141: return 0;
142: }
144: PetscErrorCode VecReplaceArray_SeqHIP(Vec vin,const PetscScalar *a)
145: {
146: Vec_Seq *vs = (Vec_Seq*)vin->data;
148: if (vs->array != vs->array_allocated) {
149: /* make sure the users array has the latest values */
150: VecHIPCopyFromGPU(vin);
151: }
152: if (vs->array_allocated) {
153: if (vin->pinned_memory) {
154: PetscMallocSetHIPHost();
155: }
156: PetscFree(vs->array_allocated);
157: if (vin->pinned_memory) {
158: PetscMallocResetHIPHost();
159: }
160: }
161: vin->pinned_memory = PETSC_FALSE;
162: vs->array_allocated = vs->array = (PetscScalar*)a;
163: vin->offloadmask = PETSC_OFFLOAD_CPU;
164: return 0;
165: }
167: /*@
168: VecCreateSeqHIP - Creates a standard, sequential array-style vector.
170: Collective
172: Input Parameter:
173: + comm - the communicator, should be PETSC_COMM_SELF
174: - n - the vector length
176: Output Parameter:
177: . v - the vector
179: Notes:
180: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
181: same type as an existing vector.
183: Level: intermediate
185: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
186: @*/
187: PetscErrorCode VecCreateSeqHIP(MPI_Comm comm,PetscInt n,Vec *v)
188: {
189: VecCreate(comm,v);
190: VecSetSizes(*v,n,n);
191: VecSetType(*v,VECSEQHIP);
192: return 0;
193: }
195: PetscErrorCode VecDuplicate_SeqHIP(Vec win,Vec *V)
196: {
197: VecCreateSeqHIP(PetscObjectComm((PetscObject)win),win->map->n,V);
198: PetscLayoutReference(win->map,&(*V)->map);
199: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
200: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
201: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
202: return 0;
203: }
205: PetscErrorCode VecCreate_SeqHIP(Vec V)
206: {
207: PetscDeviceInitialize(PETSC_DEVICE_HIP);
208: PetscLayoutSetUp(V->map);
209: VecHIPAllocateCheck(V);
210: VecCreate_SeqHIP_Private(V,((Vec_HIP*)V->spptr)->GPUarray_allocated);
211: VecSet_SeqHIP(V,0.0);
212: return 0;
213: }
215: /*@C
216: VecCreateSeqHIPWithArray - Creates a HIP sequential array-style vector,
217: where the user provides the array space to store the vector values. The array
218: provided must be a GPU array.
220: Collective
222: Input Parameters:
223: + comm - the communicator, should be PETSC_COMM_SELF
224: . bs - the block size
225: . n - the vector length
226: - array - GPU memory where the vector elements are to be stored.
228: Output Parameter:
229: . V - the vector
231: Notes:
232: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
233: same type as an existing vector.
235: If the user-provided array is NULL, then VecHIPPlaceArray() can be used
236: at a later stage to SET the array for storing the vector values.
238: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
239: The user should not free the array until the vector is destroyed.
241: Level: intermediate
243: .seealso: VecCreateMPIHIPWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
244: VecCreateGhost(), VecCreateSeq(), VecHIPPlaceArray(), VecCreateSeqWithArray(),
245: VecCreateMPIWithArray()
246: @*/
247: PetscErrorCode VecCreateSeqHIPWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
248: {
249: PetscDeviceInitialize(PETSC_DEVICE_HIP);
250: VecCreate(comm,V);
251: VecSetSizes(*V,n,n);
252: VecSetBlockSize(*V,bs);
253: VecCreate_SeqHIP_Private(*V,array);
254: return 0;
255: }
257: /*@C
258: VecCreateSeqHIPWithArrays - Creates a HIP sequential array-style vector,
259: where the user provides the array space to store the vector values.
261: Collective
263: Input Parameters:
264: + comm - the communicator, should be PETSC_COMM_SELF
265: . bs - the block size
266: . n - the vector length
267: - cpuarray - CPU memory where the vector elements are to be stored.
268: - gpuarray - GPU memory where the vector elements are to be stored.
270: Output Parameter:
271: . V - the vector
273: Notes:
274: If both cpuarray and gpuarray are provided, the caller must ensure that
275: the provided arrays have identical values.
277: PETSc does NOT free the provided arrays when the vector is destroyed via
278: VecDestroy(). The user should not free the array until the vector is
279: destroyed.
281: Level: intermediate
283: .seealso: VecCreateMPIHIPWithArrays(), VecCreate(), VecCreateSeqWithArray(),
284: VecHIPPlaceArray(), VecCreateSeqHIPWithArray(),
285: VecHIPAllocateCheckHost()
286: @*/
287: PetscErrorCode VecCreateSeqHIPWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *V)
288: {
289: // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
290: VecCreateSeqHIPWithArray(comm,bs,n,gpuarray,V);
292: if (cpuarray && gpuarray) {
293: Vec_Seq *s = (Vec_Seq*)((*V)->data);
294: s->array = (PetscScalar*)cpuarray;
295: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
296: } else if (cpuarray) {
297: Vec_Seq *s = (Vec_Seq*)((*V)->data);
298: s->array = (PetscScalar*)cpuarray;
299: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
300: } else if (gpuarray) {
301: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
302: } else {
303: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
304: }
306: return 0;
307: }
309: PetscErrorCode VecGetArray_SeqHIP(Vec v,PetscScalar **a)
310: {
311: VecHIPCopyFromGPU(v);
312: *a = *((PetscScalar**)v->data);
313: return 0;
314: }
316: PetscErrorCode VecRestoreArray_SeqHIP(Vec v,PetscScalar **a)
317: {
318: v->offloadmask = PETSC_OFFLOAD_CPU;
319: return 0;
320: }
322: PetscErrorCode VecGetArrayWrite_SeqHIP(Vec v,PetscScalar **a)
323: {
324: VecHIPAllocateCheckHost(v);
325: *a = *((PetscScalar**)v->data);
326: return 0;
327: }
329: PetscErrorCode VecGetArrayAndMemType_SeqHIP(Vec v,PetscScalar** a,PetscMemType *mtype)
330: {
331: VecHIPCopyToGPU(v);
332: *a = ((Vec_HIP*)v->spptr)->GPUarray;
333: if (mtype) *mtype = PETSC_MEMTYPE_HIP;
334: return 0;
335: }
337: PetscErrorCode VecRestoreArrayAndMemType_SeqHIP(Vec v,PetscScalar** a)
338: {
339: v->offloadmask = PETSC_OFFLOAD_GPU;
340: return 0;
341: }
343: PetscErrorCode VecGetArrayWriteAndMemType_SeqHIP(Vec v,PetscScalar** a,PetscMemType *mtype)
344: {
345: /* Allocate memory (not zeroed) on device if not yet, but no need to sync data from host to device */
346: VecHIPAllocateCheck(v);
347: *a = ((Vec_HIP*)v->spptr)->GPUarray;
348: if (mtype) *mtype = PETSC_MEMTYPE_HIP;
349: return 0;
350: }
352: PetscErrorCode VecBindToCPU_SeqHIP(Vec V,PetscBool bind)
353: {
354: V->boundtocpu = bind;
355: if (bind) {
356: VecHIPCopyFromGPU(V);
357: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
358: V->ops->dot = VecDot_Seq;
359: V->ops->norm = VecNorm_Seq;
360: V->ops->tdot = VecTDot_Seq;
361: V->ops->scale = VecScale_Seq;
362: V->ops->copy = VecCopy_Seq;
363: V->ops->set = VecSet_Seq;
364: V->ops->swap = VecSwap_Seq;
365: V->ops->axpy = VecAXPY_Seq;
366: V->ops->axpby = VecAXPBY_Seq;
367: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
368: V->ops->pointwisemult = VecPointwiseMult_Seq;
369: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
370: V->ops->setrandom = VecSetRandom_Seq;
371: V->ops->dot_local = VecDot_Seq;
372: V->ops->tdot_local = VecTDot_Seq;
373: V->ops->norm_local = VecNorm_Seq;
374: V->ops->mdot_local = VecMDot_Seq;
375: V->ops->mtdot_local = VecMTDot_Seq;
376: V->ops->maxpy = VecMAXPY_Seq;
377: V->ops->mdot = VecMDot_Seq;
378: V->ops->mtdot = VecMTDot_Seq;
379: V->ops->aypx = VecAYPX_Seq;
380: V->ops->waxpy = VecWAXPY_Seq;
381: V->ops->dotnorm2 = NULL;
382: V->ops->placearray = VecPlaceArray_Seq;
383: V->ops->replacearray = VecReplaceArray_SeqHIP;
384: V->ops->resetarray = VecResetArray_Seq;
385: V->ops->duplicate = VecDuplicate_Seq;
386: V->ops->conjugate = VecConjugate_Seq;
387: V->ops->getlocalvector = NULL;
388: V->ops->restorelocalvector = NULL;
389: V->ops->getlocalvectorread = NULL;
390: V->ops->restorelocalvectorread = NULL;
391: V->ops->getarraywrite = NULL;
392: V->ops->getarrayandmemtype = NULL;
393: V->ops->restorearrayandmemtype = NULL;
394: V->ops->getarraywriteandmemtype= NULL;
395: V->ops->max = VecMax_Seq;
396: V->ops->min = VecMin_Seq;
397: V->ops->reciprocal = VecReciprocal_Default;
398: V->ops->sum = NULL;
399: V->ops->shift = NULL;
400: } else {
401: V->ops->dot = VecDot_SeqHIP;
402: V->ops->norm = VecNorm_SeqHIP;
403: V->ops->tdot = VecTDot_SeqHIP;
404: V->ops->scale = VecScale_SeqHIP;
405: V->ops->copy = VecCopy_SeqHIP;
406: V->ops->set = VecSet_SeqHIP;
407: V->ops->swap = VecSwap_SeqHIP;
408: V->ops->axpy = VecAXPY_SeqHIP;
409: V->ops->axpby = VecAXPBY_SeqHIP;
410: V->ops->axpbypcz = VecAXPBYPCZ_SeqHIP;
411: V->ops->pointwisemult = VecPointwiseMult_SeqHIP;
412: V->ops->pointwisedivide = VecPointwiseDivide_SeqHIP;
413: V->ops->setrandom = VecSetRandom_SeqHIP;
414: V->ops->dot_local = VecDot_SeqHIP;
415: V->ops->tdot_local = VecTDot_SeqHIP;
416: V->ops->norm_local = VecNorm_SeqHIP;
417: V->ops->mdot_local = VecMDot_SeqHIP;
418: V->ops->maxpy = VecMAXPY_SeqHIP;
419: V->ops->mdot = VecMDot_SeqHIP;
420: V->ops->aypx = VecAYPX_SeqHIP;
421: V->ops->waxpy = VecWAXPY_SeqHIP;
422: V->ops->dotnorm2 = VecDotNorm2_SeqHIP;
423: V->ops->placearray = VecPlaceArray_SeqHIP;
424: V->ops->replacearray = VecReplaceArray_SeqHIP;
425: V->ops->resetarray = VecResetArray_SeqHIP;
426: V->ops->destroy = VecDestroy_SeqHIP;
427: V->ops->duplicate = VecDuplicate_SeqHIP;
428: V->ops->conjugate = VecConjugate_SeqHIP;
429: V->ops->getlocalvector = VecGetLocalVector_SeqHIP;
430: V->ops->restorelocalvector = VecRestoreLocalVector_SeqHIP;
431: V->ops->getlocalvectorread = VecGetLocalVectorRead_SeqHIP;
432: V->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqHIP;
433: V->ops->getarraywrite = VecGetArrayWrite_SeqHIP;
434: V->ops->getarray = VecGetArray_SeqHIP;
435: V->ops->restorearray = VecRestoreArray_SeqHIP;
436: V->ops->getarrayandmemtype = VecGetArrayAndMemType_SeqHIP;
437: V->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqHIP;
438: V->ops->getarraywriteandmemtype= VecGetArrayWriteAndMemType_SeqHIP;
439: V->ops->max = VecMax_SeqHIP;
440: V->ops->min = VecMin_SeqHIP;
441: V->ops->reciprocal = VecReciprocal_SeqHIP;
442: V->ops->sum = VecSum_SeqHIP;
443: V->ops->shift = VecShift_SeqHIP;
444: }
445: return 0;
446: }
448: PetscErrorCode VecCreate_SeqHIP_Private(Vec V,const PetscScalar *array)
449: {
450: Vec_HIP *vechip;
451: PetscMPIInt size;
452: PetscBool option_set;
454: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
456: VecCreate_Seq_Private(V,0);
457: PetscObjectChangeTypeName((PetscObject)V,VECSEQHIP);
458: VecBindToCPU_SeqHIP(V,PETSC_FALSE);
459: V->ops->bindtocpu = VecBindToCPU_SeqHIP;
461: /* Later, functions check for the Vec_HIP structure existence, so do not create it without array */
462: if (array) {
463: if (!V->spptr) {
464: PetscReal pinned_memory_min;
467: PetscCalloc(sizeof(Vec_HIP),&V->spptr);
468: vechip = (Vec_HIP*)V->spptr;
469: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
471: pinned_memory_min = 0;
472: /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
473: Note: This same code duplicated in VecHIPAllocateCheck() and VecCreate_MPIHIP_Private(). Is there a good way to avoid this? */
474: PetscOptionsBegin(PetscObjectComm((PetscObject)V),((PetscObject)V)->prefix,"VECHIP Options","Vec");
475: PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&option_set);
476: if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
477: PetscOptionsEnd();
478: }
479: vechip = (Vec_HIP*)V->spptr;
480: vechip->GPUarray = (PetscScalar*)array;
481: V->offloadmask = PETSC_OFFLOAD_GPU;
483: }
484: return 0;
485: }