Actual source code: math2opusutils.cu
1: #include <petsc/private/matimpl.h>
2: #include <petsc/private/vecimpl.h>
3: #include <petscsf.h>
4: #if defined(PETSC_HAVE_CUDA)
5: #include <thrust/for_each.h>
6: #include <thrust/device_vector.h>
7: #include <thrust/execution_policy.h>
8: #endif
10: PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
11: {
12: PetscSF rankssf;
13: const PetscSFNode *iremote;
14: PetscSFNode *viremote,*rremotes;
15: const PetscInt *ilocal;
16: PetscInt *vilocal = NULL,*ldrs;
17: const PetscMPIInt *ranks;
18: PetscMPIInt *sranks;
19: PetscInt nranks,nr,nl,vnr,vnl,i,v,j,maxl;
20: MPI_Comm comm;
25: if (nv == 1) {
26: PetscObjectReference((PetscObject)sf);
27: *vsf = sf;
28: return 0;
29: }
30: PetscObjectGetComm((PetscObject)sf,&comm);
31: PetscSFGetGraph(sf,&nr,&nl,&ilocal,&iremote);
32: PetscSFGetLeafRange(sf,NULL,&maxl);
33: maxl += 1;
34: if (ldl == PETSC_DECIDE) ldl = maxl;
35: if (ldr == PETSC_DECIDE) ldr = nr;
38: vnr = nr*nv;
39: vnl = nl*nv;
40: PetscMalloc1(vnl,&viremote);
41: if (ilocal) PetscMalloc1(vnl,&vilocal);
43: /* TODO: Should this special SF be available, e.g.
44: PetscSFGetRanksSF or similar? */
45: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
46: PetscMalloc1(nranks,&sranks);
47: PetscArraycpy(sranks,ranks,nranks);
48: PetscSortMPIInt(nranks,sranks);
49: PetscMalloc1(nranks,&rremotes);
50: for (i=0;i<nranks;i++) {
51: rremotes[i].rank = sranks[i];
52: rremotes[i].index = 0;
53: }
54: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&rankssf);
55: PetscSFSetGraph(rankssf,1,nranks,NULL,PETSC_OWN_POINTER,rremotes,PETSC_OWN_POINTER);
56: PetscMalloc1(nranks,&ldrs);
57: PetscSFBcastBegin(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE);
58: PetscSFBcastEnd(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE);
59: PetscSFDestroy(&rankssf);
61: j = -1;
62: for (i=0;i<nl;i++) {
63: const PetscInt r = iremote[i].rank;
64: const PetscInt ii = iremote[i].index;
66: if (j < 0 || sranks[j] != r) {
67: PetscFindMPIInt(r,nranks,sranks,&j);
68: }
70: for (v=0;v<nv;v++) {
71: viremote[v*nl + i].rank = r;
72: viremote[v*nl + i].index = v*ldrs[j] + ii;
73: if (ilocal) vilocal[v*nl + i] = v*ldl + ilocal[i];
74: }
75: }
76: PetscFree(sranks);
77: PetscFree(ldrs);
78: PetscSFCreate(comm,vsf);
79: PetscSFSetGraph(*vsf,vnr,vnl,vilocal,PETSC_OWN_POINTER,viremote,PETSC_OWN_POINTER);
80: return 0;
81: }
83: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf)
84: {
85: PetscSF asf;
90: PetscObjectQuery((PetscObject)A,"_math2opus_vectorsf",(PetscObject*)&asf);
91: if (!asf) {
92: PetscInt lda;
94: MatDenseGetLDA(A,&lda);
95: PetscSFGetVectorSF(h2sf,A->cmap->N,lda,PETSC_DECIDE,&asf);
96: PetscObjectCompose((PetscObject)A,"_math2opus_vectorsf",(PetscObject)asf);
97: PetscObjectDereference((PetscObject)asf);
98: }
99: *osf = asf;
100: return 0;
101: }
103: #if defined(PETSC_HAVE_CUDA)
104: struct SignVector_Functor
105: {
106: const PetscScalar *v;
107: PetscScalar *s;
108: SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) {}
110: __host__ __device__ void operator()(PetscInt i)
111: {
112: s[i] = (v[i] < 0 ? -1 : 1);
113: }
114: };
115: #endif
117: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
118: {
119: const PetscScalar *av;
120: PetscScalar *as;
121: PetscInt i,n;
122: #if defined(PETSC_HAVE_CUDA)
123: PetscBool viscuda,siscuda;
124: #endif
128: VecGetLocalSize(s,&n);
129: VecGetLocalSize(v,&i);
131: #if defined(PETSC_HAVE_CUDA)
132: PetscObjectTypeCompareAny((PetscObject)v,&viscuda,VECSEQCUDA,VECMPICUDA,"");
133: PetscObjectTypeCompareAny((PetscObject)s,&siscuda,VECSEQCUDA,VECMPICUDA,"");
134: viscuda = (PetscBool)(viscuda && !v->boundtocpu);
135: siscuda = (PetscBool)(siscuda && !s->boundtocpu);
136: if (viscuda && siscuda) {
137: VecCUDAGetArrayRead(v,&av);
138: VecCUDAGetArrayWrite(s,&as);
139: SignVector_Functor sign_vector(av, as);
140: thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0),
141: thrust::counting_iterator<PetscInt>(n), sign_vector);
142: VecCUDARestoreArrayWrite(s,&as);
143: VecCUDARestoreArrayRead(v,&av);
144: } else
145: #endif
146: {
147: VecGetArrayRead(v,&av);
148: VecGetArrayWrite(s,&as);
149: for (i=0;i<n;i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
150: VecRestoreArrayWrite(s,&as);
151: VecRestoreArrayRead(v,&av);
152: }
153: return 0;
154: }
156: #if defined(PETSC_HAVE_CUDA)
157: struct StandardBasis_Functor
158: {
159: PetscScalar *v;
160: PetscInt j;
162: StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) {}
163: __host__ __device__ void operator()(PetscInt i)
164: {
165: v[i] = (i == j ? 1 : 0);
166: }
167: };
168: #endif
170: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
171: {
172: #if defined(PETSC_HAVE_CUDA)
173: PetscBool iscuda;
174: #endif
175: PetscInt st,en;
177: VecGetOwnershipRange(x,&st,&en);
178: #if defined(PETSC_HAVE_CUDA)
179: PetscObjectTypeCompareAny((PetscObject)x,&iscuda,VECSEQCUDA,VECMPICUDA,"");
180: iscuda = (PetscBool)(iscuda && !x->boundtocpu);
181: if (iscuda) {
182: PetscScalar *ax;
183: VecCUDAGetArrayWrite(x,&ax);
184: StandardBasis_Functor delta(ax, i-st);
185: thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0),
186: thrust::counting_iterator<PetscInt>(en-st), delta);
187: VecCUDARestoreArrayWrite(x,&ax);
188: } else
189: #endif
190: {
191: VecSet(x,0.);
192: if (st <= i && i < en) {
193: VecSetValue(x,i,1.0,INSERT_VALUES);
194: }
195: VecAssemblyBegin(x);
196: VecAssemblyEnd(x);
197: }
198: return 0;
199: }
201: /* these are approximate norms */
202: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
203: NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
204: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal* n)
205: {
206: Vec x,y,w,z;
207: PetscReal normz,adot;
208: PetscScalar dot;
209: PetscInt i,j,N,jold = -1;
210: PetscBool boundtocpu = PETSC_TRUE;
212: #if defined(PETSC_HAVE_DEVICE)
213: boundtocpu = A->boundtocpu;
214: #endif
215: switch (normtype) {
216: case NORM_INFINITY:
217: case NORM_1:
218: if (normsamples < 0) normsamples = 10; /* pure guess */
219: if (normtype == NORM_INFINITY) {
220: Mat B;
221: MatCreateTranspose(A,&B);
222: A = B;
223: } else {
224: PetscObjectReference((PetscObject)A);
225: }
226: MatCreateVecs(A,&x,&y);
227: MatCreateVecs(A,&z,&w);
228: VecBindToCPU(x,boundtocpu);
229: VecBindToCPU(y,boundtocpu);
230: VecBindToCPU(z,boundtocpu);
231: VecBindToCPU(w,boundtocpu);
232: VecGetSize(x,&N);
233: VecSet(x,1./N);
234: *n = 0.0;
235: for (i = 0; i < normsamples; i++) {
236: MatMult(A,x,y);
237: VecSign(y,w);
238: MatMultTranspose(A,w,z);
239: VecNorm(z,NORM_INFINITY,&normz);
240: VecDot(x,z,&dot);
241: adot = PetscAbsScalar(dot);
242: PetscInfo(A,"%s norm it %" PetscInt_FMT " -> (%g %g)\n",NormTypes[normtype],i,(double)normz,(double)adot);
243: if (normz <= adot && i > 0) {
244: VecNorm(y,NORM_1,n);
245: break;
246: }
247: VecMax(z,&j,&normz);
248: if (j == jold) {
249: VecNorm(y,NORM_1,n);
250: PetscInfo(A,"%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n",NormTypes[normtype],i);
251: break;
252: }
253: jold = j;
254: VecSetDelta(x,j);
255: }
256: MatDestroy(&A);
257: VecDestroy(&x);
258: VecDestroy(&w);
259: VecDestroy(&y);
260: VecDestroy(&z);
261: break;
262: case NORM_2:
263: if (normsamples < 0) normsamples = 20; /* pure guess */
264: MatCreateVecs(A,&x,&y);
265: MatCreateVecs(A,&z,NULL);
266: VecBindToCPU(x,boundtocpu);
267: VecBindToCPU(y,boundtocpu);
268: VecBindToCPU(z,boundtocpu);
269: VecSetRandom(x,NULL);
270: VecNormalize(x,NULL);
271: *n = 0.0;
272: for (i = 0; i < normsamples; i++) {
273: MatMult(A,x,y);
274: VecNormalize(y,n);
275: MatMultTranspose(A,y,z);
276: VecNorm(z,NORM_2,&normz);
277: VecDot(x,z,&dot);
278: adot = PetscAbsScalar(dot);
279: PetscInfo(A,"%s norm it %" PetscInt_FMT " -> %g (%g %g)\n",NormTypes[normtype],i,(double)*n,(double)normz,(double)adot);
280: if (normz <= adot) break;
281: if (i < normsamples - 1) {
282: Vec t;
284: VecNormalize(z,NULL);
285: t = x;
286: x = z;
287: z = t;
288: }
289: }
290: VecDestroy(&x);
291: VecDestroy(&y);
292: VecDestroy(&z);
293: break;
294: default:
295: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"%s norm not supported",NormTypes[normtype]);
296: }
297: PetscInfo(A,"%s norm %g computed in %" PetscInt_FMT " iterations\n",NormTypes[normtype],(double)*n,i);
298: return 0;
299: }