Actual source code: ex32.c
2: static char help[] = "Tests MATSEQDENSECUDA\n\n"; /* help text handed to PetscInitialize() in main() */
4: #include <petscmat.h>
6: int main(int argc,char **argv)
7: {
8: Mat A,AC,B;
9: PetscInt m = 10,n = 10;
10: PetscReal r,tol = 10*PETSC_SMALL;
12: PetscInitialize(&argc,&argv,(char*) 0,help);
13: PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
14: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
15: MatCreate(PETSC_COMM_SELF,&A);
16: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
17: MatSetType(A,MATSEQDENSE);
18: MatSetFromOptions(A);
19: MatSeqDenseSetPreallocation(A,NULL);
20: MatSetRandom(A,NULL);
21: #if 0
22: PetscInt i,j;
23: PetscScalar val;
24: for (i=0; i<m; i++) {
25: for (j=0; j<n; j++) {
26: val = (PetscScalar)(i+j);
27: MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);
28: }
29: }
30: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
31: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
32: #endif
34: /* Create a CUDA version of A */
35: #if defined(PETSC_HAVE_CUDA)
36: MatConvert(A,MATSEQDENSECUDA,MAT_INITIAL_MATRIX,&AC);
37: #else
38: MatDuplicate(A,MAT_COPY_VALUES,&AC);
39: #endif
40: MatDuplicate(AC,MAT_COPY_VALUES,&B);
42: /* full CUDA AXPY */
43: MatAXPY(B,-1.0,AC,SAME_NONZERO_PATTERN);
44: MatNorm(B,NORM_INFINITY,&r);
47: /* test Copy */
48: MatCopy(AC,B,SAME_NONZERO_PATTERN);
50: /* call MatAXPY_Basic since B is CUDA, A is CPU, */
51: MatAXPY(B,-1.0,A,SAME_NONZERO_PATTERN);
52: MatNorm(B,NORM_INFINITY,&r);
55: if (m == n) {
56: Mat B1,B2;
58: MatCopy(AC,B,SAME_NONZERO_PATTERN);
59: /* full CUDA PtAP */
60: MatPtAP(B,AC,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B1);
62: /* CPU PtAP since A is on the CPU only */
63: MatPtAP(B,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B2);
65: MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);
66: MatNorm(B2,NORM_INFINITY,&r);
69: /* test reuse */
70: MatPtAP(B,AC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B1);
71: MatPtAP(B,A,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B2);
72: MatAXPY(B2,-1.0,B1,SAME_NONZERO_PATTERN);
73: MatNorm(B2,NORM_INFINITY,&r);
76: MatDestroy(&B1);
77: MatDestroy(&B2);
78: }
80: MatDestroy(&B);
81: MatDestroy(&AC);
82: MatDestroy(&A);
83: PetscFinalize();
84: return 0;
85: }
87: /*TEST
89: build:
90: requires: cuda
92: test:
93: output_file: output/ex32_1.out
94: args: -m {{3 5 12}} -n {{3 5 12}}
95: suffix: seqdensecuda
97: TEST*/