Actual source code: ex1hip.hip.cpp

  1: static char help[] = "Benchmarking HIP kernel launch time\n";
  2: /*
  3:   Running example on Crusher at OLCF:
  4:   # run with 1 mpi rank (-n1), 32 CPUs (-c32), and map the process to CPU 0 and GPU 0
  5:   $ srun -n1 -c32 --cpu-bind=map_cpu:0 --gpus-per-node=8 --gpu-bind=map_gpu:0 ./ex1hip
  6:   Average asynchronous HIP kernel launch time = 3.74 microseconds
  7:   Average synchronous  HIP kernel launch time = 6.66 microseconds
  8: */
  9: #include <petscsys.h>
 10: #include <petscdevice.h>

 12: __global__ void NullKernel(){}

 14: int main(int argc,char **argv)
 15: {
 16:   PetscInt       i,n=100000;
 17:   hipError_t     cerr;
 18:   PetscLogDouble tstart,tend,time;

 20:   PetscInitialize(&argc,&argv,(char*)0,help);
 21:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);

 23:   /* Launch a sequence of kernels asynchronously. Previous launched kernels do not need to be completed before launching a new one */
 24:   PetscTime(&tstart);
 25:   for (i=0; i<n; i++) {hipLaunchKernelGGL(NullKernel,dim3(1),dim3(1),0,NULL);}
 26:   PetscTime(&tend);
 27:   hipStreamSynchronize(NULL); /* Sync after tend since we don't want to count kernel execution time */
 28:   time = (tend-tstart)*1e6/n;
 29:   PetscPrintf(PETSC_COMM_WORLD,"Average asynchronous HIP kernel launch time = %.2f microseconds\n",time);

 31:   /* Launch a sequence of kernels synchronously. Only launch a new kernel after the one before it has been completed */
 32:   PetscTime(&tstart);
 33:   for (i=0; i<n; i++) {
 34:     hipLaunchKernelGGL(NullKernel,dim3(1),dim3(1),0,NULL);
 35:     hipStreamSynchronize(NULL);
 36:   }
 37:   PetscTime(&tend);
 38:   time = (tend-tstart)*1e6/n;
 39:   PetscPrintf(PETSC_COMM_WORLD,"Average synchronous  HIP kernel launch time = %.2f microseconds\n",time);

 41:   PetscFinalize();
 42:   return 0;
 43: }

 45: /*TEST
 46:   build:
 47:     requires: hip

 49:   test:
 50:     requires: hip
 51:     args: -n 2
 52:     output_file: output/empty.out
 53:     filter: grep "DOES_NOT_EXIST"

 55: TEST*/