// Header guard, project includes and `#pragma implementation`, followed by the
// primary OSVRR_sx_sx template. Only compute() is declared here (no body);
// the partial specializations further down in this file supply definitions.
// NOTE(review): this declaration names the 3rd/4th parameters src1, src0,
// whereas the specializations name them src0, src1. The types are identical,
// so argument binding is purely positional -- confirm the intended naming.
21 #ifndef _libint2_src_lib_libint_osvrrsxsx_h_ 22 #define _libint2_src_lib_libint_osvrrsxsx_h_ 26 #include <util_types.h> 27 #include <libint2/cgshell_ordering.h> 30 #pragma implementation 35 template <
// part: selector for the specialization (this file shows part = 0 and part = 1)
int part,
// Lb, Ld: compile-time indices consumed by FOR_CART / INT_NCART / INT_CARTINDEX
// in the specializations (presumably angular momenta -- TODO confirm)
int Lb,
int Ld,
// unit_a: when true, the part-0 specialization skips the PB * src0 term
bool unit_a,
// vectorize: when true, the specializations take the vector length from
// inteval->veclen; otherwise they use 1
bool vectorize>
struct OSVRR_sx_sx {
36 static void compute(
const Libint_t* inteval,
37 LIBINT2_REALTYPE* target,
38 const LIBINT2_REALTYPE* src1,
39 const LIBINT2_REALTYPE* src0,
40 const LIBINT2_REALTYPE* src2,
41 const LIBINT2_REALTYPE* src3,
42 const LIBINT2_REALTYPE* src4);
// Partial specialization of OSVRR_sx_sx for part == 0.
// compute() iterates over the components enumerated by FOR_CART(..., Lb),
// combines src1 (and src0 unless unit_a) read at the (Lb-1) index with an
// optional src2/src3 term read at the (Lb-2) index, and then accumulates a
// src4 contribution scaled by inteval->oo2ze into target.
// NOTE(review): `unit_a` is used in the specialization arguments below, but no
// `bool unit_a` parameter is visible in this template parameter list; the
// embedded numbering skips 53, so a line appears to be missing here.
52 template <
int Lb,
int Ld,
54 bool vectorize>
struct OSVRR_sx_sx<0,Lb,Ld,unit_a,vectorize> {
56 static void compute(
const Libint_t* inteval,
57 LIBINT2_REALTYPE* target,
58 const LIBINT2_REALTYPE* src0,
59 const LIBINT2_REALTYPE* src1,
60 const LIBINT2_REALTYPE* src2,
61 const LIBINT2_REALTYPE* src3,
62 const LIBINT2_REALTYPE* src4) {
// Precondition (checked only when assertions are enabled): Lb >= 2 and Ld >= 1.
65 assert(not (Lb < 2 || Ld < 1));
// Vector length comes from the evaluator only when vectorize is true.
67 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// NdV = INT_NCART(Ld) * veclen is the stride used for the b-index offsets below.
69 const unsigned int Nd = INT_NCART(Ld);
70 const unsigned int NdV = Nd * veclen;
73 FOR_CART(bx, by, bz, Lb)
75 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
// NOTE(review): `xyz` is used below but its declaration/selection is not
// visible in this block.
77 enum XYZ {x=0, y=1, z=2};
85 const LIBINT2_REALTYPE *PB, *WP;
// PB is taken from the evaluator only when unit_a is false and the matching
// quantity is enabled via LIBINT2_DEFINED. The code choosing among x/y/z and
// the assignment of WP are not visible here.
88 #if LIBINT2_DEFINED(eri,PB_x) 89 if (not unit_a) PB = inteval->PB_x;
94 #if LIBINT2_DEFINED(eri,PB_y) 95 if (not unit_a) PB = inteval->PB_y;
100 #if LIBINT2_DEFINED(eri,PB_z) 101 if (not unit_a) PB = inteval->PB_z;
// Offsets of the (Lb-1) source data for src0/src1.
// NOTE(review): presumably b[xyz] has been decremented before this index is
// computed -- that statement is not visible; confirm.
107 const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
108 const unsigned int bm10d0_offset = ibm1 * NdV;
// src0_ptr is null when unit_a; every use below is guarded by `not unit_a`.
109 const LIBINT2_REALTYPE* src0_ptr = unit_a ? 0 : src0 + bm10d0_offset;
110 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
// Offsets of the (Lb-2) source data for src2/src3.
115 const unsigned int ibm2 = INT_CARTINDEX(Lb-2,b[0],b[1]);
116 const unsigned int bm20d0_offset = ibm2 * NdV;
118 const LIBINT2_REALTYPE* src2_ptr = src2 + bm20d0_offset;
119 const LIBINT2_REALTYPE* src3_ptr = src3 + bm20d0_offset;
120 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
// Variant with the src2/src3 term. NOTE(review): the condition selecting this
// loop over the shorter one below, the declaration of `dv`, and the store of
// `value` are not visible in this block.
123 for(
unsigned int d = 0; d < Nd; ++d) {
124 for(
unsigned int v=0; v<veclen; ++v, ++dv) {
125 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv] + bxyz * inteval->oo2z[v] * (src2_ptr[dv] - inteval->roz[v] * src3_ptr[dv]);
126 if (not unit_a) value += PB[v] * src0_ptr[dv];
// 6 operations per element, plus 2 more for the PB * src0 term.
130 #if LIBINT2_FLOP_COUNT 131 inteval->nflops[0] += (unit_a ? 6 : 8) * NdV;
// Variant without the src2/src3 term.
137 for(
unsigned int d = 0; d < Nd; ++d) {
138 for(
unsigned int v=0; v<veclen; ++v, ++dv) {
139 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv];
140 if (not unit_a) value += PB[v] * src0_ptr[dv];
144 #if LIBINT2_FLOP_COUNT 145 inteval->nflops[0] += (unit_a ? 1 : 3) * NdV;
// src4 contribution: src4 is addressed with the (Lb-1) index and an
// INT_NCART(Ld-1) * veclen stride.
150 const unsigned int Ndm1 = INT_NCART(Ld-1);
151 const unsigned int Ndm1V = Ndm1 * veclen;
152 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
153 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
157 FOR_CART(dx, dy, dz, Ld-1)
159 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
// NOTE(review): dc is an index at level Ld built from d; presumably d[xyz] is
// incremented first -- that statement is not visible; confirm.
162 const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
163 const unsigned int dc_offset = dc * veclen;
164 LIBINT2_REALTYPE* tptr = target + dc_offset;
165 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
// Accumulate (+=) on top of whatever was previously stored at tptr.
166 for(
unsigned int v=0; v<veclen; ++v) {
167 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
169 #if LIBINT2_FLOP_COUNT 170 inteval->nflops[0] += 3 * veclen;
// Specialization of OSVRR_sx_sx for part == 1.
// compute() iterates over the components enumerated by FOR_CART(..., Ld),
// writes QD * src0 + WQ * src1 (plus an optional src2/src3 term) into target,
// and then accumulates a src4 contribution scaled by inteval->oo2ze.
// NOTE(review): the primary OSVRR_sx_sx template in this file takes five
// parameters (part, Lb, Ld, unit_a, vectorize) but only four arguments are
// listed here -- confirm whether this specialization is actually compiled.
193 template <
int Lb,
int Ld,
bool vectorize>
struct OSVRR_sx_sx<1,Lb,Ld,vectorize> {
195 static void compute(
const Libint_t* inteval,
196 LIBINT2_REALTYPE* target,
197 const LIBINT2_REALTYPE* src0,
198 const LIBINT2_REALTYPE* src1,
199 const LIBINT2_REALTYPE* src2,
200 const LIBINT2_REALTYPE* src3,
201 const LIBINT2_REALTYPE* src4) {
// Guard for Lb < 1 or Ld < 2. NOTE(review): the guarded statement is not
// visible in this block.
204 if (Lb < 1 || Ld < 2)
// Vector length comes from the evaluator only when vectorize is true.
220 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// Component counts for Lb, Ld, Ld-1 and Ld-2, each also scaled by veclen.
222 const unsigned int Nb = INT_NCART(Lb);
223 const unsigned int Nd = INT_NCART(Ld);
224 const unsigned int Ndv = Nd * veclen;
225 const unsigned int Ndm1 = INT_NCART(Ld-1);
226 const unsigned int Ndm1v = Ndm1 * veclen;
227 const unsigned int Ndm2 = INT_NCART(Ld-2);
228 const unsigned int Ndm2v = Ndm2 * veclen;
232 FOR_CART(dx, dy, dz, Ld)
234 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
236 enum XYZ {x=0, y=1, z=2};
// Direction choice: x if dx != 0, else y if dy != 0; the later assignment wins.
// NOTE(review): the declaration and initial value of `xyz` are not visible.
239 if (dy != 0) xyz = y;
240 if (dx != 0) xyz = x;
// NOTE(review): the assignments of QD and WQ are not visible in this block.
244 const LIBINT2_REALTYPE *QD, *WQ;
// Offsets into target (index `id`, declared outside the visible lines) and
// into src0/src1 (index at level Ld-1).
260 const unsigned int idm1 = INT_CARTINDEX(Ld-1,d[0],d[1]);
261 const unsigned int d0_offset =
id * veclen;
262 const unsigned int dm10_offset = idm1 * veclen;
263 LIBINT2_REALTYPE* target_ptr = target + d0_offset;
264 const LIBINT2_REALTYPE* src0_ptr = src0 + dm10_offset;
265 const LIBINT2_REALTYPE* src1_ptr = src1 + dm10_offset;
// Offsets into src2/src3 (index at level Ld-2).
270 const unsigned int idm2 = INT_CARTINDEX(Ld-2,d[0],d[1]);
271 const unsigned int dm20_offset = idm2 * veclen;
273 const LIBINT2_REALTYPE* src2_ptr = src2 + dm20_offset;
274 const LIBINT2_REALTYPE* src3_ptr = src3 + dm20_offset;
275 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
// Variant with the src2/src3 term. NOTE(review): the pointers are indexed by v
// only; any per-b pointer advance and the condition selecting this loop over
// the shorter one below are not visible in this block.
277 for(
unsigned int b = 0; b < Nb; ++b) {
278 for(
unsigned int v=0; v<veclen; ++v) {
279 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v]
280 + dxyz * inteval->oo2e[v] * (src2_ptr[v] - inteval->roe[v] * src3_ptr[v]);
288 #if LIBINT2_FLOP_COUNT 289 inteval->nflops[0] += 8 * Nb * veclen;
// Variant without the src2/src3 term.
294 for(
unsigned int b = 0; b < Nb; ++b) {
295 for(
unsigned int v=0; v<veclen; ++v) {
296 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v];
302 #if LIBINT2_FLOP_COUNT 303 inteval->nflops[0] += 3 * Nb * veclen;
// src4 contribution, addressed with the same (Ld-1) offset as src0/src1.
308 const LIBINT2_REALTYPE* src4_ptr = src4 + dm10_offset;
312 FOR_CART(bx, by, bz, Lb-1)
314 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
// NOTE(review): ib is an index at level Lb built from b; presumably b[xyz] is
// incremented first -- that statement is not visible; confirm.
317 const unsigned int ib = INT_CARTINDEX(Lb,b[0],b[1]);
318 const unsigned int b0d0_offset = ib * Ndv + d0_offset;
319 LIBINT2_REALTYPE* target_ptr = target + b0d0_offset;
320 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
// Accumulate (+=) on top of the value stored by the loops above.
321 for(
unsigned int v=0; v<veclen; ++v) {
322 target_ptr[v] += bxyz * inteval->oo2ze[v] * src4_ptr[v];
324 #if LIBINT2_FLOP_COUNT 325 inteval->nflops[0] += 3 * veclen;
// Primary OSAVRR_sx_sx template. Only compute() is declared here (no body).
// Unlike OSVRR_sx_sx it has no unit_a parameter and compute() takes only two
// source arrays, src1 and src4.
341 template <
// part: selector for the specialization
int part,
// Lb, Ld: compile-time indices consumed by FOR_CART / INT_NCART / INT_CARTINDEX
// in the definition that follows (presumably angular momenta -- TODO confirm)
int Lb,
int Ld,
// vectorize: when true, the definition that follows takes the vector length
// from inteval->veclen; otherwise it uses 1
bool vectorize>
struct OSAVRR_sx_sx {
342 static void compute(
const Libint_t* inteval,
343 LIBINT2_REALTYPE* target,
344 const LIBINT2_REALTYPE* src1,
345 const LIBINT2_REALTYPE* src4);
// Definition of compute() for an OSAVRR_sx_sx specialization.
// It iterates over the components enumerated by FOR_CART(..., Lb), stores
// WP * src1 into target, and then accumulates a src4 contribution scaled by
// inteval->oo2ze.
// NOTE(review): the rest of the template parameter list and the `struct ...`
// header line are not visible here (the embedded numbering skips 353-354).
352 template <
int Lb,
int Ld,
355 static void compute(
const Libint_t* inteval,
356 LIBINT2_REALTYPE* target,
357 const LIBINT2_REALTYPE* src1,
358 const LIBINT2_REALTYPE* src4) {
// Precondition (checked only when assertions are enabled): Lb >= 2 and Ld >= 1.
361 assert(not (Lb < 2 || Ld < 1));
// Vector length comes from the evaluator only when vectorize is true.
363 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// NdV = INT_NCART(Ld) * veclen is the stride used for the b-index offset below.
365 const unsigned int Nd = INT_NCART(Ld);
366 const unsigned int NdV = Nd * veclen;
369 FOR_CART(bx, by, bz, Lb)
371 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
373 enum XYZ {x=0, y=1, z=2};
// Direction choice: x if bx != 0, else y if by != 0; the later assignment wins.
// NOTE(review): the declaration and initial value of `xyz` are not visible.
376 if (by != 0) xyz = y;
377 if (bx != 0) xyz = x;
// NOTE(review): the assignment of WP is not visible in this block.
381 const LIBINT2_REALTYPE *WP;
// Offset of the (Lb-1) source data for src1.
// NOTE(review): presumably b[xyz] has been decremented before this index is
// computed -- that statement is not visible; confirm.
394 const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
395 const unsigned int bm10d0_offset = ibm1 * NdV;
396 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
// Store WP * src1 for every (d, v) element. NOTE(review): `dv` is declared
// outside the visible lines, and target is indexed directly by dv, so any
// per-b advance of target is also not visible here.
400 for(
unsigned int d = 0; d < Nd; ++d) {
401 for(
unsigned int v=0; v<veclen; ++v, ++dv) {
402 target[dv] = WP[v] * src1_ptr[dv];
// One multiplication per element.
405 #if LIBINT2_FLOP_COUNT 406 inteval->nflops[0] += NdV;
// src4 contribution: src4 is addressed with the (Lb-1) index and an
// INT_NCART(Ld-1) * veclen stride.
411 const unsigned int Ndm1 = INT_NCART(Ld-1);
412 const unsigned int Ndm1V = Ndm1 * veclen;
413 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
414 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
418 FOR_CART(dx, dy, dz, Ld-1)
420 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
// NOTE(review): dc is an index at level Ld built from d; presumably d[xyz] is
// incremented first -- that statement is not visible; confirm.
423 const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
424 const unsigned int dc_offset = dc * veclen;
425 LIBINT2_REALTYPE* tptr = target + dc_offset;
426 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
// Accumulate (+=) on top of the value stored by the loop above.
427 for(
unsigned int v=0; v<veclen; ++v) {
428 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
430 #if LIBINT2_FLOP_COUNT 431 inteval->nflops[0] += 3 * veclen;
449 #endif // header guard Default definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
Definition: OSVRR_sx_sx.h:341
Definition: OSVRR_sx_sx.h:35