LIBINT  2.6.0
OSVRR_sx_sx.h
1 /*
2  * Copyright (C) 2004-2019 Edward F. Valeev
3  *
4  * This file is part of Libint.
5  *
6  * Libint is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Libint is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 #ifndef _libint2_src_lib_libint_osvrrsxsx_h_
22 #define _libint2_src_lib_libint_osvrrsxsx_h_
23 
24 #include <cstdlib>
25 #include <libint2.h>
26 #include <util_types.h>
27 #include <libint2/cgshell_ordering.h>
28 
29 #ifdef __GNUC__
30 #pragma implementation
31 #endif
32 
33 namespace libint2 {
34 
35  template <int part, int Lb, int Ld, bool unit_a, bool vectorize> struct OSVRR_sx_sx {
36  static void compute(const Libint_t* inteval,
37  LIBINT2_REALTYPE* target,
38  const LIBINT2_REALTYPE* src1,
39  const LIBINT2_REALTYPE* src0,
40  const LIBINT2_REALTYPE* src2,
41  const LIBINT2_REALTYPE* src3,
42  const LIBINT2_REALTYPE* src4);
43  };
44 
52  template <int Lb, int Ld,
53  bool unit_a,
54  bool vectorize> struct OSVRR_sx_sx<0,Lb,Ld,unit_a,vectorize> {
55 
56  static void compute(const Libint_t* inteval,
57  LIBINT2_REALTYPE* target,
58  const LIBINT2_REALTYPE* src0,
59  const LIBINT2_REALTYPE* src1,
60  const LIBINT2_REALTYPE* src2,
61  const LIBINT2_REALTYPE* src3,
62  const LIBINT2_REALTYPE* src4) {
63 
64  // works for (sd|sp) and higher
65  assert(not (Lb < 2 || Ld < 1));
66 
67  const unsigned int veclen = vectorize ? inteval->veclen : 1;
68 
69  const unsigned int Nd = INT_NCART(Ld);
70  const unsigned int NdV = Nd * veclen;
71 
72  int bx, by, bz;
73  FOR_CART(bx, by, bz, Lb)
74 
75  int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
76 
77  enum XYZ {x=0, y=1, z=2};
78  // Build along x, if possible
79  XYZ xyz = z;
80  if (by != 0) xyz = y;
81  if (bx != 0) xyz = x;
82  --b[xyz];
83 
84  // redirect
85  const LIBINT2_REALTYPE *PB, *WP;
86  switch(xyz) {
87  case x:
88 #if LIBINT2_DEFINED(eri,PB_x)
89  if (not unit_a) PB = inteval->PB_x;
90 #endif
91  WP = inteval->WP_x;
92  break;
93  case y:
94 #if LIBINT2_DEFINED(eri,PB_y)
95  if (not unit_a) PB = inteval->PB_y;
96 #endif
97  WP = inteval->WP_y;
98  break;
99  case z:
100 #if LIBINT2_DEFINED(eri,PB_z)
101  if (not unit_a) PB = inteval->PB_z;
102 #endif
103  WP = inteval->WP_z;
104  break;
105  }
106 
107  const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
108  const unsigned int bm10d0_offset = ibm1 * NdV;
109  const LIBINT2_REALTYPE* src0_ptr = unit_a ? 0 : src0 + bm10d0_offset;
110  const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
111 
112  // if b-2_xyz exists, include (0 b-2_xyz | 0 d)
113  if (b[xyz] > 0) {
114  --b[xyz];
115  const unsigned int ibm2 = INT_CARTINDEX(Lb-2,b[0],b[1]);
116  const unsigned int bm20d0_offset = ibm2 * NdV;
117  ++b[xyz];
118  const LIBINT2_REALTYPE* src2_ptr = src2 + bm20d0_offset;
119  const LIBINT2_REALTYPE* src3_ptr = src3 + bm20d0_offset;
120  const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
121 
122  unsigned int dv = 0;
123  for(unsigned int d = 0; d < Nd; ++d) {
124  for(unsigned int v=0; v<veclen; ++v, ++dv) {
125  LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv] + bxyz * inteval->oo2z[v] * (src2_ptr[dv] - inteval->roz[v] * src3_ptr[dv]);
126  if (not unit_a) value += PB[v] * src0_ptr[dv];
127  target[dv] = value;
128  }
129  }
130 #if LIBINT2_FLOP_COUNT
131  inteval->nflops[0] += (unit_a ? 6 : 8) * NdV;
132 #endif
133 
134  }
135  else {
136  unsigned int dv = 0;
137  for(unsigned int d = 0; d < Nd; ++d) {
138  for(unsigned int v=0; v<veclen; ++v, ++dv) {
139  LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv];
140  if (not unit_a) value += PB[v] * src0_ptr[dv];
141  target[dv] = value;
142  }
143  }
144 #if LIBINT2_FLOP_COUNT
145  inteval->nflops[0] += (unit_a ? 1 : 3) * NdV;
146 #endif
147  }
148 
149  {
150  const unsigned int Ndm1 = INT_NCART(Ld-1);
151  const unsigned int Ndm1V = Ndm1 * veclen;
152  const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
153  const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
154 
155  // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
156  int dx, dy, dz;
157  FOR_CART(dx, dy, dz, Ld-1)
158 
159  int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
160  ++d[xyz];
161 
162  const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
163  const unsigned int dc_offset = dc * veclen;
164  LIBINT2_REALTYPE* tptr = target + dc_offset;
165  const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
166  for(unsigned int v=0; v<veclen; ++v) {
167  tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
168  }
169 #if LIBINT2_FLOP_COUNT
170  inteval->nflops[0] += 3 * veclen;
171 #endif
172  src4_ptr += veclen;
173 
174  END_FOR_CART
175  }
176 
177  target += NdV;
178 
179  END_FOR_CART
180 
181  }
182 
183  };
184 
185 #if 0
186 
193  template <int Lb, int Ld, bool vectorize> struct OSVRR_sx_sx<1,Lb,Ld,vectorize> {
194 
195  static void compute(const Libint_t* inteval,
196  LIBINT2_REALTYPE* target,
197  const LIBINT2_REALTYPE* src0,
198  const LIBINT2_REALTYPE* src1,
199  const LIBINT2_REALTYPE* src2,
200  const LIBINT2_REALTYPE* src3,
201  const LIBINT2_REALTYPE* src4) {
202 
203  // works for (sp|sd) and higher
204  if (Lb < 1 || Ld < 2)
205  abort();
206 
207  // ALGORITHM
208  // loop over functions in d
209  // decide in which direction can build (x, y, or z)
210  // decide whether d-2 exists
211  // loop over functions in b
212  // include contributions from (0 b | 0 d-1)^(m), (0 b | 0 d-1)^(m+1),
213  // and possibly (0 b | 0 d-2)^(m), (0 b | 0 d-2)^(m+1) for each b
214  // end of loop over b
215  // loop over b-1
216  // include contribution from (0 b-1 | 0 d-1)^(m+1)
217  // end of loop over b-1
218  // end of loop over d
219 
220  const unsigned int veclen = vectorize ? inteval->veclen : 1;
221 
222  const unsigned int Nb = INT_NCART(Lb);
223  const unsigned int Nd = INT_NCART(Ld);
224  const unsigned int Ndv = Nd * veclen;
225  const unsigned int Ndm1 = INT_NCART(Ld-1);
226  const unsigned int Ndm1v = Ndm1 * veclen;
227  const unsigned int Ndm2 = INT_NCART(Ld-2);
228  const unsigned int Ndm2v = Ndm2 * veclen;
229 
230  int dx, dy, dz;
231  int id = 0;
232  FOR_CART(dx, dy, dz, Ld)
233 
234  int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
235 
236  enum XYZ {x=0, y=1, z=2};
237  // Build along x, if possible
238  XYZ xyz = z;
239  if (dy != 0) xyz = y;
240  if (dx != 0) xyz = x;
241  --d[xyz];
242 
243  // redirect
244  const LIBINT2_REALTYPE *QD, *WQ;
245  switch(xyz) {
246  case x:
247  QD = inteval->QD_x;
248  WQ = inteval->WQ_x;
249  break;
250  case y:
251  QD = inteval->QD_y;
252  WQ = inteval->WQ_y;
253  break;
254  case z:
255  QD = inteval->QD_z;
256  WQ = inteval->WQ_z;
257  break;
258  }
259 
260  const unsigned int idm1 = INT_CARTINDEX(Ld-1,d[0],d[1]);
261  const unsigned int d0_offset = id * veclen;
262  const unsigned int dm10_offset = idm1 * veclen;
263  LIBINT2_REALTYPE* target_ptr = target + d0_offset;
264  const LIBINT2_REALTYPE* src0_ptr = src0 + dm10_offset;
265  const LIBINT2_REALTYPE* src1_ptr = src1 + dm10_offset;
266 
267  // if d-2_xyz exists, include (0 b | 0 d-2_xyz)
268  if (d[xyz] > 0) {
269  --d[xyz];
270  const unsigned int idm2 = INT_CARTINDEX(Ld-2,d[0],d[1]);
271  const unsigned int dm20_offset = idm2 * veclen;
272  ++d[xyz];
273  const LIBINT2_REALTYPE* src2_ptr = src2 + dm20_offset;
274  const LIBINT2_REALTYPE* src3_ptr = src3 + dm20_offset;
275  const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
276 
277  for(unsigned int b = 0; b < Nb; ++b) {
278  for(unsigned int v=0; v<veclen; ++v) {
279  target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v]
280  + dxyz * inteval->oo2e[v] * (src2_ptr[v] - inteval->roe[v] * src3_ptr[v]);
281  }
282  target_ptr += Ndv;
283  src0_ptr += Ndm1v;
284  src1_ptr += Ndm1v;
285  src2_ptr += Ndm2v;
286  src3_ptr += Ndm2v;
287  }
288 #if LIBINT2_FLOP_COUNT
289  inteval->nflops[0] += 8 * Nb * veclen;
290 #endif
291 
292  }
293  else {
294  for(unsigned int b = 0; b < Nb; ++b) {
295  for(unsigned int v=0; v<veclen; ++v) {
296  target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v];
297  }
298  target_ptr += Ndv;
299  src0_ptr += Ndm1v;
300  src1_ptr += Ndm1v;
301  }
302 #if LIBINT2_FLOP_COUNT
303  inteval->nflops[0] += 3 * Nb * veclen;
304 #endif
305  }
306 
307  {
308  const LIBINT2_REALTYPE* src4_ptr = src4 + dm10_offset;
309 
310  // loop over b-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
311  int bx, by, bz;
312  FOR_CART(bx, by, bz, Lb-1)
313 
314  int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
315  ++b[xyz];
316 
317  const unsigned int ib = INT_CARTINDEX(Lb,b[0],b[1]);
318  const unsigned int b0d0_offset = ib * Ndv + d0_offset;
319  LIBINT2_REALTYPE* target_ptr = target + b0d0_offset;
320  const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
321  for(unsigned int v=0; v<veclen; ++v) {
322  target_ptr[v] += bxyz * inteval->oo2ze[v] * src4_ptr[v];
323  }
324 #if LIBINT2_FLOP_COUNT
325  inteval->nflops[0] += 3 * veclen;
326 #endif
327  src4_ptr += Ndm1v;
328 
329  END_FOR_CART // end of loop over b
330  }
331 
332  ++id;
333 
334  END_FOR_CART // end of loop over d
335 
336  }
337 
338  };
339 #endif
340 
341  template <int part, int Lb, int Ld, bool vectorize> struct OSAVRR_sx_sx {
342  static void compute(const Libint_t* inteval,
343  LIBINT2_REALTYPE* target,
344  const LIBINT2_REALTYPE* src1,
345  const LIBINT2_REALTYPE* src4);
346  };
347 
352  template <int Lb, int Ld,
353  bool vectorize> struct OSAVRR_sx_sx<0,Lb,Ld,vectorize> {
354 
355  static void compute(const Libint_t* inteval,
356  LIBINT2_REALTYPE* target,
357  const LIBINT2_REALTYPE* src1,
358  const LIBINT2_REALTYPE* src4) {
359 
360  // works for (sd|sp) and higher
361  assert(not (Lb < 2 || Ld < 1));
362 
363  const unsigned int veclen = vectorize ? inteval->veclen : 1;
364 
365  const unsigned int Nd = INT_NCART(Ld);
366  const unsigned int NdV = Nd * veclen;
367 
368  int bx, by, bz;
369  FOR_CART(bx, by, bz, Lb)
370 
371  int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
372 
373  enum XYZ {x=0, y=1, z=2};
374  // Build along x, if possible
375  XYZ xyz = z;
376  if (by != 0) xyz = y;
377  if (bx != 0) xyz = x;
378  --b[xyz];
379 
380  // redirect
381  const LIBINT2_REALTYPE *WP;
382  switch(xyz) {
383  case x:
384  WP = inteval->WP_x;
385  break;
386  case y:
387  WP = inteval->WP_y;
388  break;
389  case z:
390  WP = inteval->WP_z;
391  break;
392  }
393 
394  const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
395  const unsigned int bm10d0_offset = ibm1 * NdV;
396  const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
397 
398  {
399  unsigned int dv = 0;
400  for(unsigned int d = 0; d < Nd; ++d) {
401  for(unsigned int v=0; v<veclen; ++v, ++dv) {
402  target[dv] = WP[v] * src1_ptr[dv];
403  }
404  }
405 #if LIBINT2_FLOP_COUNT
406  inteval->nflops[0] += NdV;
407 #endif
408  }
409 
410  {
411  const unsigned int Ndm1 = INT_NCART(Ld-1);
412  const unsigned int Ndm1V = Ndm1 * veclen;
413  const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
414  const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
415 
416  // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
417  int dx, dy, dz;
418  FOR_CART(dx, dy, dz, Ld-1)
419 
420  int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
421  ++d[xyz];
422 
423  const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
424  const unsigned int dc_offset = dc * veclen;
425  LIBINT2_REALTYPE* tptr = target + dc_offset;
426  const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
427  for(unsigned int v=0; v<veclen; ++v) {
428  tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
429  }
430 #if LIBINT2_FLOP_COUNT
431  inteval->nflops[0] += 3 * veclen;
432 #endif
433  src4_ptr += veclen;
434 
435  END_FOR_CART
436  }
437 
438  target += NdV;
439 
440  END_FOR_CART
441 
442  }
443 
444  };
445 
446 
447 };
448 
449 #endif // header guard
450 
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
Definition: OSVRR_sx_sx.h:341
Definition: OSVRR_sx_sx.h:35