LIBINT  2.6.0
engine.impl.h
1 /*
2  * Copyright (C) 2004-2019 Edward F. Valeev
3  *
4  * This file is part of Libint.
5  *
6  * Libint is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Libint is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 #ifndef _libint2_src_lib_libint_engineimpl_h_
22 #define _libint2_src_lib_libint_engineimpl_h_
23 
24 #include "./engine.h"
25 
26 #include <iterator>
27 
28 #pragma GCC diagnostic push
29 #pragma GCC system_header
30 #include <Eigen/Core>
31 #pragma GCC diagnostic pop
32 
33 #include <libint2/boys.h>
34 #if LIBINT_HAS_SYSTEM_BOOST_PREPROCESSOR_VARIADICS
35 # include <boost/preprocessor.hpp>
36 # include <boost/preprocessor/facilities/is_1.hpp>
37 #else // use bundled boost
38 # include <libint2/boost/preprocessor.hpp>
39 # include <libint2/boost/preprocessor/facilities/is_1.hpp>
40 #endif
41 
42 // extra PP macros
43 
44 #define BOOST_PP_MAKE_TUPLE_INTERNAL(z, i, last) \
45  i BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i, last))
46 #define BOOST_PP_MAKE_TUPLE(n) \
48  (BOOST_PP_REPEAT(n, BOOST_PP_MAKE_TUPLE_INTERNAL, BOOST_PP_DEC(n)))
49 
50 // the engine will be profiled by default if library was configured with
51 // --enable-profile
52 #ifdef LIBINT2_PROFILE
53 #define LIBINT2_ENGINE_TIMERS
54 // uncomment if want to profile each integral class
55 #define LIBINT2_ENGINE_PROFILE_CLASS
56 #endif
57 // uncomment if want to profile the engine even if library was configured
58 // without --enable-profile
59 //# define LIBINT2_ENGINE_TIMERS
60 
61 namespace libint2 {
62 
63 template <typename T, unsigned N>
64 typename std::remove_all_extents<T>::type* to_ptr1(T (&a)[N]) {
65  return reinterpret_cast<typename std::remove_all_extents<T>::type*>(&a);
66 }
67 
72 #define BOOST_PP_NBODY_OPERATOR_LIST \
73  (overlap, \
74  (kinetic, \
75  (elecpot, \
76  (elecpot, \
77  (elecpot, \
78  (1emultipole, \
79  (2emultipole, \
80  (3emultipole, \
81  (sphemultipole, \
82  (eri, (eri, (eri, (eri, (eri, (eri, (eri, (eri, (eri, (eri, BOOST_PP_NIL)))))))))))))))))))
83 
84 #define BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE \
85  BOOST_PP_MAKE_TUPLE(BOOST_PP_LIST_SIZE(BOOST_PP_NBODY_OPERATOR_LIST))
86 #define BOOST_PP_NBODY_OPERATOR_INDEX_LIST \
87  BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE)
88 #define BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX \
89  8 // sphemultipole, the 9th member of BOOST_PP_NBODY_OPERATOR_LIST, is the last
90  // 1-body operator
91 
92 // make list of braket indices for n-body ints
93 #define BOOST_PP_NBODY_BRAKET_INDEX_TUPLE \
94  BOOST_PP_MAKE_TUPLE(BOOST_PP_INC(BOOST_PP_NBODY_BRAKET_MAX_INDEX))
95 #define BOOST_PP_NBODY_BRAKET_INDEX_LIST \
96  BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_BRAKET_INDEX_TUPLE)
97 #define BOOST_PP_NBODY_BRAKET_RANK_TUPLE (2, 3, 4)
98 #define BOOST_PP_NBODY_BRAKET_RANK_LIST \
99  BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_BRAKET_RANK_TUPLE)
100 
101 // make list of derivative orders for n-body ints
102 #define BOOST_PP_NBODY_DERIV_ORDER_TUPLE \
103  BOOST_PP_MAKE_TUPLE(BOOST_PP_INC(LIBINT2_MAX_DERIV_ORDER))
104 #define BOOST_PP_NBODY_DERIV_ORDER_LIST \
105  BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_DERIV_ORDER_TUPLE)
106 
107 
109 __libint2_engine_inline libint2::any
110 default_params(const Operator& oper) {
111  switch (static_cast<int>(oper)) {
112 #define BOOST_PP_NBODYENGINE_MCR1(r, data, i, elem) \
113  case i: \
114  return operator_traits<static_cast<Operator>(i)>::default_params();
115  BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR1, _,
116  BOOST_PP_NBODY_OPERATOR_LIST)
117  default:
118  break;
119  }
120  assert(false && "missing case in switch"); // unreachable
121  return libint2::any();
122 }
123 
125 
133 template <typename... ShellPack>
134 __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute(
135  const libint2::Shell& first_shell, const ShellPack&... rest_of_shells) {
136  constexpr auto nargs = 1 + sizeof...(rest_of_shells);
137  assert(nargs == braket_rank() && "# of arguments to compute() does not match the braket type");
138 
140  first_shell, rest_of_shells...}};
141 
142  if (operator_rank() == 1) {
143  if (nargs == 2) return compute1(shells[0], shells[1]);
144  } else if (operator_rank() == 2) {
145  auto compute_ptr_idx = ((static_cast<int>(oper_) -
146  static_cast<int>(Operator::first_2body_oper)) *
147  nbrakets_2body +
148  (static_cast<int>(braket_) -
149  static_cast<int>(BraKet::first_2body_braket))) *
150  nderivorders_2body +
151  deriv_order_;
152  auto compute_ptr = compute2_ptrs().at(compute_ptr_idx);
153  assert(compute_ptr != nullptr && "2-body compute function not found");
154  if (nargs == 2)
155  return (this->*compute_ptr)(shells[0], Shell::unit(), shells[1],
156  Shell::unit(), nullptr, nullptr);
157  if (nargs == 3)
158  return (this->*compute_ptr)(shells[0], Shell::unit(), shells[1],
159  shells[2], nullptr, nullptr);
160  if (nargs == 4)
161  return (this->*compute_ptr)(shells[0], shells[1], shells[2], shells[3], nullptr, nullptr);
162  }
163 
164  assert(false && "missing feature"); // only reached if missing a feature
165  return targets_;
166 }
167 
172 __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute1(
173  const libint2::Shell& s1, const libint2::Shell& s2) {
174  // can only handle 1 contraction at a time
175  assert((s1.ncontr() == 1 && s2.ncontr() == 1) &&
176  "generally-contracted shells not yet supported");
177 
178  const auto oper_is_nuclear =
179  (oper_ == Operator::nuclear || oper_ == Operator::erf_nuclear ||
180  oper_ == Operator::erfc_nuclear);
181 
182  const auto l1 = s1.contr[0].l;
183  const auto l2 = s2.contr[0].l;
184  assert(l1 <= lmax_ && "the angular momentum limit is exceeded");
185  assert(l2 <= lmax_ && "the angular momentum limit is exceeded");
186 
187  // if want nuclear, make sure there is at least one nucleus .. otherwise the
188  // user likely forgot to call set_params
189  if (oper_is_nuclear && nparams() == 0)
190  throw std::logic_error(
191  "Engine<*nuclear>, but no charges found; forgot to call "
192  "set_params()?");
193 
194  const auto n1 = s1.size();
195  const auto n2 = s2.size();
196  const auto n12 = n1 * n2;
197  const auto ncart1 = s1.cartesian_size();
198  const auto ncart2 = s2.cartesian_size();
199  const auto ncart12 = ncart1 * ncart2;
200 
201  // assert # of primitive pairs
202  const auto nprim1 = s1.nprim();
203  const auto nprim2 = s2.nprim();
204  const auto nprimpairs = nprim1 * nprim2;
205  assert(nprimpairs <= primdata_.size() && "the max number of primitive pairs exceeded");
206 
207  auto nparam_sets = nparams();
208 
209  // keep track if need to set targets_ explicitly
210  bool set_targets = set_targets_;
211 
212  // # of targets computed by libint
213  const auto ntargets = nopers() * num_geometrical_derivatives(2, deriv_order_);
214 
215  // Libint computes derivatives with respect to basis functions only, must
216  // must use translational invariance to recover derivatives w.r.t. operator
217  // degrees of freedom
218  // will compute derivs w.r.t. 2 Gaussian centers + (if nuclear) nparam_sets
219  // operator centers
220  const auto nderivcenters_shset = 2 + (oper_is_nuclear ? nparam_sets : 0);
221  const auto nderivcoord = 3 * nderivcenters_shset;
222  const auto num_shellsets_computed =
223  nopers() * num_geometrical_derivatives(nderivcenters_shset, deriv_order_);
224 
225  // will use scratch_ if:
226  // - Coulomb ints are computed 1 charge at a time, contributions are
227  // accumulated in scratch_ (unless la==lb==0)
228  // - derivatives on the missing center need to be reconstructed (no need to
229  // accumulate into scratch though)
230  // NB ints in scratch are packed in order
231  const auto accumulate_ints_in_scratch = oper_is_nuclear;
232 
233  // adjust max angular momentum, if needed
234  const auto lmax = std::max(l1, l2);
235  assert(lmax <= lmax_ && "the angular momentum limit is exceeded");
236 
237  // N.B. for l=0 no need to transform to solid harmonics
238  // this is a workaround for the corner case of oper_ == Operator::*nuclear,
239  // and solid harmonics (s|s) integral ... beware the integral storage state
240  // machine
241  const auto tform_to_solids =
242  (s1.contr[0].pure || s2.contr[0].pure) && lmax != 0;
243 
244  // simple (s|s) ints will be computed directly and accumulated in the first
245  // element of stack
246  const auto compute_directly =
247  lmax == 0 && deriv_order_ == 0 &&
248  (oper_ == Operator::overlap || oper_is_nuclear);
249  if (compute_directly) {
250  primdata_[0].stack[0] = 0;
251  targets_[0] = primdata_[0].stack;
252  }
253 
254  if (accumulate_ints_in_scratch)
255  std::fill(std::begin(scratch_),
256  std::begin(scratch_) + num_shellsets_computed * ncart12, 0.0);
257 
258  // loop over accumulation batches
259  for (auto pset = 0u; pset != nparam_sets; ++pset) {
260  if (!oper_is_nuclear)
261  assert(nparam_sets == 1 && "unexpected number of operator parameters");
262 
263  auto p12 = 0;
264  for (auto p1 = 0; p1 != nprim1; ++p1) {
265  for (auto p2 = 0; p2 != nprim2; ++p2, ++p12) {
266  compute_primdata(primdata_[p12], s1, s2, p1, p2, pset);
267  }
268  }
269  primdata_[0].contrdepth = p12;
270 
271  if (compute_directly) {
272  auto& result = primdata_[0].stack[0];
273  switch (oper_) {
274  case Operator::overlap:
275  for (auto p12 = 0; p12 != primdata_[0].contrdepth; ++p12)
276  result += primdata_[p12]._0_Overlap_0_x[0] *
277  primdata_[p12]._0_Overlap_0_y[0] *
278  primdata_[p12]._0_Overlap_0_z[0];
279  break;
280  case Operator::nuclear:
281  case Operator::erf_nuclear:
282  case Operator::erfc_nuclear:
283  for (auto p12 = 0; p12 != primdata_[0].contrdepth; ++p12)
284  result += primdata_[p12].LIBINT_T_S_ELECPOT_S(0)[0];
285  break;
286  default:
287  assert(false && "missing case in switch");
288  }
289  primdata_[0].targets[0] = &result;
290  } else {
291  const auto buildfnidx = s1.contr[0].l * hard_lmax_ + s2.contr[0].l;
292  assert(buildfnptrs_[buildfnidx] && "null build function ptr");
293  buildfnptrs_[buildfnidx](&primdata_[0]);
294 
295  if (accumulate_ints_in_scratch) {
296  set_targets = true;
297  // - for non-derivative ints and first derivative ints the target
298  // ints computed by libint will appear at the front of targets_
299  // - for second and higher derivs need to re-index targets, hence
300  // will accumulate later, when computing operator derivatives via
301  // transinv
302  if (deriv_order_ <= 1) {
303  // accumulate targets computed by libint for this pset into the
304  // accumulated targets in scratch
305  auto s_target = &scratch_[0];
306  for (auto s = 0; s != ntargets; ++s, s_target += ncart12)
307  if (pset != 0)
308  std::transform(primdata_[0].targets[s],
309  primdata_[0].targets[s] + ncart12, s_target,
310  s_target, std::plus<value_type>());
311  else
312  std::copy(primdata_[0].targets[s],
313  primdata_[0].targets[s] + ncart12, s_target);
314  }
315 
316  // 2. reconstruct derivatives of nuclear ints for each nucleus
317  // using translational invariance
318  // NB this is done in cartesian basis, otherwise would have to tform
319  // to solids contributions from every atom, rather than the running
320  // total at the end
321  if (deriv_order_ > 0) {
322  switch (deriv_order_) {
323  case 1: {
324  // first 6 shellsets are derivatives with respect to Gaussian
325  // positions
326  // following them are derivs with respect to nuclear coordinates
327  // (3 per nucleus)
328  assert(ntargets == 6 && "unexpected # of targets");
329  auto dest = &scratch_[0] + (6 + pset * 3) * ncart12;
330  for (auto s = 0; s != 3; ++s, dest += ncart12) {
331  auto src = primdata_[0].targets[s];
332  for (auto i = 0; i != ncart12; ++i) {
333  dest[i] = -src[i];
334  }
335  }
336  dest -= 3 * ncart12;
337  for (auto s = 3; s != 6; ++s, dest += ncart12) {
338  auto src = primdata_[0].targets[s];
339  for (auto i = 0; i != ncart12; ++i) {
340  dest[i] -= src[i];
341  }
342  }
343  } break;
344 
345  case 2: {
346 // computes upper triangle index
347 // n2 = matrix size times 2
348 // i,j = indices, i<j
349 #define upper_triangle_index_ord(n2, i, j) ((i) * ((n2) - (i)-1) / 2 + (j))
350 // same as above, but orders i and j
351 #define upper_triangle_index(n2, i, j) \
352  upper_triangle_index_ord(n2, std::min((i), (j)), std::max((i), (j)))
353 
354  // accumulate ints for this pset to scratch in locations
355  // remapped to overall deriv index
356  const auto ncoords_times_two = nderivcoord * 2;
357  for (auto d0 = 0, d01 = 0; d0 != 6; ++d0) {
358  for (auto d1 = d0; d1 != 6; ++d1, ++d01) {
359  const auto d01_full =
360  upper_triangle_index_ord(ncoords_times_two, d0, d1);
361  auto tgt = &scratch_[d01_full * ncart12];
362  if (pset != 0)
363  std::transform(primdata_[0].targets[d01],
364  primdata_[0].targets[d01] + ncart12, tgt,
365  tgt, std::plus<value_type>());
366  else
367  std::copy(primdata_[0].targets[d01],
368  primdata_[0].targets[d01] + ncart12, tgt);
369  }
370  }
371 
372  // use translational invariance to build derivatives w.r.t.
373  // operator centers
374  {
375  // mixed derivatives: first deriv w.r.t. Gaussian, second
376  // w.r.t. operator coord pset
377  const auto c1 = 2 + pset;
378  for (auto c0 = 0; c0 != 2; ++c0) {
379  for (auto xyz0 = 0; xyz0 != 3; ++xyz0) {
380  const auto coord0 = c0 * 3 + xyz0;
381  for (auto xyz1 = 0; xyz1 != 3; ++xyz1) {
382  const auto coord1 = c1 * 3 + xyz1; // coord1 > coord0
383 
384  const auto coord01_abs = upper_triangle_index_ord(
385  ncoords_times_two, coord0, coord1);
386  auto tgt = &scratch_[coord01_abs * ncart12];
387 
388  // d2 / dAi dOj = - d2 / dAi dAj
389  {
390  auto coord1_A = xyz1;
391  const auto coord01_A =
392  upper_triangle_index(12, coord0, coord1_A);
393  const auto src = primdata_[0].targets[coord01_A];
394  for (auto i = 0; i != ncart12; ++i) tgt[i] = -src[i];
395  }
396 
397  // d2 / dAi dOj -= d2 / dAi dBj
398  {
399  auto coord1_B = 3 + xyz1;
400  const auto coord01_B =
401  upper_triangle_index(12, coord0, coord1_B);
402  const auto src = primdata_[0].targets[coord01_B];
403  for (auto i = 0; i != ncart12; ++i) tgt[i] -= src[i];
404  }
405  }
406  }
407  }
408  } // mixed derivs
409  {
410  // operator derivs
411  const auto c0 = 2 + pset;
412  const auto c1 = c0;
413  for (auto xyz0 = 0; xyz0 != 3; ++xyz0) {
414  const auto coord0 = c0 * 3 + xyz0;
415  for (auto xyz1 = xyz0; xyz1 != 3; ++xyz1) {
416  const auto coord1 = c1 * 3 + xyz1; // coord1 > coord0
417 
418  const auto coord01_abs = upper_triangle_index_ord(
419  ncoords_times_two, coord0, coord1);
420  auto tgt = &scratch_[coord01_abs * ncart12];
421 
422  // d2 / dOi dOj = d2 / dAi dAj
423  {
424  auto coord0_A = xyz0;
425  auto coord1_A = xyz1;
426  const auto coord01_AA =
427  upper_triangle_index_ord(12, coord0_A, coord1_A);
428  const auto src = primdata_[0].targets[coord01_AA];
429  for (auto i = 0; i != ncart12; ++i) tgt[i] = src[i];
430  }
431 
432  // d2 / dOi dOj += d2 / dAi dBj
433  {
434  auto coord0_A = xyz0;
435  auto coord1_B = 3 + xyz1;
436  const auto coord01_AB =
437  upper_triangle_index_ord(12, coord0_A, coord1_B);
438  const auto src = primdata_[0].targets[coord01_AB];
439  for (auto i = 0; i != ncart12; ++i) tgt[i] += src[i];
440  }
441 
442  // d2 / dOi dOj += d2 / dBi dAj
443  {
444  auto coord0_B = 3 + xyz0;
445  auto coord1_A = xyz1;
446  const auto coord01_BA =
447  upper_triangle_index_ord(12, coord1_A, coord0_B);
448  const auto src = primdata_[0].targets[coord01_BA];
449  for (auto i = 0; i != ncart12; ++i) tgt[i] += src[i];
450  }
451 
452  // d2 / dOi dOj += d2 / dBi dBj
453  {
454  auto coord0_B = 3 + xyz0;
455  auto coord1_B = 3 + xyz1;
456  const auto coord01_BB =
457  upper_triangle_index_ord(12, coord0_B, coord1_B);
458  const auto src = primdata_[0].targets[coord01_BB];
459  for (auto i = 0; i != ncart12; ++i) tgt[i] += src[i];
460  }
461  }
462  }
463  } // operator derivs
464 
465 #undef upper_triangle_index
466  } break;
467 
468  default: {
469  assert(deriv_order_ <= 2 && "feature not implemented");
470 
471  // 1. since # of derivatives changes, remap derivatives computed
472  // by libint; targets_ will hold the "remapped" pointers to
473  // the data
474  using ShellSetDerivIterator =
476  std::vector<unsigned int>>;
477  ShellSetDerivIterator shellset_gaussian_diter(deriv_order_, 2);
478  ShellSetDerivIterator shellset_full_diter(deriv_order_,
479  nderivcenters_shset);
480  std::vector<unsigned int> full_deriv(3 * nderivcenters_shset, 0);
481  std::size_t s = 0;
482  while (shellset_gaussian_diter) { // loop over derivs computed
483  // by libint
484  const auto& s1s2_deriv = *shellset_gaussian_diter;
485  std::copy(std::begin(s1s2_deriv), std::end(s1s2_deriv),
486  std::begin(full_deriv));
487  const auto full_rank = ShellSetDerivIterator::rank(full_deriv);
488  targets_[full_rank] = primdata_[0].targets[s];
489  }
490  // use translational invariance to build derivatives w.r.t.
491  // operator centers
492  }
493 
494  } // deriv_order_ switch
495  } // reconstruct derivatives
496  }
497  } // ltot != 0
498  } // pset (accumulation batches)
499 
500  if (tform_to_solids) {
501  set_targets = false;
502  // where do spherical ints go?
503  auto* spherical_ints =
504  (accumulate_ints_in_scratch) ? scratch2_ : &scratch_[0];
505 
506  // transform to solid harmonics, one shell set at a time:
507  // for each computed shell set ...
508  for (auto s = 0ul; s != num_shellsets_computed;
509  ++s, spherical_ints += n12) {
510  auto cartesian_ints = accumulate_ints_in_scratch
511  ? &scratch_[s * ncart12]
512  : primdata_[0].targets[s];
513  // transform
514  if (s1.contr[0].pure && s2.contr[0].pure) {
515  libint2::solidharmonics::tform(l1, l2, cartesian_ints, spherical_ints);
516  } else {
517  if (s1.contr[0].pure)
518  libint2::solidharmonics::tform_rows(l1, n2, cartesian_ints,
519  spherical_ints);
520  else
521  libint2::solidharmonics::tform_cols(n1, l2, cartesian_ints,
522  spherical_ints);
523  }
524  // .. and compute the destination
525  targets_[s] = spherical_ints;
526  } // loop cartesian shell set
527  } // tform to solids
528 
529  if (set_targets) {
530  for (auto s = 0ul; s != num_shellsets_computed; ++s) {
531  auto cartesian_ints = accumulate_ints_in_scratch
532  ? &scratch_[s * ncart12]
533  : primdata_[0].targets[s];
534  targets_[s] = cartesian_ints;
535  }
536  }
537 
538  if (cartesian_shell_normalization() == CartesianShellNormalization::uniform) {
540  for (auto s = 0ul; s != num_shellsets_computed; ++s) {
541  uniform_normalize_cartesian_shells(const_cast<value_type*>(targets_[s]), shells);
542  }
543  }
544 
545  return targets_;
546 }
547 
548 // generic _initializer
549 __libint2_engine_inline void Engine::_initialize() {
550 #define BOOST_PP_NBODYENGINE_MCR3_ncenter(product) \
551  BOOST_PP_TUPLE_ELEM(3, 1, product)
552 
553 #define BOOST_PP_NBODYENGINE_MCR3_default_ncenter(product) \
554  BOOST_PP_IIF(BOOST_PP_GREATER(BOOST_PP_TUPLE_ELEM(3, 0, product), \
555  BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX), \
556  4, 2)
557 
558 #define BOOST_PP_NBODYENGINE_MCR3_NCENTER(product) \
559  BOOST_PP_IIF( \
560  BOOST_PP_NOT_EQUAL(BOOST_PP_NBODYENGINE_MCR3_ncenter(product), \
561  BOOST_PP_NBODYENGINE_MCR3_default_ncenter(product)), \
562  BOOST_PP_NBODYENGINE_MCR3_ncenter(product), BOOST_PP_EMPTY())
563 
564 #define BOOST_PP_NBODYENGINE_MCR3_OPER(product) \
565  BOOST_PP_LIST_AT(BOOST_PP_NBODY_OPERATOR_LIST, \
566  BOOST_PP_TUPLE_ELEM(3, 0, product))
567 
568 #define BOOST_PP_NBODYENGINE_MCR3_DERIV(product) \
569  BOOST_PP_IIF(BOOST_PP_GREATER(BOOST_PP_TUPLE_ELEM(3, 2, product), 0), \
570  BOOST_PP_TUPLE_ELEM(3, 2, product), BOOST_PP_EMPTY())
571 
572 #define BOOST_PP_NBODYENGINE_MCR3_task(product) \
573  BOOST_PP_CAT(BOOST_PP_CAT(BOOST_PP_NBODYENGINE_MCR3_ncenter(product), \
574  BOOST_PP_NBODYENGINE_MCR3_OPER(product)), \
575  BOOST_PP_NBODYENGINE_MCR3_DERIV(product))
576 
577 #define BOOST_PP_NBODYENGINE_MCR3_TASK(product) \
578  BOOST_PP_IIF( \
579  BOOST_PP_CAT(LIBINT2_TASK_EXISTS_, \
580  BOOST_PP_NBODYENGINE_MCR3_task(product)), \
581  BOOST_PP_CAT(BOOST_PP_CAT(BOOST_PP_NBODYENGINE_MCR3_NCENTER(product), \
582  BOOST_PP_NBODYENGINE_MCR3_OPER(product)), \
583  BOOST_PP_NBODYENGINE_MCR3_DERIV(product)), \
584  default)
585 
586 #define BOOST_PP_NBODYENGINE_MCR3(r, product) \
587  if (static_cast<int>(oper_) == BOOST_PP_TUPLE_ELEM(3, 0, product) && \
588  static_cast<int>(rank(braket_)) == BOOST_PP_TUPLE_ELEM(3, 1, product) && \
589  deriv_order_ == BOOST_PP_TUPLE_ELEM(3, 2, product)) { \
590  hard_lmax_ = BOOST_PP_CAT(LIBINT2_MAX_AM_, \
591  BOOST_PP_NBODYENGINE_MCR3_TASK(product)) + \
592  1; \
593  hard_default_lmax_ = \
594  BOOST_PP_IF(BOOST_PP_IS_1(BOOST_PP_CAT(LIBINT2_CENTER_DEPENDENT_MAX_AM_, \
595  BOOST_PP_NBODYENGINE_MCR3_task(product))), \
596  BOOST_PP_CAT(LIBINT2_MAX_AM_, \
597  BOOST_PP_CAT(default, \
598  BOOST_PP_NBODYENGINE_MCR3_DERIV(product) \
599  ) \
600  ) + 1, std::numeric_limits<int>::max()); \
601  const auto lmax = \
602  BOOST_PP_IF(BOOST_PP_IS_1(BOOST_PP_CAT(LIBINT2_CENTER_DEPENDENT_MAX_AM_, \
603  BOOST_PP_NBODYENGINE_MCR3_task(product))), \
604  std::max(hard_lmax_,hard_default_lmax_), hard_lmax_); \
605  if (lmax_ >= lmax) { \
606  throw Engine::lmax_exceeded( \
607  BOOST_PP_STRINGIZE(BOOST_PP_NBODYENGINE_MCR3_TASK(product)), \
608  lmax, lmax_); \
609  } \
610  if (stack_size_ > 0) \
611  libint2_cleanup_default(&primdata_[0]); \
612  stack_size_ = LIBINT2_PREFIXED_NAME(BOOST_PP_CAT( \
613  libint2_need_memory_, BOOST_PP_NBODYENGINE_MCR3_TASK(product)))( \
614  lmax_); \
615  LIBINT2_PREFIXED_NAME( \
616  BOOST_PP_CAT(libint2_init_, BOOST_PP_NBODYENGINE_MCR3_TASK(product))) \
617  (&primdata_[0], lmax_, 0); \
618  BOOST_PP_IF(BOOST_PP_IS_1(LIBINT2_FLOP_COUNT), \
619  LIBINT2_PREFIXED_NAME(libint2_init_flopcounter) \
620  (&primdata_[0], primdata_.size()), BOOST_PP_EMPTY()); \
621  buildfnptrs_ = to_ptr1(LIBINT2_PREFIXED_NAME(BOOST_PP_CAT( \
622  libint2_build_, BOOST_PP_NBODYENGINE_MCR3_TASK(product)))); \
623  reset_scratch(); \
624  return; \
625  }
626 
627  BOOST_PP_LIST_FOR_EACH_PRODUCT(
628  BOOST_PP_NBODYENGINE_MCR3, 3,
629  (BOOST_PP_NBODY_OPERATOR_INDEX_LIST, BOOST_PP_NBODY_BRAKET_RANK_LIST,
630  BOOST_PP_NBODY_DERIV_ORDER_LIST))
631 
632  assert(
633  false &&
634  "missing case in switch"); // either deriv_order_ or oper_ is wrong
635 } // _initialize<R>()
636 
637 __libint2_engine_inline void Engine::initialize(size_t max_nprim) {
638  assert(libint2::initialized() && "libint is not initialized");
639  assert(deriv_order_ <= LIBINT2_MAX_DERIV_ORDER &&
640  "exceeded the max derivative order of the library");
641 
642  // validate braket
643 #ifndef INCLUDE_ONEBODY
644  assert(braket_ != BraKet::x_x &&
645  "this braket type not supported by the library; give --enable-1body to configure");
646 #endif
647 #ifndef INCLUDE_ERI
648  assert(braket_ != BraKet::xx_xx &&
649  "this braket type not supported by the library; give --enable-eri to configure");
650 #endif
651 #ifndef INCLUDE_ERI3
652  assert((braket_ != BraKet::xs_xx && braket_ != BraKet::xx_xs) &&
653  "this braket type not supported by the library; give --enable-eri3 to configure");
654 #endif
655 #ifndef INCLUDE_ERI2
656  assert(braket_ != BraKet::xs_xs &&
657  "this braket type not supported by the library; give --enable-eri2 to configure");
658 #endif
659 
660  // make sure it's no default initialized
661  if (lmax_ < 0)
662  throw using_default_initialized();
663 
664  // initialize braket, if needed
665  if (braket_ == BraKet::invalid) braket_ = default_braket(oper_);
666 
667  if (max_nprim != 0) primdata_.resize(std::pow(max_nprim, braket_rank()));
668 
669  // initialize targets
670  {
671  decltype(targets_)::allocator_type alloc(primdata_[0].targets);
672  targets_ = decltype(targets_)(alloc);
673  // in some cases extra memory use can be avoided if targets_ manages its own
674  // memory
675  // the only instance is where we permute derivative integrals, this calls
676  // for permuting
677  // target indices.
678  const auto permutable_targets =
679  deriv_order_ > 0 &&
680  (braket_ == BraKet::xx_xx || braket_ == BraKet::xs_xx ||
681  braket_ == BraKet::xx_xs);
682  if (permutable_targets)
683  targets_.reserve(max_ntargets + 1);
684  else
685  targets_.reserve(max_ntargets);
686  // will be resized to appropriate size in reset_scratch via _initialize
687  }
688 
689 #ifdef LIBINT2_ENGINE_TIMERS
690  timers.set_now_overhead(25);
691 #endif
692 #ifdef LIBINT2_PROFILE
693  primdata_[0].timers->set_now_overhead(25);
694 #endif
695 
696  _initialize();
697 }
698 
699 namespace detail {
700 __libint2_engine_inline std::vector<Engine::compute2_ptr_type>
701 init_compute2_ptrs() {
702  auto max_ncompute2_ptrs = nopers_2body * nbrakets_2body * nderivorders_2body;
703  std::vector<Engine::compute2_ptr_type> result(max_ncompute2_ptrs, nullptr);
704 
705 #define BOOST_PP_NBODYENGINE_MCR7(r, product) \
706  if (BOOST_PP_TUPLE_ELEM(3, 0, product) >= \
707  static_cast<int>(Operator::first_2body_oper) && \
708  BOOST_PP_TUPLE_ELEM(3, 0, product) <= \
709  static_cast<int>(Operator::last_2body_oper) && \
710  BOOST_PP_TUPLE_ELEM(3, 1, product) >= \
711  static_cast<int>(BraKet::first_2body_braket) && \
712  BOOST_PP_TUPLE_ELEM(3, 1, product) <= \
713  static_cast<int>(BraKet::last_2body_braket)) { \
714  auto compute_ptr_idx = ((BOOST_PP_TUPLE_ELEM(3, 0, product) - \
715  static_cast<int>(Operator::first_2body_oper)) * \
716  nbrakets_2body + \
717  (BOOST_PP_TUPLE_ELEM(3, 1, product) - \
718  static_cast<int>(BraKet::first_2body_braket))) * \
719  nderivorders_2body + \
720  BOOST_PP_TUPLE_ELEM(3, 2, product); \
721  result.at(compute_ptr_idx) = &Engine::compute2< \
722  static_cast<Operator>(BOOST_PP_TUPLE_ELEM(3, 0, product)), \
723  static_cast<BraKet>(BOOST_PP_TUPLE_ELEM(3, 1, product)), \
724  BOOST_PP_TUPLE_ELEM(3, 2, product)>; \
725  }
726 
727  BOOST_PP_LIST_FOR_EACH_PRODUCT(
728  BOOST_PP_NBODYENGINE_MCR7, 3,
729  (BOOST_PP_NBODY_OPERATOR_INDEX_LIST, BOOST_PP_NBODY_BRAKET_INDEX_LIST,
730  BOOST_PP_NBODY_DERIV_ORDER_LIST))
731 
732  return result;
733 }
734 } // namespace detail
735 
736 __libint2_engine_inline const std::vector<Engine::compute2_ptr_type>&
737 Engine::compute2_ptrs() const {
738  static std::vector<compute2_ptr_type> compute2_ptrs_ =
739  detail::init_compute2_ptrs();
740  return compute2_ptrs_;
741 }
742 
743 __libint2_engine_inline unsigned int Engine::nparams() const {
744  switch (oper_) {
745  case Operator::nuclear:
746  return any_cast<const operator_traits<Operator::nuclear>::oper_params_type&>(params_)
747  .size();
748  case Operator::erf_nuclear:
749  case Operator::erfc_nuclear:
750  return std::get<1>(any_cast<const operator_traits<Operator::erfc_nuclear>::oper_params_type&>(params_))
751  .size();
752  default:
753  return 1;
754  }
755  return 1;
756 }
757 __libint2_engine_inline unsigned int Engine::nopers() const {
758  switch (static_cast<int>(oper_)) {
759 #define BOOST_PP_NBODYENGINE_MCR4(r, data, i, elem) \
760  case i: \
761  return operator_traits<static_cast<Operator>(i)>::nopers;
762  BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR4, _,
763  BOOST_PP_NBODY_OPERATOR_LIST)
764  default:
765  break;
766  }
767  assert(false && "missing case in switch"); // unreachable
768  return 0;
769 }
770 
771 template <>
772 __libint2_engine_inline any Engine::enforce_params_type<any>(
773  Operator oper, const any& params, bool throw_if_wrong_type) {
774  any result;
775  switch (static_cast<int>(oper)) {
776 #define BOOST_PP_NBODYENGINE_MCR5A(r, data, i, elem) \
777  case i: \
778  if (any_cast<operator_traits<static_cast<Operator>(i)>::oper_params_type>( \
779  &params) != nullptr) { \
780  result = params; \
781  } else { \
782  if (throw_if_wrong_type) throw bad_any_cast(); \
783  result = operator_traits<static_cast<Operator>(i)>::default_params(); \
784  } \
785  break;
786 
787  BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR5A, _,
788  BOOST_PP_NBODY_OPERATOR_LIST)
789 
790  default:
791  assert(false && "missing case in switch"); // missed a case?
792  }
793  return result;
794 }
795 
796 template <typename Params>
797 __libint2_engine_inline any Engine::enforce_params_type(
798  Operator oper, const Params& params, bool throw_if_wrong_type) {
799  any result;
800  switch (static_cast<int>(oper)) {
801 #define BOOST_PP_NBODYENGINE_MCR5B(r, data, i, elem) \
802  case i: \
803  if (std::is_same<Params, operator_traits<static_cast<Operator>( \
804  i)>::oper_params_type>::value) { \
805  result = params; \
806  } else { \
807  if (throw_if_wrong_type) throw std::bad_cast(); \
808  result = operator_traits<static_cast<Operator>(i)>::default_params(); \
809  } \
810  break;
811 
812  BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR5B, _,
813  BOOST_PP_NBODY_OPERATOR_LIST)
814 
815  default:
816  assert(false && "missing case in switch"); // missed a case?
817  }
818  return result;
819 }
820 
821 __libint2_engine_inline any Engine::make_core_eval_pack(Operator oper) const {
822  any result;
823  switch (static_cast<int>(oper)) {
824 #define BOOST_PP_NBODYENGINE_MCR6(r, data, i, elem) \
825  case i: \
826  result = libint2::detail::make_compressed_pair( \
827  operator_traits<static_cast<Operator>(i)>::core_eval_type::instance( \
828  braket_rank() * lmax_ + deriv_order_, \
829  std::numeric_limits<scalar_type>::epsilon()), \
830  libint2::detail::CoreEvalScratch< \
831  operator_traits<static_cast<Operator>(i)>::core_eval_type>( \
832  braket_rank() * lmax_ + deriv_order_)); \
833  assert(any_cast<detail::core_eval_pack_type<static_cast<Operator>(i)>>( \
834  &result) != nullptr); \
835  break;
836 
837  BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR6, _,
838  BOOST_PP_NBODY_OPERATOR_LIST)
839 
840  default:
841  assert(false && "missing case in switch"); // missed a case?
842  }
843  return result;
844 }
845 
846 __libint2_engine_inline void Engine::init_core_ints_params(const any& params) {
847  if (oper_ == Operator::delcgtg2) {
848  // [g12,[- \Del^2, g12] = 2 (\Del g12) \cdot (\Del g12)
849  // (\Del exp(-a r_12^2) \cdot (\Del exp(-b r_12^2) = 4 a b (r_{12}^2 exp(-
850  // (a+b) r_{12}^2) )
851  // i.e. need to scale each coefficient by 4 a b
852  auto oparams =
853  any_cast<const operator_traits<Operator::delcgtg2>::oper_params_type&>(params);
854  const auto ng = oparams.size();
855  operator_traits<Operator::delcgtg2>::oper_params_type core_ints_params;
856  core_ints_params.reserve(ng * (ng + 1) / 2);
857  for (size_t b = 0; b < ng; ++b)
858  for (size_t k = 0; k <= b; ++k) {
859  const auto gexp = oparams[b].first + oparams[k].first;
860  const auto gcoeff = oparams[b].second * oparams[k].second *
861  (b == k ? 1 : 2); // if a != b include ab and ba
862  const auto gcoeff_rescaled =
863  4 * oparams[b].first * oparams[k].first * gcoeff;
864  core_ints_params.push_back(std::make_pair(gexp, gcoeff_rescaled));
865  }
866  core_ints_params_ = core_ints_params;
867  } else {
868  core_ints_params_ = params;
869  }
870 }
871 
872 __libint2_engine_inline void Engine::compute_primdata(Libint_t& primdata, const Shell& s1,
873  const Shell& s2, size_t p1, size_t p2,
874  size_t oset) {
875  const auto& A = s1.O;
876  const auto& B = s2.O;
877 
878  const auto alpha1 = s1.alpha[p1];
879  const auto alpha2 = s2.alpha[p2];
880 
881  const auto c1 = s1.contr[0].coeff[p1];
882  const auto c2 = s2.contr[0].coeff[p2];
883 
884  const auto gammap = alpha1 + alpha2;
885  const auto oogammap = 1 / gammap;
886  const auto rhop_over_alpha1 = alpha2 * oogammap;
887  const auto rhop = alpha1 * rhop_over_alpha1;
888  const auto Px = (alpha1 * A[0] + alpha2 * B[0]) * oogammap;
889  const auto Py = (alpha1 * A[1] + alpha2 * B[1]) * oogammap;
890  const auto Pz = (alpha1 * A[2] + alpha2 * B[2]) * oogammap;
891  const auto AB_x = A[0] - B[0];
892  const auto AB_y = A[1] - B[1];
893  const auto AB_z = A[2] - B[2];
894  const auto AB2_x = AB_x * AB_x;
895  const auto AB2_y = AB_y * AB_y;
896  const auto AB2_z = AB_z * AB_z;
897 
898  assert(LIBINT2_SHELLQUARTET_SET == LIBINT2_SHELLQUARTET_SET_STANDARD && "non-standard shell ordering");
899 
900  const auto oper_is_nuclear =
901  (oper_ == Operator::nuclear || oper_ == Operator::erf_nuclear ||
902  oper_ == Operator::erfc_nuclear);
903 
904  // need to use HRR? see strategy.cc
905  const auto l1 = s1.contr[0].l;
906  const auto l2 = s2.contr[0].l;
907  const bool use_hrr = (oper_is_nuclear || oper_ == Operator::sphemultipole) && l1 > 0 && l2 > 0;
908  // unlike the 2-body ints, can go both ways, determine which way to go (the logic must match TwoCenter_OS_Tactic)
909  const bool hrr_ket_to_bra = l1 >= l2;
910  if (use_hrr) {
911  if (hrr_ket_to_bra) {
912 #if LIBINT2_DEFINED(eri, AB_x)
913  primdata.AB_x[0] = AB_x;
914 #endif
915 #if LIBINT2_DEFINED(eri, AB_y)
916  primdata.AB_y[0] = AB_y;
917 #endif
918 #if LIBINT2_DEFINED(eri, AB_z)
919  primdata.AB_z[0] = AB_z;
920 #endif
921  }
922  else {
923 #if LIBINT2_DEFINED(eri, BA_x)
924  primdata.BA_x[0] = - AB_x;
925 #endif
926 #if LIBINT2_DEFINED(eri, BA_y)
927  primdata.BA_y[0] = - AB_y;
928 #endif
929 #if LIBINT2_DEFINED(eri, BA_z)
930  primdata.BA_z[0] = - AB_z;
931 #endif
932  }
933  }
934 
935  // figure out whether will do VRR on center A and/or B
936 // if ((!use_hrr && l1 > 0) || hrr_ket_to_bra) {
937 #if LIBINT2_DEFINED(eri, PA_x)
938  primdata.PA_x[0] = Px - A[0];
939 #endif
940 #if LIBINT2_DEFINED(eri, PA_y)
941  primdata.PA_y[0] = Py - A[1];
942 #endif
943 #if LIBINT2_DEFINED(eri, PA_z)
944  primdata.PA_z[0] = Pz - A[2];
945 #endif
946 // }
947 //
948 // if ((!use_hrr && l2 > 0) || !hrr_ket_to_bra) {
949 #if LIBINT2_DEFINED(eri, PB_x)
950  primdata.PB_x[0] = Px - B[0];
951 #endif
952 #if LIBINT2_DEFINED(eri, PB_y)
953  primdata.PB_y[0] = Py - B[1];
954 #endif
955 #if LIBINT2_DEFINED(eri, PB_z)
956  primdata.PB_z[0] = Pz - B[2];
957 #endif
958 // }
959 
960  if (oper_ == Operator::emultipole1 || oper_ == Operator::emultipole2 ||
961  oper_ == Operator::emultipole3) {
962  const auto& O = any_cast<const operator_traits<
963  Operator::emultipole1>::oper_params_type&>(params_); // same as emultipoleX
964 #if LIBINT2_DEFINED(eri, BO_x)
965  primdata.BO_x[0] = B[0] - O[0];
966 #endif
967 #if LIBINT2_DEFINED(eri, BO_y)
968  primdata.BO_y[0] = B[1] - O[1];
969 #endif
970 #if LIBINT2_DEFINED(eri, BO_z)
971  primdata.BO_z[0] = B[2] - O[2];
972 #endif
973  }
974  if (oper_ == Operator::sphemultipole) {
975  const auto& O = any_cast<const operator_traits<
976  Operator::emultipole1>::oper_params_type&>(params_);
977 #if LIBINT2_DEFINED(eri, PO_x)
978  primdata.PO_x[0] = Px - O[0];
979 #endif
980 #if LIBINT2_DEFINED(eri, PO_y)
981  primdata.PO_y[0] = Py - O[1];
982 #endif
983 #if LIBINT2_DEFINED(eri, PO_z)
984  primdata.PO_z[0] = Pz - O[2];
985 #endif
986 #if LIBINT2_DEFINED(eri, PO2)
987  primdata.PO2[0] = (Px - O[0])*(Px - O[0]) + (Py - O[1])*(Py - O[1]) + (Pz - O[2])*(Pz - O[2]);
988 #endif
989  }
990 
991 #if LIBINT2_DEFINED(eri, oo2z)
992  primdata.oo2z[0] = 0.5 * oogammap;
993 #endif
994 
995  decltype(c1) sqrt_PI(1.77245385090551602729816748334);
996  const auto xyz_pfac = sqrt_PI * sqrt(oogammap);
997  const auto ovlp_ss_x = exp(-rhop * AB2_x) * xyz_pfac * c1 * c2;
998  const auto ovlp_ss_y = exp(-rhop * AB2_y) * xyz_pfac;
999  const auto ovlp_ss_z = exp(-rhop * AB2_z) * xyz_pfac;
1000 
1001  primdata._0_Overlap_0_x[0] = ovlp_ss_x;
1002  primdata._0_Overlap_0_y[0] = ovlp_ss_y;
1003  primdata._0_Overlap_0_z[0] = ovlp_ss_z;
1004 
1005  if (oper_ == Operator::kinetic || (deriv_order_ > 0)) {
1006 #if LIBINT2_DEFINED(eri, two_alpha0_bra)
1007  primdata.two_alpha0_bra[0] = 2.0 * alpha1;
1008 #endif
1009 #if LIBINT2_DEFINED(eri, two_alpha0_ket)
1010  primdata.two_alpha0_ket[0] = 2.0 * alpha2;
1011 #endif
1012  }
1013 
1014  if (oper_is_nuclear) {
1015 
1016  const auto& params = (oper_ == Operator::nuclear) ?
1017  any_cast<const operator_traits<Operator::nuclear>::oper_params_type&>(params_) :
1018  std::get<1>(any_cast<const operator_traits<Operator::erfc_nuclear>::oper_params_type&>(params_));
1019 
1020  const auto& C = params[oset].second;
1021  const auto& q = params[oset].first;
1022 #if LIBINT2_DEFINED(eri, PC_x)
1023  primdata.PC_x[0] = Px - C[0];
1024 #endif
1025 #if LIBINT2_DEFINED(eri, PC_y)
1026  primdata.PC_y[0] = Py - C[1];
1027 #endif
1028 #if LIBINT2_DEFINED(eri, PC_z)
1029  primdata.PC_z[0] = Pz - C[2];
1030 #endif
1031 
1032 #if LIBINT2_DEFINED(eri, rho12_over_alpha1) || \
1033  LIBINT2_DEFINED(eri, rho12_over_alpha2)
1034  if (deriv_order_ > 0) {
1035 #if LIBINT2_DEFINED(eri, rho12_over_alpha1)
1036  primdata.rho12_over_alpha1[0] = rhop_over_alpha1;
1037 #endif
1038 #if LIBINT2_DEFINED(eri, rho12_over_alpha2)
1039  primdata.rho12_over_alpha2[0] = alpha1 * oogammap;
1040 #endif
1041  }
1042 #endif
1043 #if LIBINT2_DEFINED(eri, PC_x) && LIBINT2_DEFINED(eri, PC_y) && \
1044  LIBINT2_DEFINED(eri, PC_z)
1045  const auto PC2 = primdata.PC_x[0] * primdata.PC_x[0] +
1046  primdata.PC_y[0] * primdata.PC_y[0] +
1047  primdata.PC_z[0] * primdata.PC_z[0];
1048  const scalar_type U = gammap * PC2;
1049  const scalar_type rho = rhop;
1050  const auto mmax = s1.contr[0].l + s2.contr[0].l + deriv_order_;
1051  auto* fm_ptr = &(primdata.LIBINT_T_S_ELECPOT_S(0)[0]);
1052  if (oper_ == Operator::nuclear) {
1053  auto fm_engine_ptr =
1054  any_cast<const detail::core_eval_pack_type<Operator::nuclear>&>(core_eval_pack_)
1055  .first();
1056  fm_engine_ptr->eval(fm_ptr, U, mmax);
1057  } else if (oper_ == Operator::erf_nuclear) {
1058  const auto& core_eval_ptr =
1059  any_cast<const detail::core_eval_pack_type<Operator::erf_nuclear>&>(core_eval_pack_)
1060  .first();
1061  auto core_ints_params =
1062  std::get<0>(any_cast<const typename operator_traits<
1063  Operator::erf_nuclear>::oper_params_type&>(core_ints_params_));
1064  core_eval_ptr->eval(fm_ptr, rho, U, mmax, core_ints_params);
1065  } else if (oper_ == Operator::erfc_nuclear) {
1066  const auto& core_eval_ptr =
1067  any_cast<const detail::core_eval_pack_type<Operator::erfc_nuclear>&>(core_eval_pack_)
1068  .first();
1069  auto core_ints_params =
1070  std::get<0>(any_cast<const typename operator_traits<
1071  Operator::erfc_nuclear>::oper_params_type&>(core_ints_params_));
1072  core_eval_ptr->eval(fm_ptr, rho, U, mmax, core_ints_params);
1073  }
1074 
1075  decltype(U) two_o_sqrt_PI(1.12837916709551257389615890312);
1076  const auto pfac =
1077  -q * sqrt(gammap) * two_o_sqrt_PI * ovlp_ss_x * ovlp_ss_y * ovlp_ss_z;
1078  const auto m_fence = mmax + 1;
1079  for (auto m = 0; m != m_fence; ++m) {
1080  fm_ptr[m] *= pfac;
1081  }
1082 #endif
1083  }
1084 } // Engine::compute_primdata()
1085 
1090 template <Operator op, BraKet bk, size_t deriv_order>
1091 __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2(
1092  const libint2::Shell& tbra1, const libint2::Shell& tbra2,
1093  const libint2::Shell& tket1, const libint2::Shell& tket2,
1094  const ShellPair* tspbra, const ShellPair* tspket) {
1095  assert(op == oper_ && "Engine::compute2 -- operator mismatch");
1096  assert(bk == braket_ && "Engine::compute2 -- braket mismatch");
1097  assert(deriv_order == deriv_order_ &&
1098  "Engine::compute2 -- deriv_order mismatch");
1099  assert(((tspbra == nullptr && tspket == nullptr) || (tspbra != nullptr && tspket != nullptr)) &&
1100  "Engine::compute2 -- expects zero or two ShellPair objects");
1101 
1102  //
1103  // i.e. bra and ket refer to chemists bra and ket
1104  //
1105 
1106  // can only handle 1 contraction at a time
1107  assert((tbra1.ncontr() == 1 && tbra2.ncontr() == 1 && tket1.ncontr() == 1 &&
1108  tket2.ncontr() == 1) && "generally-contracted shells are not yet supported");
1109 
1110  // angular momentum limit obeyed?
1111  assert(tbra1.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1112  assert(tbra2.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1113  assert(tket1.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1114  assert(tket2.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1115 
1116 #if LIBINT2_SHELLQUARTET_SET == \
1117  LIBINT2_SHELLQUARTET_SET_STANDARD // standard angular momentum ordering
1118  const auto swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l);
1119  const auto swap_tket = (tket1.contr[0].l < tket2.contr[0].l);
1120  const auto swap_braket =
1121  ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l >
1122  tket1.contr[0].l + tket2.contr[0].l)) ||
1123  braket_ == BraKet::xx_xs;
1124 #else // orca angular momentum ordering
1125  const auto swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l);
1126  const auto swap_tket = (tket1.contr[0].l > tket2.contr[0].l);
1127  const auto swap_braket =
1128  ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l <
1129  tket1.contr[0].l + tket2.contr[0].l)) ||
1130  braket_ == BraKet::xx_xs;
1131  assert(false && "feature not implemented");
1132 #endif
1133  const auto& bra1 =
1134  swap_braket ? (swap_tket ? tket2 : tket1) : (swap_tbra ? tbra2 : tbra1);
1135  const auto& bra2 =
1136  swap_braket ? (swap_tket ? tket1 : tket2) : (swap_tbra ? tbra1 : tbra2);
1137  const auto& ket1 =
1138  swap_braket ? (swap_tbra ? tbra2 : tbra1) : (swap_tket ? tket2 : tket1);
1139  const auto& ket2 =
1140  swap_braket ? (swap_tbra ? tbra1 : tbra2) : (swap_tket ? tket1 : tket2);
1141  const auto swap_bra = swap_braket ? swap_tket : swap_tbra;
1142  const auto swap_ket = swap_braket ? swap_tbra : swap_tket;
1143  // "permute" also the user-provided shell pair data
1144  const auto* spbra_precomputed = swap_braket ? tspket : tspbra;
1145  const auto* spket_precomputed = swap_braket ? tspbra : tspket;
1146 
1147  const auto tform = bra1.contr[0].pure || bra2.contr[0].pure ||
1148  ket1.contr[0].pure || ket2.contr[0].pure;
1149  const auto permute = swap_braket || swap_tbra || swap_tket;
1150  const auto use_scratch = permute || tform;
1151 
1152  // assert # of primitive pairs
1153  auto nprim_bra1 = bra1.nprim();
1154  auto nprim_bra2 = bra2.nprim();
1155  auto nprim_ket1 = ket1.nprim();
1156  auto nprim_ket2 = ket2.nprim();
1157 
1158  // adjust max angular momentum, if needed
1159  auto lmax = std::max(std::max(bra1.contr[0].l, bra2.contr[0].l),
1160  std::max(ket1.contr[0].l, ket2.contr[0].l));
1161  assert(lmax <= lmax_ && "the angular momentum limit is exceeded");
1162  const auto lmax_bra = std::max(bra1.contr[0].l, bra2.contr[0].l);
1163  const auto lmax_ket = std::max(ket1.contr[0].l, ket2.contr[0].l);
1164 
1165 #ifdef LIBINT2_ENGINE_PROFILE_CLASS
1166  class_id id(bra1.contr[0].l, bra2.contr[0].l, ket1.contr[0].l,
1167  ket2.contr[0].l);
1168  if (class_profiles.find(id) == class_profiles.end()) {
1169  class_profile dummy;
1170  class_profiles[id] = dummy;
1171  }
1172 #endif
1173 
1174 // compute primitive data
1175 #ifdef LIBINT2_ENGINE_TIMERS
1176  timers.start(0);
1177 #endif
1178  {
1179  auto p = 0;
1180  // initialize shell pairs, if not given ...
1181  // using ln_precision_ is far less aggressive than should be, but proper analysis
1182  // involves both bra and ket *bases* and thus cannot be done on shell-set
1183  // basis ... probably ln_precision_/2 - 10 is enough
1184  const ShellPair& spbra = spbra_precomputed ? *spbra_precomputed : (spbra_.init(bra1, bra2, ln_precision_), spbra_) ;
1185  const ShellPair& spket = spket_precomputed ? *spket_precomputed : (spket_.init(ket1, ket2, ln_precision_), spket_);
1186  // determine whether shell pair data refers to the actual ({bra1,bra2}) or swapped ({bra2,bra1}) pairs
1187  // if computed the shell pair data here then it's always in actual order, otherwise check swap_bra/swap_ket
1188  const auto spbra_is_swapped = spbra_precomputed ? swap_bra : false;
1189  const auto spket_is_swapped = spket_precomputed ? swap_ket : false;
1190 
1191  using real_t = Shell::real_t;
1192  // swapping bra turns AB into BA = -AB
1193  real_t BA[3];
1194  if (spbra_is_swapped) {
1195  for(auto xyz=0; xyz!=3; ++xyz)
1196  BA[xyz] = - spbra_precomputed->AB[xyz];
1197  }
1198  const auto& AB = spbra_is_swapped ? BA : spbra.AB;
1199  // swapping ket turns CD into DC = -CD
1200  real_t DC[3];
1201  if (spket_is_swapped) {
1202  for(auto xyz=0; xyz!=3; ++xyz)
1203  DC[xyz] = - spket_precomputed->AB[xyz];
1204  }
1205  const auto& CD = spket_is_swapped ? DC : spket.AB;
1206 
1207  const auto& A = bra1.O;
1208  const auto& B = bra2.O;
1209  const auto& C = ket1.O;
1210  const auto& D = ket2.O;
1211 
1212  // compute all primitive quartet data
1213  const auto npbra = spbra.primpairs.size();
1214  const auto npket = spket.primpairs.size();
1215  for (auto pb = 0; pb != npbra; ++pb) {
1216  for (auto pk = 0; pk != npket; ++pk) {
1217  // primitive quartet screening
1218  if (spbra.primpairs[pb].scr + spket.primpairs[pk].scr >
1219  ln_precision_) {
1220  Libint_t& primdata = primdata_[p];
1221  const auto& sbra1 = bra1;
1222  const auto& sbra2 = bra2;
1223  const auto& sket1 = ket1;
1224  const auto& sket2 = ket2;
1225  auto pbra = pb;
1226  auto pket = pk;
1227 
1228  const auto& spbrapp = spbra.primpairs[pbra];
1229  const auto& spketpp = spket.primpairs[pket];
1230  // if shell-pair data given by user
1231  const auto& pbra1 = spbra_is_swapped ? spbrapp.p2 : spbrapp.p1;
1232  const auto& pbra2 = spbra_is_swapped ? spbrapp.p1 : spbrapp.p2;
1233  const auto& pket1 = spket_is_swapped ? spketpp.p2 : spketpp.p1;
1234  const auto& pket2 = spket_is_swapped ? spketpp.p1 : spketpp.p2;
1235 
1236  const auto alpha0 = sbra1.alpha[pbra1];
1237  const auto alpha1 = sbra2.alpha[pbra2];
1238  const auto alpha2 = sket1.alpha[pket1];
1239  const auto alpha3 = sket2.alpha[pket2];
1240 
1241  const auto c0 = sbra1.contr[0].coeff[pbra1];
1242  const auto c1 = sbra2.contr[0].coeff[pbra2];
1243  const auto c2 = sket1.contr[0].coeff[pket1];
1244  const auto c3 = sket2.contr[0].coeff[pket2];
1245 
1246  const auto amtot = sbra1.contr[0].l + sket1.contr[0].l +
1247  sbra2.contr[0].l + sket2.contr[0].l;
1248 
1249  const auto gammap = alpha0 + alpha1;
1250  const auto oogammap = spbrapp.one_over_gamma;
1251  const auto rhop = alpha0 * alpha1 * oogammap;
1252 
1253  const auto gammaq = alpha2 + alpha3;
1254  const auto oogammaq = spketpp.one_over_gamma;
1255  const auto rhoq = alpha2 * alpha3 * oogammaq;
1256 
1257  const auto& P = spbrapp.P;
1258  const auto& Q = spketpp.P;
1259  const auto PQx = P[0] - Q[0];
1260  const auto PQy = P[1] - Q[1];
1261  const auto PQz = P[2] - Q[2];
1262  const auto PQ2 = PQx * PQx + PQy * PQy + PQz * PQz;
1263 
1264  const auto K12 = spbrapp.K * spketpp.K;
1265  decltype(K12) two_times_M_PI_to_25(
1266  34.986836655249725693); // (2 \pi)^{5/2}
1267  const auto gammapq = gammap + gammaq;
1268  const auto sqrt_gammapq = sqrt(gammapq);
1269  const auto oogammapq = 1.0 / (gammapq);
1270  auto pfac = two_times_M_PI_to_25 * K12 * sqrt_gammapq * oogammapq;
1271  pfac *= c0 * c1 * c2 * c3;
1272 
1273  if (std::abs(pfac) >= precision_) {
1274  const scalar_type rho = gammap * gammaq * oogammapq;
1275  const scalar_type T = PQ2 * rho;
1276  auto* gm_ptr = &(primdata.LIBINT_T_SS_EREP_SS(0)[0]);
1277  const auto mmax = amtot + deriv_order;
1278 
1279  if (!skip_core_ints) {
1280  switch (oper_) {
1281  case Operator::coulomb: {
1282  const auto& core_eval_ptr =
1283  any_cast<const detail::core_eval_pack_type<Operator::coulomb>&>(core_eval_pack_)
1284  .first();
1285  core_eval_ptr->eval(gm_ptr, T, mmax);
1286  } break;
1287  case Operator::cgtg_x_coulomb: {
1288  const auto& core_eval_ptr =
1289  any_cast<const detail::core_eval_pack_type<
1290  Operator::cgtg_x_coulomb>&>(core_eval_pack_)
1291  .first();
1292  auto& core_eval_scratch = any_cast<detail::core_eval_pack_type<
1293  Operator::cgtg_x_coulomb>&>(core_eval_pack_)
1294  .second();
1295  const auto& core_ints_params =
1296  any_cast<const typename operator_traits<
1297  Operator::cgtg>::oper_params_type&>(core_ints_params_);
1298  core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params,
1299  &core_eval_scratch);
1300  } break;
1301  case Operator::cgtg: {
1302  const auto& core_eval_ptr =
1303  any_cast<const detail::core_eval_pack_type<Operator::cgtg>&>(core_eval_pack_)
1304  .first();
1305  const auto& core_ints_params =
1306  any_cast<const typename operator_traits<
1307  Operator::cgtg>::oper_params_type&>(core_ints_params_);
1308  core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1309  } break;
1310  case Operator::delcgtg2: {
1311  const auto& core_eval_ptr =
1312  any_cast<const detail::core_eval_pack_type<Operator::delcgtg2>&>(core_eval_pack_)
1313  .first();
1314  const auto& core_ints_params =
1315  any_cast<const typename operator_traits<
1316  Operator::cgtg>::oper_params_type&>(core_ints_params_);
1317  core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1318  } break;
1319  case Operator::delta: {
1320  const auto& core_eval_ptr =
1321  any_cast<const detail::core_eval_pack_type<Operator::delta>&>(core_eval_pack_)
1322  .first();
1323  core_eval_ptr->eval(gm_ptr, rho, T, mmax);
1324  } break;
1325  case Operator::r12: {
1326  const auto& core_eval_ptr =
1327  any_cast<const detail::core_eval_pack_type<Operator::r12>&>(core_eval_pack_)
1328  .first();
1329  core_eval_ptr->eval(gm_ptr, rho, T, mmax);
1330  } break;
1331  case Operator::erf_coulomb: {
1332  const auto& core_eval_ptr =
1333  any_cast<const detail::core_eval_pack_type<Operator::erf_coulomb>&>(core_eval_pack_)
1334  .first();
1335  auto core_ints_params =
1336  any_cast<const typename operator_traits<
1337  Operator::erf_coulomb>::oper_params_type&>(core_ints_params_);
1338  core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1339  } break;
1340  case Operator::erfc_coulomb: {
1341  const auto& core_eval_ptr =
1342  any_cast<const detail::core_eval_pack_type<Operator::erfc_coulomb>&>(core_eval_pack_)
1343  .first();
1344  auto core_ints_params =
1345  any_cast<const typename operator_traits<
1346  Operator::erfc_coulomb>::oper_params_type&>(core_ints_params_);
1347  core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1348  } break;
1349  case Operator::stg: {
1350  const auto& core_eval_ptr =
1351  any_cast<const detail::core_eval_pack_type<Operator::stg>&>(core_eval_pack_)
1352  .first();
1353  auto zeta =
1354  any_cast<const typename operator_traits<
1355  Operator::stg>::oper_params_type&>(core_ints_params_);
1356  const auto one_over_rho = gammapq * oogammap * oogammaq;
1357  core_eval_ptr->eval_slater(gm_ptr, one_over_rho, T, mmax, zeta);
1358  } break;
1359  case Operator::stg_x_coulomb: {
1360  const auto& core_eval_ptr =
1361  any_cast<const detail::core_eval_pack_type<Operator::stg>&>(core_eval_pack_)
1362  .first();
1363  auto zeta =
1364  any_cast<const typename operator_traits<
1365  Operator::stg>::oper_params_type&>(core_ints_params_);
1366  const auto one_over_rho = gammapq * oogammap * oogammaq;
1367  core_eval_ptr->eval_yukawa(gm_ptr, one_over_rho, T, mmax, zeta);
1368  } break;
1369  default:
1370  assert(false && "missing case in a switch"); // unreachable
1371  }
1372  }
1373 
1374  for (auto m = 0; m != mmax + 1; ++m) {
1375  gm_ptr[m] *= pfac;
1376  }
1377 
1378  if (mmax != 0) {
1379  if (braket_ == BraKet::xx_xx) {
1380 #if LIBINT2_DEFINED(eri, PA_x)
1381  primdata.PA_x[0] = P[0] - A[0];
1382 #endif
1383 #if LIBINT2_DEFINED(eri, PA_y)
1384  primdata.PA_y[0] = P[1] - A[1];
1385 #endif
1386 #if LIBINT2_DEFINED(eri, PA_z)
1387  primdata.PA_z[0] = P[2] - A[2];
1388 #endif
1389 #if LIBINT2_DEFINED(eri, PB_x)
1390  primdata.PB_x[0] = P[0] - B[0];
1391 #endif
1392 #if LIBINT2_DEFINED(eri, PB_y)
1393  primdata.PB_y[0] = P[1] - B[1];
1394 #endif
1395 #if LIBINT2_DEFINED(eri, PB_z)
1396  primdata.PB_z[0] = P[2] - B[2];
1397 #endif
1398  }
1399 
1400  if (braket_ != BraKet::xs_xs) {
1401 #if LIBINT2_DEFINED(eri, QC_x)
1402  primdata.QC_x[0] = Q[0] - C[0];
1403 #endif
1404 #if LIBINT2_DEFINED(eri, QC_y)
1405  primdata.QC_y[0] = Q[1] - C[1];
1406 #endif
1407 #if LIBINT2_DEFINED(eri, QC_z)
1408  primdata.QC_z[0] = Q[2] - C[2];
1409 #endif
1410 #if LIBINT2_DEFINED(eri, QD_x)
1411  primdata.QD_x[0] = Q[0] - D[0];
1412 #endif
1413 #if LIBINT2_DEFINED(eri, QD_y)
1414  primdata.QD_y[0] = Q[1] - D[1];
1415 #endif
1416 #if LIBINT2_DEFINED(eri, QD_z)
1417  primdata.QD_z[0] = Q[2] - D[2];
1418 #endif
1419  }
1420 
1421  if (braket_ == BraKet::xx_xx) {
1422 #if LIBINT2_DEFINED(eri, AB_x)
1423  primdata.AB_x[0] = AB[0];
1424 #endif
1425 #if LIBINT2_DEFINED(eri, AB_y)
1426  primdata.AB_y[0] = AB[1];
1427 #endif
1428 #if LIBINT2_DEFINED(eri, AB_z)
1429  primdata.AB_z[0] = AB[2];
1430 #endif
1431 #if LIBINT2_DEFINED(eri, BA_x)
1432  primdata.BA_x[0] = -AB[0];
1433 #endif
1434 #if LIBINT2_DEFINED(eri, BA_y)
1435  primdata.BA_y[0] = -AB[1];
1436 #endif
1437 #if LIBINT2_DEFINED(eri, BA_z)
1438  primdata.BA_z[0] = -AB[2];
1439 #endif
1440  }
1441 
1442  if (braket_ != BraKet::xs_xs) {
1443 #if LIBINT2_DEFINED(eri, CD_x)
1444  primdata.CD_x[0] = CD[0];
1445 #endif
1446 #if LIBINT2_DEFINED(eri, CD_y)
1447  primdata.CD_y[0] = CD[1];
1448 #endif
1449 #if LIBINT2_DEFINED(eri, CD_z)
1450  primdata.CD_z[0] = CD[2];
1451 #endif
1452 #if LIBINT2_DEFINED(eri, DC_x)
1453  primdata.DC_x[0] = -CD[0];
1454 #endif
1455 #if LIBINT2_DEFINED(eri, DC_y)
1456  primdata.DC_y[0] = -CD[1];
1457 #endif
1458 #if LIBINT2_DEFINED(eri, DC_z)
1459  primdata.DC_z[0] = -CD[2];
1460 #endif
1461  }
1462 
1463  const auto gammap_o_gammapgammaq = oogammapq * gammap;
1464  const auto gammaq_o_gammapgammaq = oogammapq * gammaq;
1465 
1466  const auto Wx =
1467  (gammap_o_gammapgammaq * P[0] + gammaq_o_gammapgammaq * Q[0]);
1468  const auto Wy =
1469  (gammap_o_gammapgammaq * P[1] + gammaq_o_gammapgammaq * Q[1]);
1470  const auto Wz =
1471  (gammap_o_gammapgammaq * P[2] + gammaq_o_gammapgammaq * Q[2]);
1472 
1473  if (deriv_order > 0 || lmax_bra > 0) {
1474 #if LIBINT2_DEFINED(eri, WP_x)
1475  primdata.WP_x[0] = Wx - P[0];
1476 #endif
1477 #if LIBINT2_DEFINED(eri, WP_y)
1478  primdata.WP_y[0] = Wy - P[1];
1479 #endif
1480 #if LIBINT2_DEFINED(eri, WP_z)
1481  primdata.WP_z[0] = Wz - P[2];
1482 #endif
1483  }
1484  if (deriv_order > 0 || lmax_ket > 0) {
1485 #if LIBINT2_DEFINED(eri, WQ_x)
1486  primdata.WQ_x[0] = Wx - Q[0];
1487 #endif
1488 #if LIBINT2_DEFINED(eri, WQ_y)
1489  primdata.WQ_y[0] = Wy - Q[1];
1490 #endif
1491 #if LIBINT2_DEFINED(eri, WQ_z)
1492  primdata.WQ_z[0] = Wz - Q[2];
1493 #endif
1494  }
1495 #if LIBINT2_DEFINED(eri, oo2z)
1496  primdata.oo2z[0] = 0.5 * oogammap;
1497 #endif
1498 #if LIBINT2_DEFINED(eri, oo2e)
1499  primdata.oo2e[0] = 0.5 * oogammaq;
1500 #endif
1501 #if LIBINT2_DEFINED(eri, oo2ze)
1502  primdata.oo2ze[0] = 0.5 * oogammapq;
1503 #endif
1504 #if LIBINT2_DEFINED(eri, roz)
1505  primdata.roz[0] = rho * oogammap;
1506 #endif
1507 #if LIBINT2_DEFINED(eri, roe)
1508  primdata.roe[0] = rho * oogammaq;
1509 #endif
1510 
1511 // using ITR?
1512 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_0_x)
1513  primdata.TwoPRepITR_pfac0_0_0_x[0] =
1514  -(alpha1 * AB[0] + alpha3 * CD[0]) * oogammap;
1515 #endif
1516 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_0_y)
1517  primdata.TwoPRepITR_pfac0_0_0_y[0] =
1518  -(alpha1 * AB[1] + alpha3 * CD[1]) * oogammap;
1519 #endif
1520 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_0_z)
1521  primdata.TwoPRepITR_pfac0_0_0_z[0] =
1522  -(alpha1 * AB[2] + alpha3 * CD[2]) * oogammap;
1523 #endif
1524 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_0_x)
1525  primdata.TwoPRepITR_pfac0_1_0_x[0] =
1526  -(alpha1 * AB[0] + alpha3 * CD[0]) * oogammaq;
1527 #endif
1528 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_0_y)
1529  primdata.TwoPRepITR_pfac0_1_0_y[0] =
1530  -(alpha1 * AB[1] + alpha3 * CD[1]) * oogammaq;
1531 #endif
1532 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_0_z)
1533  primdata.TwoPRepITR_pfac0_1_0_z[0] =
1534  -(alpha1 * AB[2] + alpha3 * CD[2]) * oogammaq;
1535 #endif
1536 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_1_x)
1537  primdata.TwoPRepITR_pfac0_0_1_x[0] =
1538  (alpha0 * AB[0] + alpha2 * CD[0]) * oogammap;
1539 #endif
1540 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_1_y)
1541  primdata.TwoPRepITR_pfac0_0_1_y[0] =
1542  (alpha0 * AB[1] + alpha2 * CD[1]) * oogammap;
1543 #endif
1544 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_1_z)
1545  primdata.TwoPRepITR_pfac0_0_1_z[0] =
1546  (alpha0 * AB[2] + alpha2 * CD[2]) * oogammap;
1547 #endif
1548 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_1_x)
1549  primdata.TwoPRepITR_pfac0_1_1_x[0] =
1550  (alpha0 * AB[0] + alpha2 * CD[0]) * oogammaq;
1551 #endif
1552 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_1_y)
1553  primdata.TwoPRepITR_pfac0_1_1_y[0] =
1554  (alpha0 * AB[1] + alpha2 * CD[1]) * oogammaq;
1555 #endif
1556 #if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_1_z)
1557  primdata.TwoPRepITR_pfac0_1_1_z[0] =
1558  (alpha0 * AB[2] + alpha2 * CD[2]) * oogammaq;
1559 #endif
1560 #if LIBINT2_DEFINED(eri, eoz)
1561  primdata.eoz[0] = gammaq * oogammap;
1562 #endif
1563 #if LIBINT2_DEFINED(eri, zoe)
1564  primdata.zoe[0] = gammap * oogammaq;
1565 #endif
1566 
1567  // prefactors for derivative ERI relations
1568  if (deriv_order > 0) {
1569 #if LIBINT2_DEFINED(eri, alpha1_rho_over_zeta2)
1570  primdata.alpha1_rho_over_zeta2[0] =
1571  alpha0 * (oogammap * gammaq_o_gammapgammaq);
1572 #endif
1573 #if LIBINT2_DEFINED(eri, alpha2_rho_over_zeta2)
1574  primdata.alpha2_rho_over_zeta2[0] =
1575  alpha1 * (oogammap * gammaq_o_gammapgammaq);
1576 #endif
1577 #if LIBINT2_DEFINED(eri, alpha3_rho_over_eta2)
1578  primdata.alpha3_rho_over_eta2[0] =
1579  alpha2 * (oogammaq * gammap_o_gammapgammaq);
1580 #endif
1581 #if LIBINT2_DEFINED(eri, alpha4_rho_over_eta2)
1582  primdata.alpha4_rho_over_eta2[0] =
1583  alpha3 * (oogammaq * gammap_o_gammapgammaq);
1584 #endif
1585 #if LIBINT2_DEFINED(eri, alpha1_over_zetapluseta)
1586  primdata.alpha1_over_zetapluseta[0] = alpha0 * oogammapq;
1587 #endif
1588 #if LIBINT2_DEFINED(eri, alpha2_over_zetapluseta)
1589  primdata.alpha2_over_zetapluseta[0] = alpha1 * oogammapq;
1590 #endif
1591 #if LIBINT2_DEFINED(eri, alpha3_over_zetapluseta)
1592  primdata.alpha3_over_zetapluseta[0] = alpha2 * oogammapq;
1593 #endif
1594 #if LIBINT2_DEFINED(eri, alpha4_over_zetapluseta)
1595  primdata.alpha4_over_zetapluseta[0] = alpha3 * oogammapq;
1596 #endif
1597 #if LIBINT2_DEFINED(eri, rho12_over_alpha1)
1598  primdata.rho12_over_alpha1[0] = alpha1 * oogammap;
1599 #endif
1600 #if LIBINT2_DEFINED(eri, rho12_over_alpha2)
1601  primdata.rho12_over_alpha2[0] = alpha0 * oogammap;
1602 #endif
1603 #if LIBINT2_DEFINED(eri, rho34_over_alpha3)
1604  primdata.rho34_over_alpha3[0] = alpha3 * oogammaq;
1605 #endif
1606 #if LIBINT2_DEFINED(eri, rho34_over_alpha4)
1607  primdata.rho34_over_alpha4[0] = alpha2 * oogammaq;
1608 #endif
1609 #if LIBINT2_DEFINED(eri, two_alpha0_bra)
1610  primdata.two_alpha0_bra[0] = 2.0 * alpha0;
1611 #endif
1612 #if LIBINT2_DEFINED(eri, two_alpha0_ket)
1613  primdata.two_alpha0_ket[0] = 2.0 * alpha1;
1614 #endif
1615 #if LIBINT2_DEFINED(eri, two_alpha1_bra)
1616  primdata.two_alpha1_bra[0] = 2.0 * alpha2;
1617 #endif
1618 #if LIBINT2_DEFINED(eri, two_alpha1_ket)
1619  primdata.two_alpha1_ket[0] = 2.0 * alpha3;
1620 #endif
1621  }
1622  } // m != 0
1623 
1624  ++p;
1625  } // prefac-based prim quartet screen
1626 
1627  } // rough prim quartet screen based on pair values
1628  } // ket prim pair
1629  } // bra prim pair
1630  primdata_[0].contrdepth = p;
1631  }
1632 
1633 #ifdef LIBINT2_ENGINE_TIMERS
1634  const auto t0 = timers.stop(0);
1635 #ifdef LIBINT2_ENGINE_PROFILE_CLASS
1636  class_profiles[id].prereqs += t0.count();
1637  if (primdata_[0].contrdepth != 0) {
1638  class_profiles[id].nshellset += 1;
1639  class_profiles[id].nprimset += primdata_[0].contrdepth;
1640  }
1641 #endif
1642 #endif
1643 
1644  // all primitive combinations screened out? set 1st target ptr to nullptr
1645  if (primdata_[0].contrdepth == 0) {
1646  targets_[0] = nullptr;
1647  return targets_;
1648  }
1649 
1650  // compute directly (ss|ss)
1651  const auto compute_directly = lmax == 0 && deriv_order == 0;
1652 
1653  if (compute_directly) {
1654 #ifdef LIBINT2_ENGINE_TIMERS
1655  timers.start(1);
1656 #endif
1657  auto& stack = primdata_[0].stack[0];
1658  stack = 0;
1659  for (auto p = 0; p != primdata_[0].contrdepth; ++p)
1660  stack += primdata_[p].LIBINT_T_SS_EREP_SS(0)[0];
1661  primdata_[0].targets[0] = primdata_[0].stack;
1662 #ifdef LIBINT2_ENGINE_TIMERS
1663  const auto t1 = timers.stop(1);
1664 #ifdef LIBINT2_ENGINE_PROFILE_CLASS
1665  class_profiles[id].build_vrr += t1.count();
1666 #endif
1667 #endif
1668  } // compute directly
1669  else { // call libint
1670 #ifdef LIBINT2_ENGINE_TIMERS
1671 #ifdef LIBINT2_PROFILE
1672  const auto t1_hrr_start = primdata_[0].timers->read(0);
1673  const auto t1_vrr_start = primdata_[0].timers->read(1);
1674 #endif
1675  timers.start(1);
1676 #endif
1677 
1678  size_t buildfnidx;
1679  switch (braket_) {
1680  case BraKet::xx_xx:
1681  buildfnidx =
1682  ((bra1.contr[0].l * hard_lmax_ + bra2.contr[0].l) * hard_lmax_ +
1683  ket1.contr[0].l) *
1684  hard_lmax_ +
1685  ket2.contr[0].l;
1686  break;
1687 
1688  case BraKet::xx_xs: assert(false && "this braket is not supported"); break;
1689  case BraKet::xs_xx: {
1691  int ket_lmax = hard_lmax_;
1692  switch (deriv_order_) {
1693  case 0:
1694 #ifdef LIBINT2_CENTER_DEPENDENT_MAX_AM_3eri
1695  ket_lmax = hard_default_lmax_;
1696 #endif
1697  break;
1698  case 1:
1699 #ifdef LIBINT2_CENTER_DEPENDENT_MAX_AM_3eri1
1700  ket_lmax = hard_default_lmax_;
1701 #endif
1702  break;
1703  case 2:
1704 #ifdef LIBINT2_CENTER_DEPENDENT_MAX_AM_3eri2
1705  ket_lmax = hard_default_lmax_;
1706 #endif
1707  break;
1708  default:assert(false && "deriv_order>2 not yet supported");
1709  }
1710  buildfnidx =
1711  (bra1.contr[0].l * ket_lmax + ket1.contr[0].l) * ket_lmax +
1712  ket2.contr[0].l;
1713 #ifdef ERI3_PURE_SH
1714  if (bra1.contr[0].l > 1)
1715  assert(bra1.contr[0].pure &&
1716  "library assumes a solid harmonics shell in bra of a 3-center "
1717  "2-body int, but a cartesian shell given");
1718 #endif
1719  } break;
1720 
1721  case BraKet::xs_xs:
1722  buildfnidx = bra1.contr[0].l * hard_lmax_ + ket1.contr[0].l;
1723 #ifdef ERI2_PURE_SH
1724  if (bra1.contr[0].l > 1)
1725  assert(bra1.contr[0].pure &&
1726  "library assumes solid harmonics shells in a 2-center "
1727  "2-body int, but a cartesian shell given in bra");
1728  if (ket1.contr[0].l > 1)
1729  assert(ket1.contr[0].pure &&
1730  "library assumes solid harmonics shells in a 2-center "
1731  "2-body int, but a cartesian shell given in bra");
1732 #endif
1733  break;
1734 
1735  default:
1736  assert(false && "invalid braket");
1737  }
1738 
1739  assert(buildfnptrs_[buildfnidx] && "null build function ptr");
1740  buildfnptrs_[buildfnidx](&primdata_[0]);
1741 
1742 #ifdef LIBINT2_ENGINE_TIMERS
1743  const auto t1 = timers.stop(1);
1744 #ifdef LIBINT2_ENGINE_PROFILE_CLASS
1745 #ifndef LIBINT2_PROFILE
1746  class_profiles[id].build_vrr += t1.count();
1747 #else
1748  class_profiles[id].build_hrr += primdata_[0].timers->read(0) - t1_hrr_start;
1749  class_profiles[id].build_vrr += primdata_[0].timers->read(1) - t1_vrr_start;
1750 #endif
1751 #endif
1752 #endif
1753 
1754 #ifdef LIBINT2_ENGINE_TIMERS
1755  timers.start(2);
1756 #endif
1757 
1758  const auto ntargets = nshellsets();
1759 
1760  // if needed, permute and transform
1761  if (use_scratch) {
1762  constexpr auto using_scalar_real = sizeof(value_type) == sizeof(scalar_type);
1763  static_assert(using_scalar_real,
1764  "Libint2 C++11 API only supports scalar real types");
1765  typedef Eigen::Matrix<scalar_type, Eigen::Dynamic, Eigen::Dynamic,
1766  Eigen::RowMajor>
1767  Matrix;
1768 
1769  // a 2-d view of the 4-d source tensor
1770  const auto nr1_cart = bra1.cartesian_size();
1771  const auto nr2_cart = bra2.cartesian_size();
1772  const auto nc1_cart = ket1.cartesian_size();
1773  const auto nc2_cart = ket2.cartesian_size();
1774  const auto ncol_cart = nc1_cart * nc2_cart;
1775  const auto n1234_cart = nr1_cart * nr2_cart * ncol_cart;
1776  const auto nr1 = bra1.size();
1777  const auto nr2 = bra2.size();
1778  const auto nc1 = ket1.size();
1779  const auto nc2 = ket2.size();
1780  const auto nrow = nr1 * nr2;
1781  const auto ncol = nc1 * nc2;
1782 
1783  // a 2-d view of the 4-d target tensor
1784  const auto nr1_tgt = tbra1.size();
1785  const auto nr2_tgt = tbra2.size();
1786  const auto nc1_tgt = tket1.size();
1787  const auto nc2_tgt = tket2.size();
1788  const auto ncol_tgt = nc1_tgt * nc2_tgt;
1789  const auto n_tgt = nr1_tgt * nr2_tgt * ncol_tgt;
1790 
1791  auto hotscr = &scratch_[0]; // points to the hot scratch
1792 
1793  // transform to solid harmonics first, then unpermute, if necessary
1794  for (auto s = 0; s != ntargets; ++s) {
1795  // when permuting derivatives may need to permute shellsets also, not
1796  // just integrals
1797  // within shellsets; this will point to where source shellset s should end up
1798  auto s_target = s;
1799 
1800  auto source =
1801  primdata_[0].targets[s]; // points to the most recent result
1802  auto target = hotscr;
1803 
1804  if (bra1.contr[0].pure) {
1805  libint2::solidharmonics::transform_first(
1806  bra1.contr[0].l, nr2_cart * ncol_cart, source, target);
1807  std::swap(source, target);
1808  }
1809  if (bra2.contr[0].pure) {
1810  libint2::solidharmonics::transform_inner(bra1.size(), bra2.contr[0].l,
1811  ncol_cart, source, target);
1812  std::swap(source, target);
1813  }
1814  if (ket1.contr[0].pure) {
1815  libint2::solidharmonics::transform_inner(nrow, ket1.contr[0].l,
1816  nc2_cart, source, target);
1817  std::swap(source, target);
1818  }
1819  if (ket2.contr[0].pure) {
1820  libint2::solidharmonics::transform_last(
1821  bra1.size() * bra2.size() * ket1.size(), ket2.contr[0].l, source,
1822  target);
1823  std::swap(source, target);
1824  }
1825 
1826  // need to permute?
1827  if (permute) {
1828  // loop over rows of the source matrix
1829  const auto* src_row_ptr = source;
1830  auto tgt_ptr = target;
1831 
1832  // if permuting derivatives ints must update their derivative index
1833  switch (deriv_order) {
1834  case 0:
1835  break; // nothing to do
1836 
1837  case 1: {
1838  switch(braket_) {
1839  case BraKet::xx_xx: {
1840  const unsigned mapDerivIndex1_xxxx[2][2][2][12] = {
1841  {{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
1842  {0, 1, 2, 3, 4, 5, 9, 10, 11, 6, 7, 8}},
1843  {{3, 4, 5, 0, 1, 2, 6, 7, 8, 9, 10, 11},
1844  {3, 4, 5, 0, 1, 2, 9, 10, 11, 6, 7, 8}}},
1845  {{{6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5},
1846  {9, 10, 11, 6, 7, 8, 0, 1, 2, 3, 4, 5}},
1847  {{6, 7, 8, 9, 10, 11, 3, 4, 5, 0, 1, 2},
1848  {9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2}}}};
1849  s_target = mapDerivIndex1_xxxx[swap_braket][swap_tbra][swap_tket][s];
1850  }
1851  break;
1852 
1853  case BraKet::xs_xx: {
1854  assert(swap_bra == false);
1855  assert(swap_braket == false);
1856  const unsigned mapDerivIndex1_xsxx[2][9] = {
1857  {0,1,2,3,4,5,6,7,8},
1858  {0,1,2,6,7,8,3,4,5}
1859  };
1860  s_target = mapDerivIndex1_xsxx[swap_tket][s];
1861  }
1862  break;
1863 
1864  case BraKet::xs_xs: {
1865  assert(swap_bra == false);
1866  assert(swap_ket == false);
1867  assert(swap_braket == false);
1868  s_target = s;
1869  }
1870  break;
1871 
1872  default:
1873  assert(false && "this backet type not yet supported for 1st geometric derivatives");
1874  }
1875  } break;
1876 
1877  case 2: {
1878  switch(braket_) {
1879  case BraKet::xx_xx: {
1880  const unsigned mapDerivIndex2_xxxx[2][2][2][78] = {
1881  {{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1882  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
1883  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
1884  39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
1885  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
1886  65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77},
1887  {0, 1, 2, 3, 4, 5, 9, 10, 11, 6, 7, 8, 12,
1888  13, 14, 15, 16, 20, 21, 22, 17, 18, 19, 23, 24, 25,
1889  26, 30, 31, 32, 27, 28, 29, 33, 34, 35, 39, 40, 41,
1890  36, 37, 38, 42, 43, 47, 48, 49, 44, 45, 46, 50, 54,
1891  55, 56, 51, 52, 53, 72, 73, 74, 60, 65, 69, 75, 76,
1892  61, 66, 70, 77, 62, 67, 71, 57, 58, 59, 63, 64, 68}},
1893  {{33, 34, 35, 3, 14, 24, 36, 37, 38, 39, 40, 41, 42,
1894  43, 4, 15, 25, 44, 45, 46, 47, 48, 49, 50, 5, 16,
1895  26, 51, 52, 53, 54, 55, 56, 0, 1, 2, 6, 7, 8,
1896  9, 10, 11, 12, 13, 17, 18, 19, 20, 21, 22, 23, 27,
1897  28, 29, 30, 31, 32, 57, 58, 59, 60, 61, 62, 63, 64,
1898  65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77},
1899  {33, 34, 35, 3, 14, 24, 39, 40, 41, 36, 37, 38, 42,
1900  43, 4, 15, 25, 47, 48, 49, 44, 45, 46, 50, 5, 16,
1901  26, 54, 55, 56, 51, 52, 53, 0, 1, 2, 9, 10, 11,
1902  6, 7, 8, 12, 13, 20, 21, 22, 17, 18, 19, 23, 30,
1903  31, 32, 27, 28, 29, 72, 73, 74, 60, 65, 69, 75, 76,
1904  61, 66, 70, 77, 62, 67, 71, 57, 58, 59, 63, 64, 68}}},
1905  {{{57, 58, 59, 60, 61, 62, 6, 17, 27, 36, 44, 51, 63,
1906  64, 65, 66, 67, 7, 18, 28, 37, 45, 52, 68, 69, 70,
1907  71, 8, 19, 29, 38, 46, 53, 72, 73, 74, 9, 20, 30,
1908  39, 47, 54, 75, 76, 10, 21, 31, 40, 48, 55, 77, 11,
1909  22, 32, 41, 49, 56, 0, 1, 2, 3, 4, 5, 12, 13,
1910  14, 15, 16, 23, 24, 25, 26, 33, 34, 35, 42, 43, 50},
1911  {72, 73, 74, 60, 65, 69, 9, 20, 30, 39, 47, 54, 75,
1912  76, 61, 66, 70, 10, 21, 31, 40, 48, 55, 77, 62, 67,
1913  71, 11, 22, 32, 41, 49, 56, 57, 58, 59, 6, 17, 27,
1914  36, 44, 51, 63, 64, 7, 18, 28, 37, 45, 52, 68, 8,
1915  19, 29, 38, 46, 53, 0, 1, 2, 3, 4, 5, 12, 13,
1916  14, 15, 16, 23, 24, 25, 26, 33, 34, 35, 42, 43, 50}},
1917  {{57, 58, 59, 60, 61, 62, 36, 44, 51, 6, 17, 27, 63,
1918  64, 65, 66, 67, 37, 45, 52, 7, 18, 28, 68, 69, 70,
1919  71, 38, 46, 53, 8, 19, 29, 72, 73, 74, 39, 47, 54,
1920  9, 20, 30, 75, 76, 40, 48, 55, 10, 21, 31, 77, 41,
1921  49, 56, 11, 22, 32, 33, 34, 35, 3, 14, 24, 42, 43,
1922  4, 15, 25, 50, 5, 16, 26, 0, 1, 2, 12, 13, 23},
1923  {72, 73, 74, 60, 65, 69, 39, 47, 54, 9, 20, 30, 75,
1924  76, 61, 66, 70, 40, 48, 55, 10, 21, 31, 77, 62, 67,
1925  71, 41, 49, 56, 11, 22, 32, 57, 58, 59, 36, 44, 51,
1926  6, 17, 27, 63, 64, 37, 45, 52, 7, 18, 28, 68, 38,
1927  46, 53, 8, 19, 29, 33, 34, 35, 3, 14, 24, 42, 43,
1928  4, 15, 25, 50, 5, 16, 26, 0, 1, 2, 12, 13, 23}}}};
1929  s_target = mapDerivIndex2_xxxx[swap_braket][swap_tbra][swap_tket][s];
1930  }
1931  break;
1932 
1933  case BraKet::xs_xx: {
1934  assert(swap_bra == false);
1935  assert(swap_braket == false);
1936  const unsigned mapDerivIndex2_xsxx[2][45] = {
1937  {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1938  12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
1939  24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
1940  36, 37, 38, 39, 40, 41, 42, 43, 44},
1941  {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 14,
1942  15, 16, 11, 12, 13, 17, 21, 22, 23, 18, 19, 20,
1943  39, 40, 41, 27, 32, 36, 42, 43, 28, 33, 37, 44,
1944  29, 34, 38, 24, 25, 26, 30, 31, 35}};
1945  s_target = mapDerivIndex2_xsxx[swap_tket][s];
1946  }
1947  break;
1948 
1949  case BraKet::xs_xs: {
1950  assert(swap_bra == false);
1951  assert(swap_ket == false);
1952  assert(swap_braket == false);
1953  s_target = s;
1954  }
1955  break;
1956 
1957  default:
1958  assert(false && "this backet type not yet supported for 2st geometric derivatives");
1959  }
1960  } break;
1961 
1962  default:
1963  assert(false &&
1964  "3-rd and higher derivatives not yet generalized");
1965  }
1966 
1967  for (auto r1 = 0; r1 != nr1; ++r1) {
1968  for (auto r2 = 0; r2 != nr2; ++r2, src_row_ptr += ncol) {
1969  typedef Eigen::Map<const Matrix> ConstMap;
1970  typedef Eigen::Map<Matrix> Map;
1971  typedef Eigen::Map<Matrix, Eigen::Unaligned,
1972  Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>>
1973  StridedMap;
1974 
1975  // represent this source row as a matrix
1976  ConstMap src_blk_mat(src_row_ptr, nc1, nc2);
1977 
1978  // and copy to the block of the target matrix
1979  if (swap_braket) {
1980  // if swapped bra and ket, a row of source becomes a column
1981  // of
1982  // target
1983  // source row {r1,r2} is mapped to target column {r1,r2} if
1984  // !swap_tket, else to {r2,r1}
1985  const auto tgt_col_idx =
1986  !swap_tket ? r1 * nr2 + r2 : r2 * nr1 + r1;
1987  StridedMap tgt_blk_mat(
1988  tgt_ptr + tgt_col_idx, nr1_tgt, nr2_tgt,
1989  Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>(
1990  nr2_tgt * ncol_tgt, ncol_tgt));
1991  if (swap_tbra)
1992  tgt_blk_mat = src_blk_mat.transpose();
1993  else
1994  tgt_blk_mat = src_blk_mat;
1995  } else {
1996  // source row {r1,r2} is mapped to target row {r1,r2} if
1997  // !swap_tbra, else to {r2,r1}
1998  const auto tgt_row_idx =
1999  !swap_tbra ? r1 * nr2 + r2 : r2 * nr1 + r1;
2000  Map tgt_blk_mat(tgt_ptr + tgt_row_idx * ncol, nc1_tgt, nc2_tgt);
2001  if (swap_tket)
2002  tgt_blk_mat = src_blk_mat.transpose();
2003  else
2004  tgt_blk_mat = src_blk_mat;
2005  }
2006  } // end of loop
2007  } // over rows of source
2008  std::swap(source, target);
2009  } // need to permute?
2010 
2011  // if the integrals ended up in scratch_, keep them there, update the
2012  // hot buffer
2013  // to the next available scratch space, and update targets_
2014  if (source != primdata_[0].targets[s]) {
2015  hotscr += n1234_cart;
2016  if (s != s_target)
2017  assert(set_targets_ && "logic error"); // mess if targets_ points
2018  // to primdata_[0].targets
2019  targets_[s_target] = source;
2020  } else {
2021  // only needed if permuted derivs or set_targets_ is true
2022  // for simplicity always set targets_
2023  if (s != s_target)
2024  assert(set_targets_ && "logic error"); // mess if targets_ points
2025  // to primdata_[0].targets
2026  targets_[s_target] = source;
2027  }
2028  } // loop over shellsets
2029  } // if need_scratch => needed to transpose and/or tform
2030  else { // did not use scratch? may still need to update targets_
2031  if (set_targets_) {
2032  for (auto s = 0; s != ntargets; ++s)
2033  targets_[s] = primdata_[0].targets[s];
2034  }
2035  }
2036 
2037 #ifdef LIBINT2_ENGINE_TIMERS
2038  const auto t2 = timers.stop(2);
2039 #ifdef LIBINT2_ENGINE_PROFILE_CLASS
2040  class_profiles[id].tform += t2.count();
2041 #endif
2042 #endif
2043  } // not (ss|ss)
2044 
2045  if (cartesian_shell_normalization() == CartesianShellNormalization::uniform) {
2046  std::array<std::reference_wrapper<const Shell>, 4> shells{bra1, bra2, ket1, ket2};
2047  for (auto s = 0ul; s != targets_.size(); ++s) {
2048  uniform_normalize_cartesian_shells(const_cast<value_type*>(targets_[s]), shells);
2049  }
2050  }
2051 
2052  return targets_;
2053 }
2054 
2055 #undef BOOST_PP_NBODY_OPERATOR_LIST
2056 #undef BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE
2057 #undef BOOST_PP_NBODY_OPERATOR_INDEX_LIST
2058 #undef BOOST_PP_NBODY_BRAKET_INDEX_TUPLE
2059 #undef BOOST_PP_NBODY_BRAKET_INDEX_LIST
2060 #undef BOOST_PP_NBODY_DERIV_ORDER_TUPLE
2061 #undef BOOST_PP_NBODY_DERIV_ORDER_LIST
2062 #undef BOOST_PP_NBODYENGINE_MCR3
2063 #undef BOOST_PP_NBODYENGINE_MCR3_ncenter
2064 #undef BOOST_PP_NBODYENGINE_MCR3_default_ncenter
2065 #undef BOOST_PP_NBODYENGINE_MCR3_NCENTER
2066 #undef BOOST_PP_NBODYENGINE_MCR3_OPER
2067 #undef BOOST_PP_NBODYENGINE_MCR3_DERIV
2068 #undef BOOST_PP_NBODYENGINE_MCR3_task
2069 #undef BOOST_PP_NBODYENGINE_MCR3_TASK
2070 #undef BOOST_PP_NBODYENGINE_MCR4
2071 #undef BOOST_PP_NBODYENGINE_MCR5
2072 #undef BOOST_PP_NBODYENGINE_MCR6
2073 #undef BOOST_PP_NBODYENGINE_MCR7
2074 
2075 #ifdef LIBINT2_DOES_NOT_INLINE_ENGINE
2076 template any Engine::enforce_params_type<Engine::empty_pod>(
2077  Operator oper, const Engine::empty_pod& params, bool throw_if_wrong_type);
2078 
2079 template const Engine::target_ptr_vec& Engine::compute<Shell>(
2080  const Shell& first_shell, const Shell&);
2081 
2082 template const Engine::target_ptr_vec& Engine::compute<Shell, Shell>(
2083  const Shell& first_shell, const Shell&, const Shell&);
2084 
2085 template const Engine::target_ptr_vec& Engine::compute<Shell, Shell, Shell>(
2086  const Shell& first_shell, const Shell&, const Shell&, const Shell&);
2087 #endif
2088 
2089 } // namespace libint2
2090 
2091 #endif /* _libint2_src_lib_libint_engineimpl_h_ */
generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
Definition: shell.h:81
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
svector< Contraction > contr
contractions
Definition: shell.h:105
void uniform_normalize_cartesian_shells(Real *intset, std::array< std::reference_wrapper< const Shell >, N > shells)
rescales cartesian Gaussians to convert from standard to uniform-normalized convention
Definition: cartesian.h:95
bool initialized()
checks if the libint has been initialized.
Definition: initialize.h:61
Array idential to C++0X arrays.
Definition: stdarray_bits.h:14
Iterates over all partitions of a non-negative integer into nonnegative integers in reverse lexicog...
Definition: intpart_iter.h:74
static const Shell & unit()
Definition: shell.h:218
a partial C++17 std::any implementation (and less efficient than can be)
Definition: any.h:67
__libint2_engine_inline libint2::any default_params(const Operator &oper)
the runtime version of operator_traits<oper>::default_params()
Definition: engine.impl.h:110