LIBINT  2.6.0
1body.h
1 /*
2  * Copyright (C) 2004-2019 Edward F. Valeev
3  *
4  * This file is part of Libint.
5  *
6  * Libint is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Libint is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 // standard C++ headers
22 #include <atomic>
23 #include <chrono>
24 #include <cmath>
25 #include <fstream>
26 #include <iomanip>
27 #include <iostream>
28 #include <iterator>
29 #include <memory>
30 #include <mutex>
31 #include <sstream>
32 #include <thread>
33 #include <unordered_map>
34 #include <vector>
35 
36 // have BTAS library?
37 #ifdef LIBINT2_HAVE_BTAS
38 # include <btas/btas.h>
39 #else // LIBINT2_HAVE_BTAS
40 # error "libint2::lcao requires BTAS"
41 #endif
42 
43 // Libint Gaussian integrals library
44 #include <libint2/diis.h>
45 #include <libint2/util/intpart_iter.h>
46 #include <libint2/chemistry/sto3g_atomic_density.h>
47 #include <libint2.hpp>
48 
49 #if defined(_OPENMP)
50 #include <omp.h>
51 #endif
52 
53 typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2;
54 typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3;
55 typedef btas::RangeNd<CblasRowMajor, std::array<long, 4>> Range4;
56 typedef btas::Tensor<double, Range2> Tensor2d;
57 typedef btas::Tensor<double, Range3> Tensor3d;
58 typedef btas::Tensor<double, Range3> Tensor4d;
59 
60 typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
61  Matrix; // import dense, dynamically sized Matrix type from Eigen;
62  // this is a matrix with row-major storage
63  // (http://en.wikipedia.org/wiki/Row-major_order)
64 // to meet the layout of the integrals returned by the Libint integral library
65 typedef Eigen::DiagonalMatrix<double, Eigen::Dynamic, Eigen::Dynamic>
66  DiagonalMatrix;
67 
68 using libint2::Shell;
69 using libint2::libint2::Atom;
70 using libint2::BasisSet;
71 using libint2::Operator;
72 using libint2::BraKet;
73 
74 std::vector<libint2::Atom> read_geometry(const std::string& filename);
75 Matrix compute_soad(const std::vector<libint2::Atom>& atoms);
76 // computes norm of shell-blocks of A
77 Matrix compute_shellblock_norm(const BasisSet& obs, const Matrix& A);
78 
79 template <Operator obtype>
81  const BasisSet& obs, const std::vector<libint2::Atom>& atoms = std::vector<libint2::Atom>());
82 
83 #if LIBINT2_DERIV_ONEBODY_ORDER
84 template <Operator obtype>
85 std::vector<Matrix> compute_1body_ints_deriv(unsigned deriv_order,
86  const BasisSet& obs,
87  const std::vector<libint2::Atom>& atoms);
88 #endif // LIBINT2_DERIV_ONEBODY_ORDER
89 
90 template <libint2::Operator Kernel = libint2::Operator::coulomb>
91 Matrix compute_schwarz_ints(
92  const BasisSet& bs1, const BasisSet& bs2 = BasisSet(),
93  bool use_2norm = false, // use infty norm by default
94  typename libint2::operator_traits<Kernel>::oper_params_type params =
95  libint2::operator_traits<Kernel>::default_params());
96 Matrix compute_do_ints(const BasisSet& bs1, const BasisSet& bs2 = BasisSet(),
97  bool use_2norm = false // use infty norm by default
98  );
99 
100 using shellpair_list_t = std::unordered_map<size_t, std::vector<size_t>>;
101 shellpair_list_t obs_shellpair_list; // shellpair list for OBS
102 
107 shellpair_list_t compute_shellpair_list(const BasisSet& bs1,
108  const BasisSet& bs2 = BasisSet(),
109  double threshold = 1e-12);
110 
111 Matrix compute_2body_fock(
112  const BasisSet& obs, const Matrix& D,
113  double precision = std::numeric_limits<
114  double>::epsilon(), // discard contributions smaller than this
115  const Matrix& Schwarz = Matrix() // K_ij = sqrt(||(ij|ij)||_\infty); if
116  // empty, do not Schwarz screen
117  );
118 // an Fock builder that can accept densities expressed a separate basis
119 Matrix compute_2body_fock_general(
120  const BasisSet& obs, const Matrix& D, const BasisSet& D_bs,
121  bool D_is_sheldiagonal = false, // set D_is_shelldiagonal if doing SOAD
122  double precision = std::numeric_limits<
123  double>::epsilon() // discard contributions smaller than this
124  );
125 
126 #if LIBINT2_DERIV_ERI_ORDER
127 template <unsigned deriv_order>
128 std::vector<Matrix> compute_2body_fock_deriv(
129  const BasisSet& obs, const std::vector<libint2::Atom>& atoms,
130  const Matrix& D,
131  double precision = std::numeric_limits<
132  double>::epsilon(), // discard contributions smaller than this
133  const Matrix& Schwarz = Matrix() // K_ij = sqrt(||(ij|ij)||_\infty); if
134  // empty, do not Schwarz screen
135  );
136 #endif // LIBINT2_DERIV_ERI_ORDER
137 
138 // returns {X,X^{-1},S_condition_number_after_conditioning}, where
139 // X is the generalized square-root-inverse such that X.transpose() * S * X = I
140 // columns of Xinv is the basis conditioned such that
141 // the condition number of its metric (Xinv.transpose . Xinv) <
142 // S_condition_number_threshold
143 std::tuple<Matrix, Matrix, double> conditioning_orthogonalizer(
144  const Matrix& S, double S_condition_number_threshold);
145 
146 #ifdef LIBINT2_HAVE_BTAS
147 #define HAVE_DENSITY_FITTING 1
148 struct DFFockEngine {
149  const BasisSet& obs;
150  const BasisSet& dfbs;
151  DFFockEngine(const BasisSet& _obs, const BasisSet& _dfbs)
152  : obs(_obs), dfbs(_dfbs) {}
153 
154  typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3d;
155  typedef btas::Tensor<double, Range3d> Tensor3d;
156  Tensor3d xyK;
157 
158  // a DF-based builder, using coefficients of occupied MOs
159  Matrix compute_2body_fock_dfC(const Matrix& Cocc);
160 };
161 #endif // HAVE_DENSITY_FITTING
162 
163 namespace libint2 {
164 int nthreads;
165 
167 template <typename Lambda>
168 void parallel_do(Lambda& lambda) {
169 #ifdef _OPENMP
170 #pragma omp parallel
171  {
172  auto thread_id = omp_get_thread_num();
173  lambda(thread_id);
174  }
175 #else // use C++11 threads
176  std::vector<std::thread> threads;
177  for (int thread_id = 0; thread_id != libint2::nthreads; ++thread_id) {
178  if (thread_id != nthreads - 1)
179  threads.push_back(std::thread(lambda, thread_id));
180  else
181  lambda(thread_id);
182  } // threads_id
183  for (int thread_id = 0; thread_id < nthreads - 1; ++thread_id)
184  threads[thread_id].join();
185 #endif
186 }
187 }
188 
189 int main(int argc, char* argv[]) {
190  using std::cout;
191  using std::cerr;
192  using std::endl;
193 
194  try {
195  /*** =========================== ***/
196  /*** initialize molecule ***/
197  /*** =========================== ***/
198 
199  // read geometry from a file; by default read from h2o.xyz, else take
200  // filename (.xyz) from the command line
201  const auto filename = (argc > 1) ? argv[1] : "h2o.xyz";
202  const auto basisname = (argc > 2) ? argv[2] : "aug-cc-pVDZ";
203  bool do_density_fitting = false;
204 #ifdef HAVE_DENSITY_FITTING
205  do_density_fitting = (argc > 3);
206  const auto dfbasisname = do_density_fitting ? argv[3] : "";
207 #endif
208  std::vector<libint2::Atom> atoms = read_geometry(filename);
209 
210  // set up thread pool
211  {
212  using libint2::nthreads;
213  auto nthreads_cstr = getenv("LIBINT_NUM_THREADS");
214  nthreads = 1;
215  if (nthreads_cstr && strcmp(nthreads_cstr, "")) {
216  std::istringstream iss(nthreads_cstr);
217  iss >> nthreads;
218  if (nthreads > 1 << 16 || nthreads <= 0) nthreads = 1;
219  }
220 #if defined(_OPENMP)
221  omp_set_num_threads(nthreads);
222 #endif
223  std::cout << "Will scale over " << nthreads
224 #if defined(_OPENMP)
225  << " OpenMP"
226 #else
227  << " C++11"
228 #endif
229  << " threads" << std::endl;
230  }
231 
232  // count the number of electrons
233  auto nelectron = 0;
234  for (auto i = 0; i < atoms.size(); ++i) nelectron += atoms[i].atomic_number;
235  const auto ndocc = nelectron / 2;
236  cout << "# of electrons = " << nelectron << endl;
237 
238  // compute the nuclear repulsion energy
239  auto enuc = 0.0;
240  for (auto i = 0; i < atoms.size(); i++)
241  for (auto j = i + 1; j < atoms.size(); j++) {
242  auto xij = atoms[i].x - atoms[j].x;
243  auto yij = atoms[i].y - atoms[j].y;
244  auto zij = atoms[i].z - atoms[j].z;
245  auto r2 = xij * xij + yij * yij + zij * zij;
246  auto r = sqrt(r2);
247  enuc += atoms[i].atomic_number * atoms[j].atomic_number / r;
248  }
249  cout << "Nuclear repulsion energy = " << std::setprecision(15) << enuc
250  << endl;
251 
253 
254  cout << "libint2::Atomic Cartesian coordinates (a.u.):" << endl;
255  for (const auto& a : atoms)
256  std::cout << a.atomic_number << " " << a.x << " " << a.y << " " << a.z
257  << std::endl;
258 
259  BasisSet obs(basisname, atoms);
260  cout << "orbital basis set rank = " << obs.nbf() << endl;
261 
262 #ifdef HAVE_DENSITY_FITTING
263  BasisSet dfbs;
264  if (do_density_fitting) {
265  dfbs = BasisSet(dfbasisname, atoms);
266  cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;
267  }
268 #endif // HAVE_DENSITY_FITTING
269 
270  /*** =========================== ***/
271  /*** compute 1-e integrals ***/
272  /*** =========================== ***/
273 
274  // initializes the Libint integrals library ... now ready to compute
276 
277  // compute OBS non-negligible shell-pair list
278  {
279  obs_shellpair_list = compute_shellpair_list(obs);
280  size_t nsp = 0;
281  for (auto& sp : obs_shellpair_list) {
282  nsp += sp.second.size();
283  }
284  std::cout << "# of {all,non-negligible} shell-pairs = {"
285  << obs.size() * (obs.size() + 1) / 2 << "," << nsp << "}"
286  << std::endl;
287  }
288 
289  // compute one-body integrals
290  auto S = compute_1body_ints<Operator::overlap>(obs)[0];
291  auto T = compute_1body_ints<Operator::kinetic>(obs)[0];
292  auto V = compute_1body_ints<Operator::nuclear>(obs, libint2::make_point_charges(atoms))[0];
293  Matrix H = T + V;
294  T.resize(0, 0);
295  V.resize(0, 0);
296 
297  // compute orthogonalizer X such that X.transpose() . S . X = I
298  Matrix X, Xinv;
299  double XtX_condition_number; // condition number of "re-conditioned"
300  // overlap obtained as Xinv.transpose() . Xinv
301  // one should think of columns of Xinv as the conditioned basis
302  // Re: name ... cond # (Xinv.transpose() . Xinv) = cond # (X.transpose() .
303  // X)
304  // by default assume can manage to compute with condition number of S <=
305  // 1/eps
306  // this is probably too optimistic, but in well-behaved cases even 10^11 is
307  // OK
308  double S_condition_number_threshold =
309  1.0 / std::numeric_limits<double>::epsilon();
310  std::tie(X, Xinv, XtX_condition_number) =
311  conditioning_orthogonalizer(S, S_condition_number_threshold);
312 
313  Matrix D;
314  Matrix C_occ;
315  Matrix evals;
316  { // use SOAD as the guess density
317  const auto tstart = std::chrono::high_resolution_clock::now();
318 
319  auto D_minbs = compute_soad(atoms); // compute guess in minimal basis
320  BasisSet minbs("STO-3G", atoms);
321  if (minbs == obs)
322  D = D_minbs;
323  else { // if basis != minimal basis, map non-representable SOAD guess
324  // into the AO basis
325  // by diagonalizing a Fock matrix
326  std::cout << "projecting SOAD into AO basis ... ";
327  auto F = H;
328  F += compute_2body_fock_general(
329  obs, D_minbs, minbs, true /* SOAD_D_is_shelldiagonal */,
330  std::numeric_limits<double>::epsilon() // this is cheap, no reason
331  // to be cheaper
332  );
333 
334  // solve F C = e S C by (conditioned) transformation to F' C' = e C',
335  // where
336  // F' = X.transpose() . F . X; the original C is obtained as C = X . C'
337  Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(X.transpose() * F * X);
338  auto C = X * eig_solver.eigenvectors();
339 
340  // compute density, D = C(occ) . C(occ)T
341  C_occ = C.leftCols(ndocc);
342  D = C_occ * C_occ.transpose();
343 
344  const auto tstop = std::chrono::high_resolution_clock::now();
345  const std::chrono::duration<double> time_elapsed = tstop - tstart;
346  std::cout << "done (" << time_elapsed.count() << " s)" << std::endl;
347  }
348  }
349 
350  // pre-compute data for Schwarz bounds
351  auto K = compute_schwarz_ints<>(obs, BasisSet(), true);
352 
353 // prepare for density fitting
354 #ifdef HAVE_DENSITY_FITTING
355  std::unique_ptr<DFFockEngine> dffockengine(
356  do_density_fitting ? new DFFockEngine(obs, dfbs) : nullptr);
357 #endif // HAVE_DENSITY_FITTING
358 
359  /*** =========================== ***/
360  /*** SCF loop ***/
361  /*** =========================== ***/
362 
363  const auto maxiter = 100;
364  const auto conv = 1e-12;
365  auto iter = 0;
366  auto rms_error = 1.0;
367  auto ediff_rel = 0.0;
368  auto ehf = 0.0;
369  auto n2 = D.cols() * D.rows();
370  libint2::DIIS<Matrix> diis(2); // start DIIS on second iteration
371 
372  // prepare for incremental Fock build ...
373  Matrix D_diff = D;
374  Matrix F = H;
375  bool reset_incremental_fock_formation = false;
376  bool incremental_Fbuild_started = false;
377  double start_incremental_F_threshold = 1e-5;
378  double next_reset_threshold = 0.0;
379  size_t last_reset_iteration = 0;
380  // ... unless doing DF, then use MO coefficients, hence not "incremental"
381  if (do_density_fitting) start_incremental_F_threshold = 0.0;
382 
383  do {
384  const auto tstart = std::chrono::high_resolution_clock::now();
385  ++iter;
386 
387  // Last iteration's energy and density
388  auto ehf_last = ehf;
389  Matrix D_last = D;
390 
391  if (not incremental_Fbuild_started &&
392  rms_error < start_incremental_F_threshold) {
393  incremental_Fbuild_started = true;
394  reset_incremental_fock_formation = false;
395  last_reset_iteration = iter - 1;
396  next_reset_threshold = rms_error / 1e1;
397  std::cout << "== started incremental fock build" << std::endl;
398  }
399  if (reset_incremental_fock_formation || not incremental_Fbuild_started) {
400  F = H;
401  D_diff = D;
402  }
403  if (reset_incremental_fock_formation && incremental_Fbuild_started) {
404  reset_incremental_fock_formation = false;
405  last_reset_iteration = iter;
406  next_reset_threshold = rms_error / 1e1;
407  std::cout << "== reset incremental fock build" << std::endl;
408  }
409 
410  // build a new Fock matrix
411  if (not do_density_fitting) {
412  // totally empirical precision variation, involves the condition number
413  const auto precision_F = std::min(
414  std::min(1e-3 / XtX_condition_number, 1e-7),
415  std::max(rms_error / 1e4, std::numeric_limits<double>::epsilon()));
416  F += compute_2body_fock(obs, D_diff, precision_F, K);
417  }
418 #if HAVE_DENSITY_FITTING
419  else { // do DF
420  F = H + dffockengine->compute_2body_fock_dfC(C_occ);
421  }
422 #else
423  else {
424  assert(false);
425  } // do_density_fitting is true but HAVE_DENSITY_FITTING is not defined!
426  // should not happen
427 #endif // HAVE_DENSITY_FITTING
428 
429  // compute HF energy with the non-extrapolated Fock matrix
430  ehf = D.cwiseProduct(H + F).sum();
431  ediff_rel = std::abs((ehf - ehf_last) / ehf);
432 
433  // compute SCF error
434  Matrix FD_comm = F * D * S - S * D * F;
435  rms_error = FD_comm.norm() / n2;
436  if (rms_error < next_reset_threshold || iter - last_reset_iteration >= 8)
437  reset_incremental_fock_formation = true;
438 
439  // DIIS extrapolate F
440  Matrix F_diis = F; // extrapolated F cannot be used in incremental Fock
441  // build; only used to produce the density
442  // make a copy of the unextrapolated matrix
443  diis.extrapolate(F_diis, FD_comm);
444 
445  // solve F C = e S C by (conditioned) transformation to F' C' = e C',
446  // where
447  // F' = X.transpose() . F . X; the original C is obtained as C = X . C'
448  Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(X.transpose() * F_diis *
449  X);
450  evals = eig_solver.eigenvalues();
451  auto C = X * eig_solver.eigenvectors();
452 
453  // compute density, D = C(occ) . C(occ)T
454  C_occ = C.leftCols(ndocc);
455  D = C_occ * C_occ.transpose();
456  D_diff = D - D_last;
457 
458  const auto tstop = std::chrono::high_resolution_clock::now();
459  const std::chrono::duration<double> time_elapsed = tstop - tstart;
460 
461  if (iter == 1)
462  std::cout << "\n\nIter E(HF) D(E)/E "
463  "RMS([F,D])/nn Time(s)\n";
464  printf(" %02d %20.12f %20.12e %20.12e %10.5lf\n", iter, ehf + enuc,
465  ediff_rel, rms_error, time_elapsed.count());
466 
467  } while (((ediff_rel > conv) || (rms_error > conv)) && (iter < maxiter));
468 
469  printf("** Hartree-Fock energy = %20.12f\n", ehf + enuc);
470 
471  auto Mu = compute_1body_ints<Operator::emultipole2>(obs);
474  for (int xyz = 0; xyz != 3; ++xyz)
475  mu[xyz] = -2 *
476  D.cwiseProduct(Mu[xyz + 1])
477  .sum(); // 2 = alpha + beta, -1 = electron charge
478  for (int k = 0; k != 6; ++k)
479  qu[k] = -2 *
480  D.cwiseProduct(Mu[k + 4])
481  .sum(); // 2 = alpha + beta, -1 = electron charge
482  std::cout << "** edipole = ";
483  std::copy(mu.begin(), mu.end(),
484  std::ostream_iterator<double>(std::cout, " "));
485  std::cout << std::endl;
486  std::cout << "** equadrupole = ";
487  std::copy(qu.begin(), qu.end(),
488  std::ostream_iterator<double>(std::cout, " "));
489  std::cout << std::endl;
490 
491  { // compute force
492 #if LIBINT2_DERIV_ONEBODY_ORDER
493  // compute 1-e forces
494  Matrix F1 = Matrix::Zero(atoms.size(), 3);
495  Matrix F_Pulay = Matrix::Zero(atoms.size(), 3);
497  // one-body contributions to the forces
499  auto T1 = compute_1body_ints_deriv<Operator::kinetic>(1, obs, atoms);
500  auto V1 = compute_1body_ints_deriv<Operator::nuclear>(1, obs, atoms);
501  for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
502  for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
503  auto force = 2 * (T1[i] + V1[i]).cwiseProduct(D).sum();
504  F1(atom, xyz) += force;
505  }
506  }
507 
509  // Pulay force
511  // orbital energy density
512  DiagonalMatrix evals_occ(evals.topRows(ndocc));
513  Matrix W = C_occ * evals_occ * C_occ.transpose();
514  auto S1 = compute_1body_ints_deriv<Operator::overlap>(1, obs, atoms);
515  for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
516  for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
517  auto force = 2 * S1[i].cwiseProduct(W).sum();
518  F_Pulay(atom, xyz) -= force;
519  }
520  }
521 
522  std::cout << "** 1-body forces = ";
523  for (int atom = 0; atom != atoms.size(); ++atom)
524  for (int xyz = 0; xyz != 3; ++xyz) std::cout << F1(atom, xyz) << " ";
525  std::cout << std::endl;
526  std::cout << "** Pulay forces = ";
527  for (int atom = 0; atom != atoms.size(); ++atom)
528  for (int xyz = 0; xyz != 3; ++xyz)
529  std::cout << F_Pulay(atom, xyz) << " ";
530  std::cout << std::endl;
531 #endif // LIBINT2_DERIV_ONEBODY_ORDER
532 
533 #if LIBINT2_DERIV_ERI_ORDER
534  // compute 2-e forces
535  Matrix F2 = Matrix::Zero(atoms.size(), 3);
536 
538  // two-body contributions to the forces
540  auto G1 = compute_2body_fock_deriv<1>(obs, atoms, D);
541  for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
542  for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
543  // identity prefactor since E(HF) = trace(H + F, D) = trace(2H + G, D)
544  auto force = G1[i].cwiseProduct(D).sum();
545  F2(atom, xyz) += force;
546  }
547  }
548 
549  std::cout << "** 2-body forces = ";
550  for (int atom = 0; atom != atoms.size(); ++atom)
551  for (int xyz = 0; xyz != 3; ++xyz) std::cout << F2(atom, xyz) << " ";
552  std::cout << std::endl;
553 #endif
554 
555 // if support 1-e and 2-e derivatives compute nuclear repulsion force and the
556 // total force
557 #if LIBINT2_DERIV_ONEBODY_ORDER && LIBINT2_DERIV_ERI_ORDER
558  // compute nuclear repulsion forces
559  Matrix FN = Matrix::Zero(atoms.size(), 3);
561  // nuclear repulsion contribution to the forces
563  for (auto a1 = 1; a1 != atoms.size(); ++a1) {
564  const auto& atom1 = atoms[a1];
565  for (auto a2 = 0; a2 < a1; ++a2) {
566  const auto& atom2 = atoms[a2];
567 
568  auto x12 = atom1.x - atom2.x;
569  auto y12 = atom1.y - atom2.y;
570  auto z12 = atom1.z - atom2.z;
571  auto r12_2 = x12 * x12 + y12 * y12 + z12 * z12;
572  auto r12 = sqrt(r12_2);
573  auto r12_3 = r12 * r12_2;
574 
575  auto z1z2_over_r12_3 =
576  atom1.atomic_number * atom2.atomic_number / r12_3;
577 
578  auto fx = -x12 * z1z2_over_r12_3;
579  auto fy = -y12 * z1z2_over_r12_3;
580  auto fz = -z12 * z1z2_over_r12_3;
581  FN(a1, 0) += fx;
582  FN(a1, 1) += fy;
583  FN(a1, 2) += fz;
584  FN(a2, 0) -= fx;
585  FN(a2, 1) -= fy;
586  FN(a2, 2) -= fz;
587  }
588  }
589 
590  std::cout << "** nuclear repulsion forces = ";
591  for (int atom = 0; atom != atoms.size(); ++atom)
592  for (int xyz = 0; xyz != 3; ++xyz) std::cout << FN(atom, xyz) << " ";
593  std::cout << std::endl;
594 
595  auto F = F1 + F_Pulay + F2 + FN;
596  std::cout << "** Hartree-Fock forces = ";
597  for (int atom = 0; atom != atoms.size(); ++atom)
598  for (int xyz = 0; xyz != 3; ++xyz) std::cout << F(atom, xyz) << " ";
599  std::cout << std::endl;
600 #endif
601  }
602 
603  { // compute hessian
604  const auto ncoords = 3 * atoms.size();
605  // # of elems in upper triangle
606  const auto nelem = ncoords * (ncoords+1) / 2;
607 #if LIBINT2_DERIV_ONEBODY_ORDER > 1
608  // compute 1-e hessian
609  Matrix H1 = Matrix::Zero(ncoords, ncoords);
610  Matrix H_Pulay = Matrix::Zero(ncoords, ncoords);
612  // one-body contributions to the hessian
614  auto T2 = compute_1body_ints_deriv<Operator::kinetic>(2, obs, atoms);
615  auto V2 = compute_1body_ints_deriv<Operator::nuclear>(2, obs, atoms);
616  for (auto row = 0, i = 0; row != ncoords; ++row) {
617  for (auto col = row; col != ncoords; ++col, ++i) {
618  auto hess = 2 * (T2[i] + V2[i]).cwiseProduct(D).sum();
619  H1(row, col) += hess;
620  }
621  }
622 
624  // Pulay hessian
626  // orbital energy density
627  DiagonalMatrix evals_occ(evals.topRows(ndocc));
628  Matrix W = C_occ * evals_occ * C_occ.transpose();
629  auto S2 = compute_1body_ints_deriv<Operator::overlap>(2, obs, atoms);
630  for (auto row = 0, i = 0; row != ncoords; ++row) {
631  for (auto col = row; col != ncoords; ++col, ++i) {
632  auto hess = 2 * S2[i].cwiseProduct(W).sum();
633  H_Pulay(row, col) -= hess;
634  }
635  }
636 
637  std::cout << "** 1-body hessian = ";
638  for (auto row = 0, i = 0; row != ncoords; ++row) {
639  for (auto col = row; col != ncoords; ++col) {
640  std::cout << H1(row, col) << " ";
641  }
642  }
643  std::cout << std::endl;
644 
645  std::cout << "** Pulay hessian = ";
646  for (auto row = 0, i = 0; row != ncoords; ++row) {
647  for (auto col = row; col != ncoords; ++col) {
648  std::cout << H_Pulay(row, col) << " ";
649  }
650  }
651  std::cout << std::endl;
652 #endif // LIBINT2_DERIV_ONEBODY_ORDER > 1
653 
654 #if LIBINT2_DERIV_ERI_ORDER > 1
655  // compute 2-e forces
656  Matrix H2 = Matrix::Zero(ncoords, ncoords);
657 
659  // two-body contributions to the forces
661  auto G2 = compute_2body_fock_deriv<2>(obs, atoms, D);
662  for (auto row = 0, i = 0; row != ncoords; ++row) {
663  for (auto col = row; col != ncoords; ++col, ++i) {
664  // identity prefactor since E(HF) = trace(H + F, D) = trace(2H + G, D)
665  auto hess = G2[i].cwiseProduct(D).sum();
666  H2(row, col) += hess;
667  }
668  }
669 
670  std::cout << "** 2-body hessian = ";
671  for (auto row = 0, i = 0; row != ncoords; ++row) {
672  for (auto col = row; col != ncoords; ++col) {
673  std::cout << H2(row, col) << " ";
674  }
675  }
676  std::cout << std::endl;
677 #endif
678 
679 // if support 1-e and 2-e 2nd derivatives compute nuclear repulsion hessian and
680 // the total hessian
681 #if LIBINT2_DERIV_ONEBODY_ORDER > 1 && LIBINT2_DERIV_ERI_ORDER > 1
682  // compute nuclear repulsion hessian
683  // NB only the upper triangle is computed!!!
684  Matrix HN = Matrix::Zero(ncoords, ncoords);
685 
687  // nuclear repulsion contribution to the hessian
689  for (auto a1 = 1; a1 != atoms.size(); ++a1) {
690  const auto& atom1 = atoms[a1];
691  for (auto a2 = 0; a2 < a1; ++a2) {
692  const auto& atom2 = atoms[a2];
693 
694  auto x12 = atom1.x - atom2.x;
695  auto y12 = atom1.y - atom2.y;
696  auto z12 = atom1.z - atom2.z;
697  auto x12_2 = x12 * x12;
698  auto y12_2 = y12 * y12;
699  auto z12_2 = z12 * z12;
700  auto r12_2 = x12 * x12 + y12 * y12 + z12 * z12;
701  auto r12 = sqrt(r12_2);
702  auto r12_5 = r12 * r12_2 * r12_2;
703 
704  auto z1z2_over_r12_5 =
705  atom1.atomic_number * atom2.atomic_number / r12_5;
706 
707  HN(3*a1 + 0, 3*a1 + 0) += z1z2_over_r12_5 * (3*x12_2 - r12_2);
708  HN(3*a1 + 1, 3*a1 + 1) += z1z2_over_r12_5 * (3*y12_2 - r12_2);
709  HN(3*a1 + 2, 3*a1 + 2) += z1z2_over_r12_5 * (3*z12_2 - r12_2);
710  HN(3*a1 + 0, 3*a1 + 1) += z1z2_over_r12_5 * (3*x12*y12);
711  HN(3*a1 + 0, 3*a1 + 2) += z1z2_over_r12_5 * (3*x12*z12);
712  HN(3*a1 + 1, 3*a1 + 2) += z1z2_over_r12_5 * (3*y12*z12);
713 
714  HN(3*a2 + 0, 3*a2 + 0) += z1z2_over_r12_5 * (3*x12_2 - r12_2);
715  HN(3*a2 + 1, 3*a2 + 1) += z1z2_over_r12_5 * (3*y12_2 - r12_2);
716  HN(3*a2 + 2, 3*a2 + 2) += z1z2_over_r12_5 * (3*z12_2 - r12_2);
717  HN(3*a2 + 0, 3*a2 + 1) += z1z2_over_r12_5 * (3*x12*y12);
718  HN(3*a2 + 0, 3*a2 + 2) += z1z2_over_r12_5 * (3*x12*z12);
719  HN(3*a2 + 1, 3*a2 + 2) += z1z2_over_r12_5 * (3*y12*z12);
720 
721  HN(3*a2 + 0, 3*a1 + 0) -= z1z2_over_r12_5 * (3*x12_2 - r12_2);
722  HN(3*a2 + 1, 3*a1 + 1) -= z1z2_over_r12_5 * (3*y12_2 - r12_2);
723  HN(3*a2 + 2, 3*a1 + 2) -= z1z2_over_r12_5 * (3*z12_2 - r12_2);
724  HN(3*a2 + 1, 3*a1 + 0) -= z1z2_over_r12_5 * (3*y12*x12);
725  HN(3*a2 + 2, 3*a1 + 0) -= z1z2_over_r12_5 * (3*z12*x12);
726  HN(3*a2 + 2, 3*a1 + 1) -= z1z2_over_r12_5 * (3*z12*y12);
727  HN(3*a2 + 0, 3*a1 + 1) -= z1z2_over_r12_5 * (3*x12*y12);
728  HN(3*a2 + 0, 3*a1 + 2) -= z1z2_over_r12_5 * (3*x12*z12);
729  HN(3*a2 + 1, 3*a1 + 2) -= z1z2_over_r12_5 * (3*y12*z12);
730  }
731  }
732 
733  std::cout << "** nuclear repulsion hessian = ";
734  for (auto row = 0, i = 0; row != ncoords; ++row) {
735  for (auto col = row; col != ncoords; ++col) {
736  std::cout << HN(row, col) << " ";
737  }
738  }
739  std::cout << std::endl;
740 
741  auto H = H1 + H_Pulay + H2 + HN;
742  std::cout << "** Hartree-Fock hessian = ";
743  for (auto row = 0, i = 0; row != ncoords; ++row) {
744  for (auto col = row; col != ncoords; ++col) {
745  std::cout << H(row, col) << " ";
746  }
747  }
748  std::cout << std::endl;
749 #endif
750  }
751 
752  libint2::finalize(); // done with libint
753 
754  } // end of try block; if any exceptions occurred, report them and exit
755  // cleanly
756 
757  catch (const char* ex) {
758  cerr << "caught exception: " << ex << endl;
759  return 1;
760  } catch (std::string& ex) {
761  cerr << "caught exception: " << ex << endl;
762  return 1;
763  } catch (std::exception& ex) {
764  cerr << ex.what() << endl;
765  return 1;
766  } catch (...) {
767  cerr << "caught unknown exception\n";
768  return 1;
769  }
770 
771  return 0;
772 }
773 
774 std::vector<libint2::Atom> read_geometry(const std::string& filename) {
775  std::cout << "Will read geometry from " << filename << std::endl;
776  std::ifstream is(filename);
777  if (not is.good()) {
778  char errmsg[256] = "Could not open file ";
779  strncpy(errmsg + 20, filename.c_str(), 235);
780  errmsg[255] = '\0';
781  throw std::ios_base::failure(errmsg);
782  }
783 
784  // to prepare for MPI parallelization, we will read the entire file into a
785  // string that can be
786  // broadcast to everyone, then converted to an std::istringstream object that
787  // can be used just like std::ifstream
788  std::ostringstream oss;
789  oss << is.rdbuf();
790  // use ss.str() to get the entire contents of the file as an std::string
791  // broadcast
792  // then make an std::istringstream in each process
793  std::istringstream iss(oss.str());
794 
795  // check the extension: if .xyz, assume the standard XYZ format, otherwise
796  // throw an exception
797  if (filename.rfind(".xyz") != std::string::npos)
798  return libint2::read_dotxyz(iss);
799  else
800  throw "only .xyz files are accepted";
801 }
802 
803 // computes Superposition-Of-libint2::Atomic-Densities guess for the molecular density
804 // matrix
805 // in minimal basis; occupies subshells by smearing electrons evenly over the
806 // orbitals
807 Matrix compute_soad(const std::vector<libint2::Atom>& atoms) {
808  // compute number of atomic orbitals
809  size_t nao = 0;
810  for (const auto& atom : atoms) {
811  const auto Z = atom.atomic_number;
812  nao += libint2::sto3g_num_ao(Z);
813  }
814 
815  // compute the minimal basis density
816  Matrix D = Matrix::Zero(nao, nao);
817  size_t ao_offset = 0; // first AO of this atom
818  for (const auto& atom : atoms) {
819  const auto Z = atom.atomic_number;
820  const auto& occvec = libint2::sto3g_ao_occupation_vector(Z);
821  for(const auto& occ: occvec) {
822  D(ao_offset, ao_offset) = occ;
823  ++ao_offset;
824  }
825  }
826 
827  return D * 0.5; // we use densities normalized to # of electrons/2
828 }
829 
830 Matrix compute_shellblock_norm(const BasisSet& obs, const Matrix& A) {
831  const auto nsh = obs.size();
832  Matrix Ash(nsh, nsh);
833 
834  auto shell2bf = obs.shell2bf();
835  for (size_t s1 = 0; s1 != nsh; ++s1) {
836  const auto& s1_first = shell2bf[s1];
837  const auto& s1_size = obs[s1].size();
838  for (size_t s2 = 0; s2 != nsh; ++s2) {
839  const auto& s2_first = shell2bf[s2];
840  const auto& s2_size = obs[s2].size();
841 
842  Ash(s1, s2) = A.block(s1_first, s2_first, s1_size, s2_size)
843  .lpNorm<Eigen::Infinity>();
844  }
845  }
846 
847  return Ash;
848 }
849 
850 template <Operator obtype, typename OperatorParams>
852  const BasisSet& obs, OperatorParams params) {
853  const auto n = obs.nbf();
854  const auto nshells = obs.size();
855  using libint2::nthreads;
857  result_type;
858  const unsigned int nopers = libint2::operator_traits<obtype>::nopers;
859  result_type result;
860  for (auto& r : result) r = Matrix::Zero(n, n);
861 
862  // construct the 1-body integrals engine
863  std::vector<libint2::Engine> engines(nthreads);
864  engines[0] = libint2::Engine(obtype, obs.max_nprim(), obs.max_l(), 0);
865  // nuclear attraction ints engine needs to know where the charges sit ...
866  // the nuclei are charges in this case; in QM/MM there will also be classical
867  // charges
868  if (obtype == Operator::nuclear || obtype == Operator::erf_nuclear ||
869  obtype == Operator::erfc_nuclear) {
870  engines[0].set_params(params);
871  }
872  for (size_t i = 1; i != nthreads; ++i) {
873  engines[i] = engines[0];
874  }
875 
876  auto shell2bf = obs.shell2bf();
877 
878  auto compute = [&](int thread_id) {
879 
880  const auto& buf = engines[thread_id].results();
881 
882  // loop over unique shell pairs, {s1,s2} such that s1 >= s2
883  // this is due to the permutational symmetry of the real integrals over
884  // Hermitian operators: (1|2) = (2|1)
885  for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
886  auto bf1 = shell2bf[s1]; // first basis function in this shell
887  auto n1 = obs[s1].size();
888  auto s1_offset = s1 * (s1+1) / 2;
889 
890  for(auto s2: obs_shellpair_list[s1]) {
891  auto s12 = s1_offset + s2;
892  if (s12 % nthreads != thread_id) continue;
893 
894  auto bf2 = shell2bf[s2];
895  auto n2 = obs[s2].size();
896 
897  auto n12 = n1 * n2;
898 
899  // compute shell pair; return is the pointer to the buffer
900  engines[thread_id].compute(obs[s1], obs[s2]);
901 
902  for (unsigned int op = 0; op != nopers; ++op) {
903  // "map" buffer to a const Eigen Matrix, and copy it to the
904  // corresponding blocks of the result
905  Eigen::Map<const Matrix> buf_mat(buf[op], n1, n2);
906  result[op].block(bf1, bf2, n1, n2) = buf_mat;
907  if (s1 != s2) // if s1 >= s2, copy {s1,s2} to the corresponding
908  // {s2,s1} block, note the transpose!
909  result[op].block(bf2, bf1, n2, n1) = buf_mat.transpose();
910  }
911  }
912  }
913  }; // compute lambda
914 
915  libint2::parallel_do(compute);
916 
917  return result;
918 }
919 
920 #if LIBINT2_DERIV_ONEBODY_ORDER
921 template <Operator obtype>
922 std::vector<Matrix> compute_1body_ints_deriv(unsigned deriv_order,
923  const BasisSet& obs,
924  const std::vector<libint2::Atom>& atoms) {
925  using libint2::nthreads;
926  const auto n = obs.nbf();
927  const auto nshells = obs.size();
928  constexpr auto nopers = libint2::operator_traits<obtype>::nopers;
929  const auto nresults =
930  nopers * libint2::num_geometrical_derivatives(atoms.size(), deriv_order);
931  typedef std::vector<Matrix> result_type;
932  result_type result(nresults);
933  for (auto& r : result) r = Matrix::Zero(n, n);
934 
935  // construct the 1-body integrals engine
936  std::vector<libint2::Engine> engines(nthreads);
937  engines[0] =
938  libint2::Engine(obtype, obs.max_nprim(), obs.max_l(), deriv_order);
939  // nuclear attraction ints engine needs to know where the charges sit ...
940  // the nuclei are charges in this case; in QM/MM there will also be classical
941  // charges
942  if (obtype == Operator::nuclear) {
943  std::vector<std::pair<double, std::array<double, 3>>> q;
944  for (const auto& atom : atoms) {
945  q.push_back({static_cast<double>(atom.atomic_number),
946  {{atom.x, atom.y, atom.z}}});
947  }
948  engines[0].set_params(q);
949  }
950  for (size_t i = 1; i != nthreads; ++i) {
951  engines[i] = engines[0];
952  }
953 
954  auto shell2bf = obs.shell2bf();
955  auto shell2atom = obs.shell2atom(atoms);
956 
957  const auto natoms = atoms.size();
958  const auto two_times_ncoords = 6*natoms;
959  const auto nderivcenters_shset =
960  2 + ((obtype == Operator::nuclear) ? natoms : 0);
961 
962  auto compute = [&](int thread_id) {
963 
964  const auto& buf = engines[thread_id].results();
965 
966  // loop over unique shell pairs, {s1,s2} such that s1 >= s2
967  // this is due to the permutational symmetry of the real integrals over
968  // Hermitian operators: (1|2) = (2|1)
969  for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
970  auto bf1 = shell2bf[s1]; // first basis function in this shell
971  auto n1 = obs[s1].size();
972  auto atom1 = shell2atom[s1];
973  assert(atom1 != -1);
974 
975  auto s1_offset = s1 * (s1+1) / 2;
976  for(auto s2: obs_shellpair_list[s1]) {
977  auto s12 = s1_offset + s2;
978  if (s12 % nthreads != thread_id) continue;
979 
980  auto bf2 = shell2bf[s2];
981  auto n2 = obs[s2].size();
982  auto atom2 = shell2atom[s2];
983 
984  auto n12 = n1 * n2;
985 
986  // compute shell pair; return is the pointer to the buffer
987  engines[thread_id].compute(obs[s1], obs[s2]);
988 
989  // "copy" lambda copies shell set \c idx to the operator matrix with
990  // index \c op
991  auto add_shellset_to_dest = [&](std::size_t op, std::size_t idx,
992  double scale = 1.0) {
993  // "map" buffer to a const Eigen Matrix, and copy it to the
994  // corresponding blocks of the result
995  Eigen::Map<const Matrix> buf_mat(buf[idx], n1, n2);
996  if (scale == 1.0)
997  result[op].block(bf1, bf2, n1, n2) += buf_mat;
998  else
999  result[op].block(bf1, bf2, n1, n2) += scale * buf_mat;
1000  if (s1 != s2) { // if s1 >= s2, copy {s1,s2} to the corresponding
1001  // {s2,s1} block, note the transpose!
1002  if (scale == 1.0)
1003  result[op].block(bf2, bf1, n2, n1) += buf_mat.transpose();
1004  else
1005  result[op].block(bf2, bf1, n2, n1) += scale * buf_mat.transpose();
1006  }
1007  };
1008 
1009  switch (deriv_order) {
1010  case 0:
1011  for (std::size_t op = 0; op != nopers; ++op) {
1012  add_shellset_to_dest(op, op);
1013  }
1014  break;
1015 
1016  // map deriv quanta for this shell pair to the overall deriv quanta
1017  //
1018  // easiest to explain with example:
1019  // in sto-3g water shells 0 1 2 sit on atom 0, shells 3 and 4 on atoms
1020  // 1 and 2 respectively
1021  // each call to engine::compute for nuclear ints will return
1022  // derivatives
1023  // with respect to 15 coordinates, obtained as 3 (x,y,z) times 2 + 3 =
1024  // 5 centers
1025  // (2 centers on which shells sit + 3 nuclear charges)
1026  // (for overlap, kinetic, and emultipole ints we there are only 6
1027  // coordinates
1028  // since the operator is coordinate-independent, or derivatives with
1029  // respect to
1030  // the operator coordinates are not computed)
1031  //
1032 
1033  case 1: {
1034  std::size_t shellset_idx = 0;
1035  for (auto c = 0; c != nderivcenters_shset; ++c) {
1036  auto atom = (c == 0) ? atom1 : ((c == 1) ? atom2 : c - 2);
1037  auto op_start = 3 * atom * nopers;
1038  auto op_fence = op_start + nopers;
1039  for (auto xyz = 0; xyz != 3;
1040  ++xyz, op_start += nopers, op_fence += nopers) {
1041  for (unsigned int op = op_start; op != op_fence;
1042  ++op, ++shellset_idx) {
1043  add_shellset_to_dest(op, shellset_idx);
1044  }
1045  }
1046  }
1047  } break;
1048 
1049  case 2: {
1050  //
1051  // must pay attention to symmetry when computing 2nd and higher-order derivs
1052  // e.g. d2 (s1|s2) / dX dY involves several cases:
1053  // 1. only s1 (or only s2) depends on X AND Y (i.e. X and Y refer to same atom) =>
1054  // d2 (s1|s2) / dX dY = (d2 s1 / dX dY | s2)
1055  // 2. s1 depends on X only, s2 depends on Y only (or vice versa) =>
1056  // d2 (s1|s2) / dX dY = (d s1 / dX | d s2 / dY)
1057  // 3. s1 AND s2 depend on X AND Y (i.e. X and Y refer to same atom) =>
1058  // case A: X != Y
1059  // d2 (s1|s2) / dX dY = (d2 s1 / dX dY | s2) + (d s1 / dX | d s2 / dY)
1060  // + (d s1 / dY | d s2 / dX) + (s1| d2 s2 / dX dY )
1061  // case B: X == Y
1062  // d2 (s1|s2) / dX2 = (d2 s1 / dX2 | s2) + 2 (d s1 / dX | d s2 / dX)
1063  // + (s1| d2 s2 / dX2 )
1064 
1065  // computes upper triangle index
1066  // n2 = matrix size times 2
1067  // i,j = (unordered) indices
1068 #define upper_triangle_index(n2, i, j) \
1069  (std::min((i), (j))) * ((n2) - (std::min((i), (j))) - 1) / 2 + \
1070  (std::max((i), (j)))
1071 
1072  // look over shellsets in the order in which they appear
1073  std::size_t shellset_idx = 0;
1074  for (auto c1 = 0; c1 != nderivcenters_shset; ++c1) {
1075  auto a1 = (c1 == 0) ? atom1 : ((c1 == 1) ? atom2 : c1 - 2);
1076  auto coord1 = 3 * a1;
1077  for (auto xyz1 = 0; xyz1 != 3; ++xyz1, ++coord1) {
1078 
1079  for (auto c2 = c1; c2 != nderivcenters_shset; ++c2) {
1080  auto a2 = (c2 == 0) ? atom1 : ((c2 == 1) ? atom2 : c2 - 2);
1081  auto xyz2_start = (c1 == c2) ? xyz1 : 0;
1082  auto coord2 = 3 * a2 + xyz2_start;
1083  for (auto xyz2 = xyz2_start; xyz2 != 3;
1084  ++xyz2, ++coord2) {
1085 
1086  double scale = (coord1 == coord2 && c1 != c2) ? 2.0 : 1.0;
1087 
1088  const auto coord12 =
1089  upper_triangle_index(two_times_ncoords, coord1, coord2);
1090  auto op_start = coord12 * nopers;
1091  auto op_fence = op_start + nopers;
1092  for (auto op = op_start; op != op_fence;
1093  ++op, ++shellset_idx) {
1094  add_shellset_to_dest(op, shellset_idx, scale);
1095  }
1096  }
1097  }
1098  }
1099  }
1100  } break;
1101 #undef upper_triangle_index
1102 
1103  default: {
1104  assert(false && "not yet implemented");
1105 
1106  using ShellSetDerivIterator =
1108  std::vector<unsigned int>>;
1109  ShellSetDerivIterator shellset_diter(deriv_order,
1110  nderivcenters_shset);
1111  while (shellset_diter) {
1112  const auto& deriv = *shellset_diter;
1113  }
1114  }
1115  } // copy shell block switch
1116 
1117  } // s2 <= s1
1118  } // s1
1119  }; // compute lambda
1120 
1121  libint2::parallel_do(compute);
1122 
1123  return result;
1124 }
1125 #endif
1126 
1127 template <libint2::Operator Kernel>
1128 Matrix compute_schwarz_ints(
1129  const BasisSet& bs1, const BasisSet& _bs2, bool use_2norm,
1130  typename libint2::operator_traits<Kernel>::oper_params_type params) {
1131  const BasisSet& bs2 = (_bs2.empty() ? bs1 : _bs2);
1132  const auto nsh1 = bs1.size();
1133  const auto nsh2 = bs2.size();
1134  const auto bs1_equiv_bs2 = (&bs1 == &bs2);
1135 
1136  Matrix K = Matrix::Zero(nsh1, nsh2);
1137 
1138  // construct the 2-electron repulsion integrals engine
1139  using libint2::Engine;
1140  using libint2::nthreads;
1141  std::vector<Engine> engines(nthreads);
1142 
1143  // !!! very important: cannot screen primitives in Schwarz computation !!!
1144  auto epsilon = 0.;
1145  engines[0] = Engine(Kernel, std::max(bs1.max_nprim(), bs2.max_nprim()),
1146  std::max(bs1.max_l(), bs2.max_l()), 0, epsilon, params);
1147  for (size_t i = 1; i != nthreads; ++i) {
1148  engines[i] = engines[0];
1149  }
1150 
1151  std::cout << "computing Schwarz bound prerequisites (kernel=" << (int)Kernel
1152  << ") ... ";
1153 
1154  libint2::Timers<1> timer;
1155  timer.set_now_overhead(25);
1156  timer.start(0);
1157 
1158  auto compute = [&](int thread_id) {
1159 
1160  const auto& buf = engines[thread_id].results();
1161 
1162  // loop over permutationally-unique set of shells
1163  for (auto s1 = 0l, s12 = 0l; s1 != nsh1; ++s1) {
1164  auto n1 = bs1[s1].size(); // number of basis functions in this shell
1165 
1166  auto s2_max = bs1_equiv_bs2 ? s1 : nsh2 - 1;
1167  for (auto s2 = 0; s2 <= s2_max; ++s2, ++s12) {
1168  if (s12 % nthreads != thread_id) continue;
1169 
1170  auto n2 = bs2[s2].size();
1171  auto n12 = n1 * n2;
1172 
1173  engines[thread_id].compute2<Kernel, BraKet::xx_xx, 0>(bs1[s1], bs2[s2],
1174  bs1[s1], bs2[s2]);
1175  assert(buf[0] != nullptr &&
1176  "to compute Schwarz ints turn off primitive screening");
1177 
1178  // the diagonal elements are the Schwarz ints ... use Map.diagonal()
1179  Eigen::Map<const Matrix> buf_mat(buf[0], n12, n12);
1180  auto norm2 = use_2norm ? buf_mat.diagonal().norm()
1181  : buf_mat.diagonal().lpNorm<Eigen::Infinity>();
1182  K(s1, s2) = std::sqrt(norm2);
1183  if (bs1_equiv_bs2) K(s2, s1) = K(s1, s2);
1184  }
1185  }
1186  }; // thread lambda
1187 
1188  libint2::parallel_do(compute);
1189 
1190  timer.stop(0);
1191  std::cout << "done (" << timer.read(0) << " s)" << std::endl;
1192 
1193  return K;
1194 }
1195 
1196 Matrix compute_do_ints(const BasisSet& bs1, const BasisSet& bs2,
1197  bool use_2norm) {
1198  return compute_schwarz_ints<libint2::Operator::delta>(bs1, bs2, use_2norm);
1199 }
1200 
1201 shellpair_list_t compute_shellpair_list(const BasisSet& bs1,
1202  const BasisSet& _bs2,
1203  const double threshold) {
1204  const BasisSet& bs2 = (_bs2.empty() ? bs1 : _bs2);
1205  const auto nsh1 = bs1.size();
1206  const auto nsh2 = bs2.size();
1207  const auto bs1_equiv_bs2 = (&bs1 == &bs2);
1208 
1209  using libint2::nthreads;
1210 
1211  // construct the 2-electron repulsion integrals engine
1212  using libint2::Engine;
1213  std::vector<Engine> engines;
1214  engines.reserve(nthreads);
1215  engines.emplace_back(Operator::overlap,
1216  std::max(bs1.max_nprim(), bs2.max_nprim()),
1217  std::max(bs1.max_l(), bs2.max_l()), 0);
1218  for (size_t i = 1; i != nthreads; ++i) {
1219  engines.push_back(engines[0]);
1220  }
1221 
1222  std::cout << "computing non-negligible shell-pair list ... ";
1223 
1224  libint2::Timers<1> timer;
1225  timer.set_now_overhead(25);
1226  timer.start(0);
1227 
1228  shellpair_list_t result;
1229 
1230  std::mutex mx;
1231 
1232  auto compute = [&](int thread_id) {
1233 
1234  auto& engine = engines[thread_id];
1235  const auto& buf = engine.results();
1236 
1237  // loop over permutationally-unique set of shells
1238  for (auto s1 = 0l, s12 = 0l; s1 != nsh1; ++s1) {
1239  mx.lock();
1240  if (result.find(s1) == result.end())
1241  result.insert(std::make_pair(s1, std::vector<size_t>()));
1242  mx.unlock();
1243 
1244  auto n1 = bs1[s1].size(); // number of basis functions in this shell
1245 
1246  auto s2_max = bs1_equiv_bs2 ? s1 : nsh2 - 1;
1247  for (auto s2 = 0; s2 <= s2_max; ++s2, ++s12) {
1248  if (s12 % nthreads != thread_id) continue;
1249 
1250  auto on_same_center = (bs1[s1].O == bs2[s2].O);
1251  bool significant = on_same_center;
1252  if (not on_same_center) {
1253  auto n2 = bs2[s2].size();
1254  engines[thread_id].compute(bs1[s1], bs2[s2]);
1255  Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
1256  auto norm = buf_mat.norm();
1257  significant = (norm >= threshold);
1258  }
1259 
1260  if (significant) {
1261  mx.lock();
1262  result[s1].emplace_back(s2);
1263  mx.unlock();
1264  }
1265  }
1266  }
1267  }; // end of compute
1268 
1269  libint2::parallel_do(compute);
1270 
1271  // resort shell list in increasing order, i.e. result[s][s1] < result[s][s2]
1272  // if s1 < s2
1273  auto sort = [&](int thread_id) {
1274  for (auto s1 = 0l; s1 != nsh1; ++s1) {
1275  if (s1 % nthreads == thread_id) {
1276  auto& list = result[s1];
1277  std::sort(list.begin(), list.end());
1278  }
1279  }
1280  }; // end of sort
1281 
1282  libint2::parallel_do(sort);
1283 
1284  timer.stop(0);
1285  std::cout << "done (" << timer.read(0) << " s)" << std::endl;
1286 
1287  return result;
1288 }
1289 
1290 // returns {X,X^{-1},rank,A_condition_number,result_A_condition_number}, where
1291 // X is the generalized square-root-inverse such that X.transpose() * A * X = I
1292 //
1293 // if symmetric is true, produce "symmetric" sqrtinv: X = U . A_evals_sqrtinv .
1294 // U.transpose()),
1295 // else produce "canonical" sqrtinv: X = U . A_evals_sqrtinv
1296 // where U are eigenvectors of A
1297 // rows and cols of symmetric X are equivalent; for canonical X the rows are
1298 // original basis (AO),
1299 // cols are transformed basis ("orthogonal" AO)
1300 //
1301 // A is conditioned to max_condition_number
1302 std::tuple<Matrix, Matrix, size_t, double, double> gensqrtinv(
1303  const Matrix& S, bool symmetric = false,
1304  double max_condition_number = 1e8) {
1305  Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(S);
1306  auto U = eig_solver.eigenvectors();
1307  auto s = eig_solver.eigenvalues();
1308  auto s_max = s.maxCoeff();
1309  auto condition_number = std::min(
1310  s_max / std::max(s.minCoeff(), std::numeric_limits<double>::min()),
1311  1.0 / std::numeric_limits<double>::epsilon());
1312  auto threshold = s_max / max_condition_number;
1313  long n = s.rows();
1314  long n_cond = 0;
1315  for (long i = n - 1; i >= 0; --i) {
1316  if (s(i) >= threshold) {
1317  ++n_cond;
1318  } else
1319  i = 0; // skip rest since eigenvalues are in ascending order
1320  }
1321 
1322  auto sigma = s.bottomRows(n_cond);
1323  auto result_condition_number = sigma.maxCoeff() / sigma.minCoeff();
1324  auto sigma_sqrt = sigma.array().sqrt().matrix().asDiagonal();
1325  auto sigma_invsqrt = sigma.array().sqrt().inverse().matrix().asDiagonal();
1326 
1327  // make canonical X/Xinv
1328  auto U_cond = U.block(0, n - n_cond, n, n_cond);
1329  Matrix X = U_cond * sigma_invsqrt;
1330  Matrix Xinv = U_cond * sigma_sqrt;
1331  // convert to symmetric, if needed
1332  if (symmetric) {
1333  X = X * U_cond.transpose();
1334  Xinv = Xinv * U_cond.transpose();
1335  }
1336  return std::make_tuple(X, Xinv, size_t(n_cond), condition_number,
1337  result_condition_number);
1338 }
1339 
1340 std::tuple<Matrix, Matrix, double> conditioning_orthogonalizer(
1341  const Matrix& S, double S_condition_number_threshold) {
1342  size_t obs_rank;
1343  double S_condition_number;
1344  double XtX_condition_number;
1345  Matrix X, Xinv;
1346 
1347  assert(S.rows() == S.cols());
1348 
1349  std::tie(X, Xinv, obs_rank, S_condition_number, XtX_condition_number) =
1350  gensqrtinv(S, false, S_condition_number_threshold);
1351  auto obs_nbf_omitted = (long)S.rows() - (long)obs_rank;
1352  std::cout << "overlap condition number = " << S_condition_number;
1353  if (obs_nbf_omitted > 0)
1354  std::cout << " (dropped " << obs_nbf_omitted << " "
1355  << (obs_nbf_omitted > 1 ? "fns" : "fn") << " to reduce to "
1356  << XtX_condition_number << ")";
1357  std::cout << std::endl;
1358 
1359  if (obs_nbf_omitted > 0) {
1360  Matrix should_be_I = X.transpose() * S * X;
1361  Matrix I = Matrix::Identity(should_be_I.rows(), should_be_I.cols());
1362  std::cout << "||X^t * S * X - I||_2 = " << (should_be_I - I).norm()
1363  << " (should be 0)" << std::endl;
1364  }
1365 
1366  return std::make_tuple(X, Xinv, XtX_condition_number);
1367 }
1368 
1369 Matrix compute_2body_2index_ints(const BasisSet& bs) {
1370  using libint2::nthreads;
1371  const auto n = bs.nbf();
1372  const auto nshells = bs.size();
1373  Matrix result = Matrix::Zero(n, n);
1374 
1375  // build engines for each thread
1376  using libint2::Engine;
1377  std::vector<Engine> engines(nthreads);
1378  engines[0] =
1379  Engine(libint2::Operator::coulomb, bs.max_nprim(), bs.max_l(), 0);
1380  engines[0].set(BraKet::xs_xs);
1381  for (size_t i = 1; i != nthreads; ++i) {
1382  engines[i] = engines[0];
1383  }
1384 
1385  auto shell2bf = bs.shell2bf();
1386  auto unitshell = Shell::unit();
1387 
1388  auto compute = [&](int thread_id) {
1389 
1390  auto& engine = engines[thread_id];
1391  const auto& buf = engine.results();
1392 
1393  // loop over unique shell pairs, {s1,s2} such that s1 >= s2
1394  // this is due to the permutational symmetry of the real integrals over
1395  // Hermitian operators: (1|2) = (2|1)
1396  for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
1397  auto bf1 = shell2bf[s1]; // first basis function in this shell
1398  auto n1 = bs[s1].size();
1399 
1400  for (auto s2 = 0; s2 <= s1; ++s2, ++s12) {
1401  if (s12 % nthreads != thread_id) continue;
1402 
1403  auto bf2 = shell2bf[s2];
1404  auto n2 = bs[s2].size();
1405 
1406  // compute shell pair; return is the pointer to the buffer
1407  engine.compute(bs[s1], bs[s2]);
1408  if (buf[0] == nullptr)
1409  continue; // if all integrals screened out, skip to next shell set
1410 
1411  // "map" buffer to a const Eigen Matrix, and copy it to the
1412  // corresponding blocks of the result
1413  Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
1414  result.block(bf1, bf2, n1, n2) = buf_mat;
1415  if (s1 != s2) // if s1 >= s2, copy {s1,s2} to the corresponding {s2,s1}
1416  // block, note the transpose!
1417  result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
1418  }
1419  }
1420  }; // compute lambda
1421 
1422  libint2::parallel_do(compute);
1423 
1424  return result;
1425 }
1426 
1427 Matrix compute_2body_fock(const BasisSet& obs, const Matrix& D,
1428  double precision, const Matrix& Schwarz) {
1429  const auto n = obs.nbf();
1430  const auto nshells = obs.size();
1431  using libint2::nthreads;
1432  std::vector<Matrix> G(nthreads, Matrix::Zero(n, n));
1433 
1434  const auto do_schwarz_screen = Schwarz.cols() != 0 && Schwarz.rows() != 0;
1435  Matrix D_shblk_norm =
1436  compute_shellblock_norm(obs, D); // matrix of infty-norms of shell blocks
1437 
1438  auto fock_precision = precision;
1439  // engine precision controls primitive truncation, assume worst-case scenario
1440  // (all primitive combinations add up constructively)
1441  auto max_nprim = obs.max_nprim();
1442  auto max_nprim4 = max_nprim * max_nprim * max_nprim * max_nprim;
1443  auto engine_precision = std::min(fock_precision / D_shblk_norm.maxCoeff(),
1444  std::numeric_limits<double>::epsilon()) /
1445  max_nprim4;
1446 
1447  // construct the 2-electron repulsion integrals engine pool
1448  using libint2::Engine;
1449  std::vector<Engine> engines(nthreads);
1450  engines[0] = Engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 0);
1451  engines[0].set_precision(engine_precision); // shellset-dependent precision
1452  // control will likely break
1453  // positive definiteness
1454  // stick with this simple recipe
1455  std::cout << "compute_2body_fock:precision = " << precision << std::endl;
1456  std::cout << "Engine::precision = " << engines[0].precision() << std::endl;
1457  for (size_t i = 1; i != nthreads; ++i) {
1458  engines[i] = engines[0];
1459  }
1460  std::atomic<size_t> num_ints_computed{0};
1461 
1462 #if defined(REPORT_INTEGRAL_TIMINGS)
1463  std::vector<libint2::Timers<1>> timers(nthreads);
1464 #endif
1465 
1466  auto shell2bf = obs.shell2bf();
1467 
1468  auto lambda = [&](int thread_id) {
1469 
1470  auto& engine = engines[thread_id];
1471  auto& g = G[thread_id];
1472  const auto& buf = engine.results();
1473 
1474 #if defined(REPORT_INTEGRAL_TIMINGS)
1475  auto& timer = timers[thread_id];
1476  timer.clear();
1477  timer.set_now_overhead(25);
1478 #endif
1479 
1480  // loop over permutationally-unique set of shells
1481  for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1482  auto bf1_first = shell2bf[s1]; // first basis function in this shell
1483  auto n1 = obs[s1].size(); // number of basis functions in this shell
1484 
1485  for (const auto& s2 : obs_shellpair_list[s1]) {
1486  auto bf2_first = shell2bf[s2];
1487  auto n2 = obs[s2].size();
1488 
1489  const auto Dnorm12 = do_schwarz_screen ? D_shblk_norm(s1, s2) : 0.;
1490 
1491  for (auto s3 = 0; s3 <= s1; ++s3) {
1492  auto bf3_first = shell2bf[s3];
1493  auto n3 = obs[s3].size();
1494 
1495  const auto Dnorm123 =
1496  do_schwarz_screen
1497  ? std::max(D_shblk_norm(s1, s3),
1498  std::max(D_shblk_norm(s2, s3), Dnorm12))
1499  : 0.;
1500 
1501  const auto s4_max = (s1 == s3) ? s2 : s3;
1502  for (const auto& s4 : obs_shellpair_list[s3]) {
1503  if (s4 > s4_max)
1504  break; // for each s3, s4 are stored in monotonically increasing
1505  // order
1506 
1507  if ((s1234++) % nthreads != thread_id) continue;
1508 
1509  const auto Dnorm1234 =
1510  do_schwarz_screen
1511  ? std::max(
1512  D_shblk_norm(s1, s4),
1513  std::max(D_shblk_norm(s2, s4),
1514  std::max(D_shblk_norm(s3, s4), Dnorm123)))
1515  : 0.;
1516 
1517  if (do_schwarz_screen &&
1518  Dnorm1234 * Schwarz(s1, s2) * Schwarz(s3, s4) <
1519  fock_precision)
1520  continue;
1521 
1522  auto bf4_first = shell2bf[s4];
1523  auto n4 = obs[s4].size();
1524 
1525  num_ints_computed += n1 * n2 * n3 * n4;
1526 
1527  // compute the permutational degeneracy (i.e. # of equivalents) of
1528  // the given shell set
1529  auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1530  auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1531  auto s12_34_deg = (s1 == s3) ? (s2 == s4 ? 1.0 : 2.0) : 2.0;
1532  auto s1234_deg = s12_deg * s34_deg * s12_34_deg;
1533 
1534 #if defined(REPORT_INTEGRAL_TIMINGS)
1535  timer.start(0);
1536 #endif
1537 
1538  engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1539  obs[s1], obs[s2], obs[s3], obs[s4]);
1540  const auto* buf_1234 = buf[0];
1541  if (buf_1234 == nullptr)
1542  continue; // if all integrals screened out, skip to next quartet
1543 
1544 #if defined(REPORT_INTEGRAL_TIMINGS)
1545  timer.stop(0);
1546 #endif
1547 
1548  Eigen::Map<MatrixXd> buf_1234_map(buf_1234, n12, n34);
1549  assert(buf_1234_map.norm() < Schwarz(s1, s2) * Schwarz(s3, s4));
1550 
1551  // 1) each shell set of integrals contributes up to 6 shell sets of
1552  // the Fock matrix:
1553  // F(a,b) += (ab|cd) * D(c,d)
1554  // F(c,d) += (ab|cd) * D(a,b)
1555  // F(b,d) -= 1/4 * (ab|cd) * D(a,c)
1556  // F(b,c) -= 1/4 * (ab|cd) * D(a,d)
1557  // F(a,c) -= 1/4 * (ab|cd) * D(b,d)
1558  // F(a,d) -= 1/4 * (ab|cd) * D(b,c)
1559  // 2) each permutationally-unique integral (shell set) must be
1560  // scaled by its degeneracy,
1561  // i.e. the number of the integrals/sets equivalent to it
1562  // 3) the end result must be symmetrized
1563  for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1564  const auto bf1 = f1 + bf1_first;
1565  for (auto f2 = 0; f2 != n2; ++f2) {
1566  const auto bf2 = f2 + bf2_first;
1567  for (auto f3 = 0; f3 != n3; ++f3) {
1568  const auto bf3 = f3 + bf3_first;
1569  for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1570  const auto bf4 = f4 + bf4_first;
1571 
1572  const auto value = buf_1234[f1234];
1573 
1574  const auto value_scal_by_deg = value * s1234_deg;
1575 
1576  g(bf1, bf2) += D(bf3, bf4) * value_scal_by_deg;
1577  g(bf3, bf4) += D(bf1, bf2) * value_scal_by_deg;
1578  g(bf1, bf3) -= 0.25 * D(bf2, bf4) * value_scal_by_deg;
1579  g(bf2, bf4) -= 0.25 * D(bf1, bf3) * value_scal_by_deg;
1580  g(bf1, bf4) -= 0.25 * D(bf2, bf3) * value_scal_by_deg;
1581  g(bf2, bf3) -= 0.25 * D(bf1, bf4) * value_scal_by_deg;
1582  }
1583  }
1584  }
1585  }
1586  }
1587  }
1588  }
1589  }
1590 
1591  }; // end of lambda
1592 
1593  libint2::parallel_do(lambda);
1594 
1595  // accumulate contributions from all threads
1596  for (size_t i = 1; i != nthreads; ++i) {
1597  G[0] += G[i];
1598  }
1599 
1600 #if defined(REPORT_INTEGRAL_TIMINGS)
1601  double time_for_ints = 0.0;
1602  for (auto& t : timers) {
1603  time_for_ints += t.read(0);
1604  }
1605  std::cout << "time for integrals = " << time_for_ints << std::endl;
1606  for (int t = 0; t != nthreads; ++t) engines[t].print_timers();
1607 #endif
1608 
1609  Matrix GG = 0.5 * (G[0] + G[0].transpose());
1610 
1611  std::cout << "# of integrals = " << num_ints_computed << std::endl;
1612 
1613  // symmetrize the result and return
1614  return GG;
1615 }
1616 
1617 #if LIBINT2_DERIV_ERI_ORDER
1618 template <unsigned deriv_order>
1619 std::vector<Matrix> compute_2body_fock_deriv(const BasisSet& obs,
1620  const std::vector<libint2::Atom>& atoms,
1621  const Matrix& D, double precision,
1622  const Matrix& Schwarz) {
1623  const auto n = obs.nbf();
1624  const auto nshells = obs.size();
1625  const auto nderiv_shellset =
1626  libint2::num_geometrical_derivatives(4, deriv_order); // # of derivs for each shell quartet
1627  const auto nderiv = libint2::num_geometrical_derivatives(
1628  atoms.size(), deriv_order); // total # of derivs
1629  const auto ncoords_times_two = (atoms.size() * 3) * 2;
1630  using libint2::nthreads;
1631  std::vector<Matrix> G(nthreads * nderiv, Matrix::Zero(n, n));
1632 
1633  const auto do_schwarz_screen = Schwarz.cols() != 0 && Schwarz.rows() != 0;
1634  Matrix D_shblk_norm =
1635  compute_shellblock_norm(obs, D); // matrix of infty-norms of shell blocks
1636 
1637  auto fock_precision = precision;
1638  // engine precision controls primitive truncation, assume worst-case scenario
1639  // (all primitive combinations add up constructively)
1640  auto max_nprim = obs.max_nprim();
1641  auto max_nprim4 = max_nprim * max_nprim * max_nprim * max_nprim;
1642  auto engine_precision = std::min(fock_precision / D_shblk_norm.maxCoeff(),
1643  std::numeric_limits<double>::epsilon()) /
1644  max_nprim4;
1645 
1646  // construct the 2-electron repulsion integrals engine pool
1647  using libint2::Engine;
1648  std::vector<Engine> engines(nthreads);
1649  engines[0] =
1650  Engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), deriv_order);
1651  engines[0].set_precision(engine_precision); // shellset-dependent precision
1652  // control will likely break
1653  // positive definiteness
1654  // stick with this simple recipe
1655  std::cout << "compute_2body_fock:precision = " << precision << std::endl;
1656  std::cout << "Engine::precision = " << engines[0].precision() << std::endl;
1657  for (size_t i = 1; i != nthreads; ++i) {
1658  engines[i] = engines[0];
1659  }
1660  std::atomic<size_t> num_ints_computed{0};
1661 
1662 #if defined(REPORT_INTEGRAL_TIMINGS)
1663  std::vector<libint2::Timers<1>> timers(nthreads);
1664 #endif
1665 
1666  auto shell2bf = obs.shell2bf();
1667  auto shell2atom = obs.shell2atom(atoms);
1668 
1669  auto lambda = [&](int thread_id) {
1670 
1671  auto& engine = engines[thread_id];
1672  const auto& buf = engine.results();
1673 
1674 #if defined(REPORT_INTEGRAL_TIMINGS)
1675  auto& timer = timers[thread_id];
1676  timer.clear();
1677  timer.set_now_overhead(25);
1678 #endif
1679 
1680  size_t shell_atoms[4];
1681 
1682  // loop over permutationally-unique set of shells
1683  for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1684  auto bf1_first = shell2bf[s1]; // first basis function in this shell
1685  auto n1 = obs[s1].size(); // number of basis functions in this shell
1686  shell_atoms[0] = shell2atom[s1];
1687 
1688  for (const auto& s2 : obs_shellpair_list[s1]) {
1689  auto bf2_first = shell2bf[s2];
1690  auto n2 = obs[s2].size();
1691  shell_atoms[1] = shell2atom[s2];
1692 
1693  const auto Dnorm12 = do_schwarz_screen ? D_shblk_norm(s1, s2) : 0.;
1694 
1695  for (auto s3 = 0; s3 <= s1; ++s3) {
1696  auto bf3_first = shell2bf[s3];
1697  auto n3 = obs[s3].size();
1698  shell_atoms[2] = shell2atom[s3];
1699 
1700  const auto Dnorm123 =
1701  do_schwarz_screen
1702  ? std::max(D_shblk_norm(s1, s3),
1703  std::max(D_shblk_norm(s2, s3), Dnorm12))
1704  : 0.;
1705 
1706  const auto s4_max = (s1 == s3) ? s2 : s3;
1707  for (const auto& s4 : obs_shellpair_list[s3]) {
1708  if (s4 > s4_max)
1709  break; // for each s3, s4 are stored in monotonically increasing
1710  // order
1711 
1712  if ((s1234++) % nthreads != thread_id) continue;
1713 
1714  const auto Dnorm1234 =
1715  do_schwarz_screen
1716  ? std::max(
1717  D_shblk_norm(s1, s4),
1718  std::max(D_shblk_norm(s2, s4),
1719  std::max(D_shblk_norm(s3, s4), Dnorm123)))
1720  : 0.;
1721 
1722  if (do_schwarz_screen &&
1723  Dnorm1234 * Schwarz(s1, s2) * Schwarz(s3, s4) <
1724  fock_precision)
1725  continue;
1726 
1727  auto bf4_first = shell2bf[s4];
1728  auto n4 = obs[s4].size();
1729  shell_atoms[3] = shell2atom[s4];
1730 
1731  const auto n1234 = n1 * n2 * n3 * n4;
1732 
1733  // compute the permutational degeneracy (i.e. # of equivalents) of
1734  // the given shell set
1735  auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1736  auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1737  auto s12_34_deg = (s1 == s3) ? (s2 == s4 ? 1.0 : 2.0) : 2.0;
1738  auto s1234_deg = s12_deg * s34_deg * s12_34_deg;
1739 
1740  // computes contribution from shell set \c idx to the operator matrix with
1741  // index \c op
1742  auto add_shellset_to_dest = [&](
1743  std::size_t op, std::size_t idx, int coord1, int coord2, double scale = 1.0) {
1744  auto& g = G[op];
1745  auto shset = buf[idx];
1746  const auto weight = scale * s1234_deg;
1747 
1748  for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1749  const auto bf1 = f1 + bf1_first;
1750  for (auto f2 = 0; f2 != n2; ++f2) {
1751  const auto bf2 = f2 + bf2_first;
1752  for (auto f3 = 0; f3 != n3; ++f3) {
1753  const auto bf3 = f3 + bf3_first;
1754  for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1755  const auto bf4 = f4 + bf4_first;
1756 
1757  const auto value = shset[f1234];
1758  const auto wvalue = value * weight;
1759 
1760  g(bf1, bf2) += D(bf3, bf4) * wvalue;
1761  g(bf3, bf4) += D(bf1, bf2) * wvalue;
1762  g(bf1, bf3) -= 0.25 * D(bf2, bf4) * wvalue;
1763  g(bf2, bf4) -= 0.25 * D(bf1, bf3) * wvalue;
1764  g(bf1, bf4) -= 0.25 * D(bf2, bf3) * wvalue;
1765  g(bf2, bf3) -= 0.25 * D(bf1, bf4) * wvalue;
1766  }
1767  }
1768  }
1769  }
1770  };
1771 
1772 #if defined(REPORT_INTEGRAL_TIMINGS)
1773  timer.start(0);
1774 #endif
1775 
1776  engine.compute2<Operator::coulomb, BraKet::xx_xx, deriv_order>(
1777  obs[s1], obs[s2], obs[s3], obs[s4]);
1778  if (buf[0] == nullptr)
1779  continue; // if all integrals screened out, skip to next quartet
1780  num_ints_computed += nderiv_shellset * n1234;
1781 
1782 #if defined(REPORT_INTEGRAL_TIMINGS)
1783  timer.stop(0);
1784 #endif
1785 
1786  switch (deriv_order) {
1787  case 0: {
1788  int coord1 = 0, coord2 = 0;
1789  add_shellset_to_dest(thread_id, 0, coord1, coord2);
1790  } break;
1791 
1792  case 1: {
1793  for (auto d = 0; d != 12; ++d) {
1794  const int a = d / 3;
1795  const int xyz = d % 3;
1796 
1797  auto coord = shell_atoms[a] * 3 + xyz;
1798  auto& g = G[thread_id * nderiv + coord];
1799 
1800  int coord1 = 0, coord2 = 0;
1801 
1802  add_shellset_to_dest(thread_id * nderiv + coord, d, coord1, coord2);
1803 
1804  } // d \in [0,12)
1805  } break;
1806 
1807  case 2: {
1808 // computes upper triangle index
1809 // n2 = matrix size times 2
1810 // i,j = (unordered) indices
1811 #define upper_triangle_index(n2, i, j) \
1812  (std::min((i), (j))) * ((n2) - (std::min((i), (j))) - 1) / 2 + \
1813  (std::max((i), (j)))
1814  // look over shellsets in the order in which they appear
1815  std::size_t shellset_idx = 0;
1816  for (auto c1 = 0; c1 != 4; ++c1) {
1817  auto a1 = shell_atoms[c1];
1818  auto coord1 = 3 * a1;
1819  for (auto xyz1 = 0; xyz1 != 3; ++xyz1, ++coord1) {
1820  for (auto c2 = c1; c2 != 4; ++c2) {
1821  auto a2 = shell_atoms[c2];
1822  auto xyz2_start = (c1 == c2) ? xyz1 : 0;
1823  auto coord2 = 3 * a2 + xyz2_start;
1824  for (auto xyz2 = xyz2_start; xyz2 != 3;
1825  ++xyz2, ++coord2) {
1826  double scale =
1827  (coord1 == coord2 && c1 != c2) ? 2.0 : 1.0;
1828 
1829  const auto coord12 = upper_triangle_index(
1830  ncoords_times_two, coord1, coord2);
1831  auto op = thread_id * nderiv + coord12;
1832  add_shellset_to_dest(op, shellset_idx, coord1, coord2, scale);
1833  ++shellset_idx;
1834  }
1835  }
1836  }
1837  }
1838  } break;
1839 #undef upper_triangle_index
1840 
1841  default:
1842  assert(deriv_order <= 2 &&
1843  "support for 3rd and higher derivatives of the Fock "
1844  "matrix not yet implemented");
1845  }
1846  }
1847  }
1848  }
1849  }
1850 
1851  }; // end of lambda
1852 
1853  libint2::parallel_do(lambda);
1854 
1855  // accumulate contributions from all threads
1856  for (size_t t = 1; t != nthreads; ++t) {
1857  for (auto d = 0; d != nderiv; ++d) {
1858  G[d] += G[t * nderiv + d];
1859  }
1860  }
1861 
1862 #if defined(REPORT_INTEGRAL_TIMINGS)
1863  double time_for_ints = 0.0;
1864  for (auto& t : timers) {
1865  time_for_ints += t.read(0);
1866  }
1867  std::cout << "time for integrals = " << time_for_ints << std::endl;
1868  for (int t = 0; t != nthreads; ++t) engines[t].print_timers();
1869 #endif
1870 
1871  std::vector<Matrix> GG(nderiv);
1872  for (auto d = 0; d != nderiv; ++d) {
1873  GG[d] = 0.5 * (G[d] + G[d].transpose());
1874  }
1875 
1876  std::cout << "# of integrals = " << num_ints_computed << std::endl;
1877 
1878  // symmetrize the result and return
1879  return GG;
1880 }
1881 
1882 #endif
1883 
1884 Matrix compute_2body_fock_general(const BasisSet& obs, const Matrix& D,
1885  const BasisSet& D_bs, bool D_is_shelldiagonal,
1886  double precision) {
1887  const auto n = obs.nbf();
1888  const auto nshells = obs.size();
1889  const auto n_D = D_bs.nbf();
1890  assert(D.cols() == D.rows() && D.cols() == n_D);
1891 
1892  using libint2::nthreads;
1893  std::vector<Matrix> G(nthreads, Matrix::Zero(n, n));
1894 
1895  // construct the 2-electron repulsion integrals engine
1896  using libint2::Engine;
1897  std::vector<Engine> engines(nthreads);
1898  engines[0] = Engine(libint2::Operator::coulomb,
1899  std::max(obs.max_nprim(), D_bs.max_nprim()),
1900  std::max(obs.max_l(), D_bs.max_l()), 0);
1901  engines[0].set_precision(precision); // shellset-dependent precision control
1902  // will likely break positive
1903  // definiteness
1904  // stick with this simple recipe
1905  for (size_t i = 1; i != nthreads; ++i) {
1906  engines[i] = engines[0];
1907  }
1908  auto shell2bf = obs.shell2bf();
1909  auto shell2bf_D = D_bs.shell2bf();
1910 
1911  auto lambda = [&](int thread_id) {
1912 
1913  auto& engine = engines[thread_id];
1914  auto& g = G[thread_id];
1915  const auto& buf = engine.results();
1916 
1917  // loop over permutationally-unique set of shells
1918  for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1919  auto bf1_first = shell2bf[s1]; // first basis function in this shell
1920  auto n1 = obs[s1].size(); // number of basis functions in this shell
1921 
1922  for (auto s2 = 0; s2 <= s1; ++s2) {
1923  auto bf2_first = shell2bf[s2];
1924  auto n2 = obs[s2].size();
1925 
1926  for (auto s3 = 0; s3 < D_bs.size(); ++s3) {
1927  auto bf3_first = shell2bf_D[s3];
1928  auto n3 = D_bs[s3].size();
1929 
1930  auto s4_begin = D_is_shelldiagonal ? s3 : 0;
1931  auto s4_fence = D_is_shelldiagonal ? s3 + 1 : D_bs.size();
1932 
1933  for (auto s4 = s4_begin; s4 != s4_fence; ++s4, ++s1234) {
1934  if (s1234 % nthreads != thread_id) continue;
1935 
1936  auto bf4_first = shell2bf_D[s4];
1937  auto n4 = D_bs[s4].size();
1938 
1939  // compute the permutational degeneracy (i.e. # of equivalents) of
1940  // the given shell set
1941  auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1942 
1943  if (s3 >= s4) {
1944  auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1945  auto s1234_deg = s12_deg * s34_deg;
1946  // auto s1234_deg = s12_deg;
1947  engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1948  obs[s1], obs[s2], D_bs[s3], D_bs[s4]);
1949  const auto* buf_1234 = buf[0];
1950  if (buf_1234 != nullptr) {
1951  for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1952  const auto bf1 = f1 + bf1_first;
1953  for (auto f2 = 0; f2 != n2; ++f2) {
1954  const auto bf2 = f2 + bf2_first;
1955  for (auto f3 = 0; f3 != n3; ++f3) {
1956  const auto bf3 = f3 + bf3_first;
1957  for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1958  const auto bf4 = f4 + bf4_first;
1959 
1960  const auto value = buf_1234[f1234];
1961  const auto value_scal_by_deg = value * s1234_deg;
1962  g(bf1, bf2) += 2.0 * D(bf3, bf4) * value_scal_by_deg;
1963  }
1964  }
1965  }
1966  }
1967  }
1968  }
1969 
1970  engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1971  obs[s1], D_bs[s3], obs[s2], D_bs[s4]);
1972  const auto* buf_1324 = buf[0];
1973  if (buf_1324 == nullptr)
1974  continue; // if all integrals screened out, skip to next quartet
1975 
1976  for (auto f1 = 0, f1324 = 0; f1 != n1; ++f1) {
1977  const auto bf1 = f1 + bf1_first;
1978  for (auto f3 = 0; f3 != n3; ++f3) {
1979  const auto bf3 = f3 + bf3_first;
1980  for (auto f2 = 0; f2 != n2; ++f2) {
1981  const auto bf2 = f2 + bf2_first;
1982  for (auto f4 = 0; f4 != n4; ++f4, ++f1324) {
1983  const auto bf4 = f4 + bf4_first;
1984 
1985  const auto value = buf_1324[f1324];
1986  const auto value_scal_by_deg = value * s12_deg;
1987  g(bf1, bf2) -= D(bf3, bf4) * value_scal_by_deg;
1988  }
1989  }
1990  }
1991  }
1992  }
1993  }
1994  }
1995  }
1996 
1997  }; // thread lambda
1998 
1999  libint2::parallel_do(lambda);
2000 
2001  // accumulate contributions from all threads
2002  for (size_t i = 1; i != nthreads; ++i) {
2003  G[0] += G[i];
2004  }
2005 
2006  // symmetrize the result and return
2007  return 0.5 * (G[0] + G[0].transpose());
2008 }
2009 
2010 #ifdef HAVE_DENSITY_FITTING
2011 
2012 Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
2013 
2014  using libint2::nthreads;
2015 
2016  const auto n = obs.nbf();
2017  const auto ndf = dfbs.nbf();
2018 
2019  libint2::Timers<1> wall_timer;
2020  wall_timer.set_now_overhead(25);
2021  std::vector<libint2::Timers<5>> timers(nthreads);
2022  for(auto& timer: timers) timer.set_now_overhead(25);
2023 
2024  typedef btas::RangeNd<CblasRowMajor, std::array<long, 1>> Range1d;
2025  typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2d;
2026  typedef btas::Tensor<double, Range1d> Tensor1d;
2027  typedef btas::Tensor<double, Range2d> Tensor2d;
2028 
2029  // using first time? compute 3-center ints and transform to inv sqrt
2030  // representation
2031  if (xyK.size() == 0) {
2032 
2033  wall_timer.start(0);
2034 
2035  const auto nshells = obs.size();
2036  const auto nshells_df = dfbs.size();
2037  const auto& unitshell = libint2::Shell::unit();
2038 
2039  // construct the 2-electron 3-center repulsion integrals engine
2040  // since the code assumes (xx|xs) braket, and Engine/libint only produces
2041  // (xs|xx), use 4-center engine
2042  std::vector<libint2::Engine> engines(nthreads);
2043  engines[0] = libint2::Engine(libint2::Operator::coulomb,
2044  std::max(obs.max_nprim(), dfbs.max_nprim()),
2045  std::max(obs.max_l(), dfbs.max_l()), 0);
2046  engines[0].set(BraKet::xs_xx);
2047  for (size_t i = 1; i != nthreads; ++i) {
2048  engines[i] = engines[0];
2049  }
2050 
2051  auto shell2bf = obs.shell2bf();
2052  auto shell2bf_df = dfbs.shell2bf();
2053 
2054  Tensor3d Zxy{ndf, n, n};
2055 
2056  auto lambda = [&](int thread_id) {
2057 
2058  auto& engine = engines[thread_id];
2059  auto& timer = timers[thread_id];
2060  const auto& results = engine.results();
2061 
2062  // loop over permutationally-unique set of shells
2063  for (auto s1 = 0l, s123 = 0l; s1 != nshells_df; ++s1) {
2064  auto bf1_first = shell2bf_df[s1]; // first basis function in this shell
2065  auto n1 = dfbs[s1].size(); // number of basis functions in this shell
2066 
2067  for (auto s2 = 0; s2 != nshells; ++s2) {
2068  auto bf2_first = shell2bf[s2];
2069  auto n2 = obs[s2].size();
2070  const auto n12 = n1 * n2;
2071 
2072  for (auto s3 = 0; s3 != nshells; ++s3, ++s123) {
2073  if (s123 % nthreads != thread_id) continue;
2074 
2075  auto bf3_first = shell2bf[s3];
2076  auto n3 = obs[s3].size();
2077  const auto n123 = n12 * n3;
2078 
2079  timer.start(0);
2080 
2081  engine.compute2<Operator::coulomb, BraKet::xs_xx, 0>(
2082  dfbs[s1], unitshell, obs[s2], obs[s3]);
2083  const auto* buf = results[0];
2084  if (buf == nullptr)
2085  continue;
2086 
2087  timer.stop(0);
2088  timer.start(1);
2089 
2090  auto lower_bound = {bf1_first, bf2_first, bf3_first};
2091  auto upper_bound = {bf1_first + n1, bf2_first + n2, bf3_first + n3};
2092  auto view = btas::make_view(
2093  Zxy.range().slice(lower_bound, upper_bound), Zxy.storage());
2094  std::copy(buf, buf + n123, view.begin());
2095 
2096  timer.stop(1);
2097  } // s3
2098  } // s2
2099  } // s1
2100 
2101  }; // lambda
2102 
2103  libint2::parallel_do(lambda);
2104 
2105  wall_timer.stop(0);
2106 
2107  double ints_time = 0;
2108  for(const auto& timer: timers) ints_time += timer.read(0);
2109  std::cout << "time for Zxy integrals = " << ints_time << " (total from all threads)" << std::endl;
2110  double copy_time = 0;
2111  for(const auto& timer: timers) copy_time += timer.read(1);
2112  std::cout << "time for copying into BTAS = " << copy_time << " (total from all threads)"<< std::endl;
2113  std::cout << "wall time for Zxy integrals + copy = " << wall_timer.read(0) << std::endl;
2114 
2115  timers[0].start(2);
2116 
2117  Matrix V = compute_2body_2index_ints(dfbs);
2118  Eigen::LLT<Matrix> V_LLt(V);
2119  Matrix I = Matrix::Identity(ndf, ndf);
2120  auto L = V_LLt.matrixL();
2121  Matrix V_L = L;
2122  Matrix Linv = L.solve(I).transpose();
2123  // check
2124  // std::cout << "||V - L L^t|| = " << (V - V_L * V_L.transpose()).norm() <<
2125  // std::endl;
2126  // std::cout << "||I - L L^-1^t|| = " << (I - V_L *
2127  // Linv.transpose()).norm() << std::endl;
2128  // std::cout << "||V^-1 - L^-1 L^-1^t|| = " << (V.inverse() - Linv *
2129  // Linv.transpose()).norm() << std::endl;
2130 
2131  Tensor2d K{ndf, ndf};
2132  std::copy(Linv.data(), Linv.data() + ndf * ndf, K.begin());
2133 
2134  xyK = Tensor3d{n, n, ndf};
2135  btas::contract(1.0, Zxy, {1, 2, 3}, K, {1, 4}, 0.0, xyK, {2, 3, 4});
2136  Zxy = Tensor3d{0, 0, 0}; // release memory
2137 
2138  timers[0].stop(2);
2139  std::cout << "time for integrals metric tform = " << timers[0].read(2)
2140  << std::endl;
2141  } // if (xyK.size() == 0)
2142 
2143  // compute exchange
2144  timers[0].start(3);
2145 
2146  const auto nocc = Cocc.cols();
2147  Tensor2d Co{n, nocc};
2148  std::copy(Cocc.data(), Cocc.data() + n * nocc, Co.begin());
2149  Tensor3d xiK{n, nocc, ndf};
2150  btas::contract(1.0, xyK, {1, 2, 3}, Co, {2, 4}, 0.0, xiK, {1, 4, 3});
2151 
2152  Tensor2d G{n, n};
2153  btas::contract(1.0, xiK, {1, 2, 3}, xiK, {4, 2, 3}, 0.0, G, {1, 4});
2154 
2155  timers[0].stop(3);
2156  std::cout << "time for exchange = " << timers[0].read(3) << std::endl;
2157 
2158  // compute Coulomb
2159  timers[0].start(4);
2160 
2161  Tensor1d Jtmp{ndf};
2162  btas::contract(1.0, xiK, {1, 2, 3}, Co, {1, 2}, 0.0, Jtmp, {3});
2163  xiK = Tensor3d{0, 0, 0};
2164  btas::contract(2.0, xyK, {1, 2, 3}, Jtmp, {3}, -1.0, G, {1, 2});
2165 
2166  timers[0].stop(4);
2167  std::cout << "time for coulomb = " << timers[0].read(4) << std::endl;
2168 
2169  // copy result to an Eigen::Matrix
2170  Matrix result(n, n);
2171  std::copy(G.cbegin(), G.cend(), result.data());
2172  return result;
2173 }
2174 #endif // HAVE_DENSITY_FITTING
2175 
2176 // should be a unit test somewhere
2177 void api_basic_compile_test(const BasisSet& obs) {
2178  using namespace libint2;
2179  Engine onebody_engine(
2180  Operator::overlap, // will compute overlap ints
2181  obs.max_nprim(), // max # of primitives in shells this engine will
2182  // accept
2183  obs.max_l() // max angular momentum of shells this engine will accept
2184  );
2185  auto shell2bf = obs.shell2bf();
2186  const auto& results = onebody_engine.results();
2187  for (auto s1 = 0; s1 != obs.size(); ++s1) {
2188  for (auto s2 = 0; s2 != obs.size(); ++s2) {
2189  std::cout << "compute shell set {" << s1 << "," << s2 << "} ... ";
2190  onebody_engine.compute(obs[s1], obs[s2]);
2191  const auto* ints_shellset = results[0];
2192  std::cout << "done" << std::endl;
2193 
2194  auto bf1 = shell2bf[s1]; // first basis function in first shell
2195  auto n1 = obs[s1].size(); // number of basis functions in first shell
2196  auto bf2 = shell2bf[s2]; // first basis function in second shell
2197  auto n2 = obs[s2].size(); // number of basis functions in second shell
2198 
2199  // this iterates over integrals in the order they are packed in array
2200  // ints_shellset
2201  for (auto f1 = 0; f1 != n1; ++f1)
2202  for (auto f2 = 0; f2 != n2; ++f2)
2203  std::cout << " " << bf1 + f1 << " " << bf2 + f2 << " "
2204  << ints_shellset[f1 * n2 + f2] << std::endl;
2205  }
2206  }
2207 
2208  using libint2::Operator;
2209 
2210  std::vector<std::pair<double, double>> cgtg_params{
2211  {0.1, 0.2}, {0.3, 0.4}, {0.5, 0.6}};
2212  {
2213  auto K =
2214  compute_schwarz_ints<Operator::cgtg>(obs, obs, false, cgtg_params);
2215  std::cout << "cGTG Schwarz ints\n" << K << std::endl;
2216  }
2217  {
2218  auto K = compute_schwarz_ints<Operator::cgtg_x_coulomb>(obs, obs, false,
2219  cgtg_params);
2220  std::cout << "cGTG/r12 Schwarz ints\n" << K << std::endl;
2221  }
2222  {
2223  auto K =
2224  compute_schwarz_ints<Operator::delcgtg2>(obs, obs, false, cgtg_params);
2225  std::cout << "||Del.cGTG||^2 Schwarz ints\n" << K << std::endl;
2226  }
2227 
2228  { // test 2-index ints
2229  Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l());
2230  Engine eri2_engine = eri4_engine;
2231  eri2_engine.set(BraKet::xs_xs);
2232  auto shell2bf = obs.shell2bf();
2233  const auto& results4 = eri4_engine.results();
2234  const auto& results2 = eri2_engine.results();
2235  for (auto s1 = 0; s1 != obs.size(); ++s1) {
2236  for (auto s2 = 0; s2 != obs.size(); ++s2) {
2237  eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2238  eri2_engine.compute(obs[s1], obs[s2]);
2239 
2240  auto bf1 = shell2bf[s1]; // first basis function in first shell
2241  auto n1 = obs[s1].size(); // number of basis functions in first shell
2242  auto bf2 = shell2bf[s2]; // first basis function in second shell
2243  auto n2 = obs[s2].size(); // number of basis functions in second shell
2244 
2245  const auto* buf4 = results4[0];
2246  const auto* buf2 = results2[0];
2247 
2248  // this iterates over integrals in the order they are packed in array
2249  // ints_shellset
2250  for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2251  for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2252  assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2253  "2-center ints test failed");
2254  }
2255  }
2256  }
2257  { // test 3-index ints
2258  Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l());
2259  Engine eri3_engine = eri4_engine;
2260  eri3_engine.set(BraKet::xs_xx);
2261  auto shell2bf = obs.shell2bf();
2262  const auto& results4 = eri4_engine.results();
2263  const auto& results3 = eri3_engine.results();
2264  for (auto s1 = 0; s1 != obs.size(); ++s1) {
2265  for (auto s2 = 0; s2 != obs.size(); ++s2) {
2266  for (auto s3 = 0; s3 != obs.size(); ++s3) {
2267  eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], obs[s3]);
2268  eri3_engine.compute(obs[s1], obs[s2], obs[s3]);
2269 
2270  auto bf1 = shell2bf[s1]; // first basis function in first shell
2271  auto n1 = obs[s1].size(); // number of basis functions in first shell
2272  auto bf2 = shell2bf[s2]; // first basis function in second shell
2273  auto n2 =
2274  obs[s2].size(); // number of basis functions in second shell
2275  auto bf3 = shell2bf[s3]; // first basis function in third shell
2276  auto n3 = obs[s3].size(); // number of basis functions in third shell
2277 
2278  const auto* buf4 = results4[0];
2279  const auto* buf3 = results3[0];
2280 
2281  // this iterates over integrals in the order they are packed in array
2282  // ints_shellset
2283  for (auto f1 = 0, f123 = 0; f1 != n1; ++f1)
2284  for (auto f2 = 0; f2 != n2; ++f2)
2285  for (auto f3 = 0; f3 != n3; ++f3, ++f123)
2286  assert(std::abs(buf4[f123] - buf3[f123]) < 1e-12 &&
2287  "3-center ints test failed");
2288  }
2289  }
2290  }
2291  }
2292 
2293 #if LIBINT2_DERIV_ERI_ORDER
2294  { // test deriv 2-index ints
2295  Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 1);
2296  Engine eri2_engine = eri4_engine;
2297  eri2_engine.set(BraKet::xs_xs);
2298  auto shell2bf = obs.shell2bf();
2299  const auto& results4 = eri4_engine.results();
2300  const auto& results2 = eri2_engine.results();
2301  for (auto s1 = 0; s1 != obs.size(); ++s1) {
2302  for (auto s2 = 0; s2 != obs.size(); ++s2) {
2303  eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2304  eri2_engine.compute(obs[s1], obs[s2]);
2305 
2306  auto bf1 = shell2bf[s1]; // first basis function in first shell
2307  auto n1 = obs[s1].size(); // number of basis functions in first shell
2308  auto bf2 = shell2bf[s2]; // first basis function in second shell
2309  auto n2 = obs[s2].size(); // number of basis functions in second shell
2310 
2311  // loop over derivative shell sets
2312  for(auto d=0; d!=6; ++d) {
2313  const auto* buf4 = results4[d<3 ? d : d+3];
2314  const auto* buf2 = results2[d];
2315 
2316  // this iterates over integrals in the order they are packed in array
2317  // ints_shellset
2318  for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2319  for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2320  assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2321  "deriv 2-center ints test failed");
2322  }
2323 
2324  }
2325  }
2326  }
2327 #endif
2328 
2329 #if LIBINT2_DERIV_ERI_ORDER > 1
2330  { // test 2nd deriv 2-index ints
2331  Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 2);
2332  Engine eri2_engine = eri4_engine;
2333  eri2_engine.set(BraKet::xs_xs);
2334  auto shell2bf = obs.shell2bf();
2335  const auto& results4 = eri4_engine.results();
2336  const auto& results2 = eri2_engine.results();
2337  for (auto s1 = 0; s1 != obs.size(); ++s1) {
2338  for (auto s2 = 0; s2 != obs.size(); ++s2) {
2339  eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2340  eri2_engine.compute(obs[s1], obs[s2]);
2341 
2342  auto bf1 = shell2bf[s1]; // first basis function in first shell
2343  auto n1 = obs[s1].size(); // number of basis functions in first shell
2344  auto bf2 = shell2bf[s2]; // first basis function in second shell
2345  auto n2 = obs[s2].size(); // number of basis functions in second shell
2346 
2347  // loop over derivative shell sets
2348  for (auto d1 = 0, d12 = 0; d1 != 6; ++d1) {
2349  const auto dd1 = d1 < 3 ? d1 : d1 + 3;
2350  for (auto d2 = d1; d2 != 6; ++d2, ++d12) {
2351  const auto dd2 = d2 < 3 ? d2 : d2 + 3;
2352  const auto dd12 = dd1 * (24 - dd1 - 1) / 2 + dd2;
2353  const auto* buf4 = results4[dd12];
2354  const auto* buf2 = results2[d12];
2355 
2356  // this iterates over integrals in the order they are packed in
2357  // array
2358  // ints_shellset
2359  for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2360  for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2361  assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2362  "2nd deriv 2-center ints test failed");
2363  }
2364  }
2365  }
2366  }
2367  }
2368 #endif
2369 
2370 }
std::vector< std::pair< double, std::array< double, 3 > > > make_point_charges(const std::vector< libint2::Atom > &atoms)
converts a vector of Atoms to a vector of point charges
Definition: atom.h:237
void clear()
resets timers to zero
Definition: timer.h:78
generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
Definition: shell.h:81
static bool do_enforce_unit_normalization(defaultable_boolean flag=defaultable_boolean())
sets and/or reports whether the auto-renormalization to unity is set if called without arguments,...
Definition: shell.h:209
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
void parallel_do(Lambda &lambda)
fires off nthreads instances of lambda in parallel
Definition: 1body.h:168
DIIS (`‘direct inversion of iterative subspace’') extrapolation.
Definition: diis.h:132
dur_t stop(size_t t)
stops timer t
Definition: timer.h:67
void initialize(bool verbose=false)
initializes the libint library if not currently initialized, no-op otherwise
Definition: initialize.h:68
double read(size_t t) const
reads value (in seconds) of timer t , converted to double
Definition: timer.h:74
const std::vector< Real > & sto3g_ao_occupation_vector(size_t Z)
computes average orbital occupancies in the ground state of a neutral atoms
Definition: sto3g_atomic_density.h:84
std::vector< Atom > read_dotxyz(std::istream &is, const double bohr_to_angstrom=constants::codata_2010::bohr_to_angstrom)
reads the list of atoms from a file in the standard XYZ format supported by most chemistry software (...
Definition: atom.h:205
Array idential to C++0X arrays.
Definition: stdarray_bits.h:14
Iterates over all partitions of a non-negative integer into nonnegative integers in reverse lexicog...
Definition: intpart_iter.h:74
void start(size_t t)
starts timer t
Definition: timer.h:62
size_t sto3g_num_ao(size_t Z)
Definition: sto3g_atomic_density.h:56
void set_now_overhead(size_t ns)
use this to report the overhead of now() call; if set, the reported timings will be adjusted for this...
Definition: timer.h:57
static const Shell & unit()
Definition: shell.h:218
Definition: 1body.h:148
void finalize()
finalizes the libint library.
Definition: initialize.h:88
Timers aggregates N C++11 "timers"; used to high-resolution profile stages of integral computation.
Definition: timer.h:38