21 #ifndef _libint2_include_solidharmonics_h_ 22 #define _libint2_include_solidharmonics_h_ 24 #include <libint2/util/cxxstd.h> 25 #if LIBINT2_CPLUSPLUS_STD < 2011 26 # error "The simple Libint API requires C++11 support" 34 #ifndef LIBINT2_REALTYPE 35 # define LIBINT2_REALTYPE double 37 #include <libint2/shell.h> 38 #include <libint2/cgshell_ordering.h> 41 template <
typename Int>
42 signed char parity(Int i) {
49 namespace solidharmonics {
55 template <
typename Real>
56 class SolidHarmonicsCoefficients {
58 typedef ::libint2::value_type real_t;
60 SolidHarmonicsCoefficients() : l_(-1) {
62 SolidHarmonicsCoefficients(
unsigned char l) : l_(l) {
63 assert(l <= std::numeric_limits<signed char>::max());
67 SolidHarmonicsCoefficients(SolidHarmonicsCoefficients&& other) :
68 values_(std::move(other.values_)),
69 row_offset_(std::move(other.row_offset_)),
70 colidx_(std::move(other.colidx_)),
74 SolidHarmonicsCoefficients(
const SolidHarmonicsCoefficients& other) =
default;
76 void init(
unsigned char l) {
77 assert(l <= std::numeric_limits<signed char>::max());
82 static const SolidHarmonicsCoefficients& instance(
unsigned int l) {
83 static std::vector<SolidHarmonicsCoefficients> shg_coefs(SolidHarmonicsCoefficients::CtorHelperIter(0),
84 SolidHarmonicsCoefficients::CtorHelperIter(11));
90 const Real* row_values(
size_t r)
const {
91 return &values_[0] + row_offset_[r];
94 const unsigned char* row_idx(
size_t r)
const {
95 return &colidx_[0] + row_offset_[r];
98 unsigned char nnz(
size_t r)
const {
99 return row_offset_[r+1] - row_offset_[r];
103 std::vector<Real> values_;
104 std::vector<unsigned short> row_offset_;
105 std::vector<unsigned char> colidx_;
109 const unsigned short npure = 2*l_ + 1;
110 const unsigned short ncart = (l_ + 1) * (l_ + 2) / 2;
111 std::vector<Real> full_coeff(npure * ncart);
113 #if LIBINT_SHGSHELL_ORDERING == LIBINT_SHGSHELL_ORDERING_STANDARD 114 for(
signed char pure_idx=0, m=-l_; pure_idx!=npure; ++pure_idx, ++m) {
115 #elif LIBINT_SHGSHELL_ORDERING == LIBINT_SHGSHELL_ORDERING_GAUSSIAN 116 for(
signed char pure_idx=0, m=0; pure_idx!=npure; ++pure_idx, m=(m>0?-m:1-m)) {
118 # error "unknown value of macro LIBINT_SHGSHELL_ORDERING" 120 signed char cart_idx = 0;
121 signed char lx, ly, lz;
122 FOR_CART(lx, ly, lz, l_)
123 full_coeff[pure_idx * ncart + cart_idx] = coeff(l_, m, lx, ly, lz);
132 for(
size_t i=0; i!=full_coeff.size(); ++i)
133 nnz += full_coeff[i] == 0.0 ? 0 : 1;
137 row_offset_.resize(npure+1);
140 unsigned short pc = 0;
141 unsigned short cnt = 0;
142 for(
unsigned short p=0; p!=npure; ++p) {
143 row_offset_[p] = cnt;
144 for(
unsigned short c=0; c!=ncart; ++c, ++pc) {
145 if (full_coeff[pc] != 0.0) {
146 values_[cnt] = full_coeff[pc];
152 row_offset_[npure] = cnt;
162 static Real coeff(
int l,
int m,
int lx,
int ly,
int lz) {
163 using libint2::math::fac;
164 using libint2::math::df_Kminus1;
165 using libint2::math::bc;
167 auto abs_m = std::abs(m);
168 if ((lx + ly - abs_m)%2)
171 auto j = (lx + ly - abs_m)/2;
178 auto comp = (m >= 0) ? 1 : -1;
181 if (comp != parity(abs(i)))
185 Real pfac = sqrt( ((Real(fac[2*lx])*Real(fac[2*ly])*Real(fac[2*lz]))/fac[2*l]) *
186 ((Real(fac[l-abs_m]))/(fac[l])) *
187 (Real(1)/fac[l+abs_m]) *
188 (Real(1)/(fac[lx]*fac[ly]*fac[lz]))
193 pfac *= parity((i-1)/2);
198 auto i_max = (l-abs_m)/2;
200 for(
auto i=i_min;i<=i_max;i++) {
201 Real pfac1 = bc(l,i)*bc(i,j);
202 pfac1 *= (Real(parity(i)*fac[2*(l-i)])/fac[l-abs_m-2*i]);
204 const int k_min = std::max((lx-abs_m)/2,0);
205 const int k_max = std::min(j,lx/2);
206 for(
int k=k_min;k<=k_max;k++) {
208 sum1 += bc(j,k)*bc(abs_m,lx-2*k)*parity(k);
212 sum *= sqrt(Real(df_Kminus1[2*l])/(df_Kminus1[2*lx]*df_Kminus1[2*ly]*df_Kminus1[2*lz]));
214 Real result = (m == 0) ? pfac*sum : M_SQRT2*pfac*sum;
218 struct CtorHelperIter :
public std::iterator<std::input_iterator_tag, SolidHarmonicsCoefficients> {
220 using typename std::iterator<std::input_iterator_tag, SolidHarmonicsCoefficients>::value_type;
222 CtorHelperIter() =
default;
223 CtorHelperIter(
unsigned int l) : l_(l) {}
224 CtorHelperIter(
const CtorHelperIter&) =
default;
225 CtorHelperIter& operator=(
const CtorHelperIter& rhs) { l_ = rhs.l_;
return *
this; }
227 CtorHelperIter& operator++() { ++l_;
return *
this; }
228 CtorHelperIter& operator--() { assert(l_ > 0); --l_;
return *
this; }
231 return value_type(l_);
233 bool operator==(
const CtorHelperIter& rhs)
const {
236 bool operator!=(
const CtorHelperIter& rhs)
const {
237 return not (*
this == rhs);
245 template <
typename Real>
246 void transform_first(
size_t l,
size_t n2,
const Real *src, Real *tgt)
248 const auto& coefs = SolidHarmonicsCoefficients<Real>::instance(l);
250 const auto n = 2*l+1;
251 std::fill(tgt, tgt + n * n2, 0);
254 for(
size_t s=0; s!=n; ++s) {
255 const auto nc_s = coefs.nnz(s);
256 const auto* c_idxs = coefs.row_idx(s);
257 const auto* c_vals = coefs.row_values(s);
259 const auto tgt_blk_s_offset = s * n2;
261 for(
size_t ic=0; ic!=nc_s; ++ic) {
262 const auto c = c_idxs[ic];
263 const auto s_c_coeff = c_vals[ic];
265 auto src_blk_s = src + c * n2;
266 auto tgt_blk_s = tgt + tgt_blk_s_offset;
269 for(
size_t i2=0; i2!=n2; ++i2, ++src_blk_s, ++tgt_blk_s) {
270 *tgt_blk_s += s_c_coeff * *src_blk_s;
278 template <
typename Real>
279 void transform_first2(
int l1,
int l2,
size_t inner_dim,
const Real* source_blk, Real* target_blk) {
280 const auto& coefs1 = SolidHarmonicsCoefficients<Real>::instance(l1);
281 const auto& coefs2 = SolidHarmonicsCoefficients<Real>::instance(l2);
283 const auto ncart2 = (l2+1)*(l2+2)/2;
284 const auto npure1 = 2*l1+1;
285 const auto npure2 = 2*l2+1;
286 const auto ncart2inner = ncart2 * inner_dim;
287 const auto npure2inner = npure2 * inner_dim;
288 std::fill(target_blk, target_blk + npure1 * npure2inner, 0);
291 const size_t inner_blk_size = 8;
292 const size_t nblks = (inner_dim+inner_blk_size-1)/inner_blk_size;
293 for(
size_t blk=0; blk!=nblks; ++blk) {
294 const auto blk_begin = blk * inner_blk_size;
295 const auto blk_end = std::min(blk_begin + inner_blk_size,inner_dim);
296 const auto blk_size = blk_end - blk_begin;
299 for(
size_t s1=0; s1!=npure1; ++s1) {
300 const auto nc1 = coefs1.nnz(s1);
301 const auto* c1_idxs = coefs1.row_idx(s1);
302 const auto* c1_vals = coefs1.row_values(s1);
304 auto target_blk_s1 = target_blk + s1 * npure2inner + blk_begin;
307 for(
size_t s2=0; s2!=npure2; ++s2) {
308 const auto nc2 = coefs2.nnz(s2);
309 const auto* c2_idxs = coefs2.row_idx(s2);
310 const auto* c2_vals = coefs2.row_values(s2);
311 const auto s2inner = s2 * inner_dim;
312 const auto target_blk_s1_blk_begin = target_blk_s1 + s2inner;
314 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
315 auto c1 = c1_idxs[ic1];
316 auto s1_c1_coeff = c1_vals[ic1];
318 auto source_blk_c1 = source_blk + c1 * ncart2inner + blk_begin;
320 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
321 auto c2 = c2_idxs[ic2];
322 auto s2_c2_coeff = c2_vals[ic2];
323 const auto c2inner = c2 * inner_dim;
325 const auto coeff = s1_c1_coeff * s2_c2_coeff;
326 const auto source_blk_c1_blk_begin = source_blk_c1 + c2inner;
327 for(
auto b=0; b<blk_size; ++b)
328 target_blk_s1_blk_begin[b] += source_blk_c1_blk_begin[b] * coeff;
342 template <
typename Real>
343 void transform_inner(
size_t n1,
size_t l,
size_t n2,
const Real *src, Real *tgt)
345 const auto& coefs = SolidHarmonicsCoefficients<Real>::instance(l);
347 const auto nc = (l+1)*(l+2)/2;
348 const auto n = 2*l+1;
349 const auto nc_n2 = nc * n2;
350 const auto n_n2 = n * n2;
351 std::fill(tgt, tgt + n1 * n_n2, 0);
354 for(
size_t s=0; s!=n; ++s) {
355 const auto nc_s = coefs.nnz(s);
356 const auto* c_idxs = coefs.row_idx(s);
357 const auto* c_vals = coefs.row_values(s);
359 const auto tgt_blk_s_offset = s * n2;
361 for(
size_t ic=0; ic!=nc_s; ++ic) {
362 const auto c = c_idxs[ic];
363 const auto s_c_coeff = c_vals[ic];
365 auto src_blk_s = src + c * n2;
366 auto tgt_blk_s = tgt + tgt_blk_s_offset;
369 for(
size_t i1=0; i1!=n1; ++i1, src_blk_s+=nc_n2, tgt_blk_s+=n_n2) {
370 for(
size_t i2=0; i2!=n2; ++i2) {
371 tgt_blk_s[i2] += s_c_coeff * src_blk_s[i2];
380 template <
typename Real>
381 void transform_last(
size_t n1,
size_t l,
const Real *src, Real *tgt)
383 const auto& coefs = SolidHarmonicsCoefficients<Real>::instance(l);
385 const auto nc = (l+1)*(l+2)/2;
386 const auto n = 2*l+1;
387 std::fill(tgt, tgt + n1 * n, 0);
390 for(
size_t s=0; s!=n; ++s) {
391 const auto nc_s = coefs.nnz(s);
392 const auto* c_idxs = coefs.row_idx(s);
393 const auto* c_vals = coefs.row_values(s);
395 const auto tgt_blk_s_offset = s;
397 for(
size_t ic=0; ic!=nc_s; ++ic) {
398 const auto c = c_idxs[ic];
399 const auto s_c_coeff = c_vals[ic];
401 auto src_blk_s = src + c;
402 auto tgt_blk_s = tgt + tgt_blk_s_offset;
405 for(
size_t i1=0; i1!=n1; ++i1, src_blk_s+=nc, tgt_blk_s+=n) {
406 *tgt_blk_s += s_c_coeff * *src_blk_s;
414 template <
typename Real>
415 void tform_last2(
size_t n1,
int l_row,
int l_col,
const Real* source_blk, Real* target_blk) {
416 const auto& coefs_row = SolidHarmonicsCoefficients<Real>::instance(l_row);
417 const auto& coefs_col = SolidHarmonicsCoefficients<Real>::instance(l_col);
419 const auto ncart_row = (l_row+1)*(l_row+2)/2;
420 const auto ncart_col = (l_col+1)*(l_col+2)/2;
421 const auto ncart = ncart_row * ncart_col;
422 const auto npure_row = 2*l_row+1;
423 const auto npure_col = 2*l_col+1;
424 const auto npure = npure_row * npure_col;
425 std::fill(target_blk, target_blk + n1 * npure, 0);
427 for(
size_t i1=0; i1!=n1; ++i1, source_blk+=ncart, target_blk+=npure) {
429 for(
size_t s1=0; s1!=npure_row; ++s1) {
430 const auto nc1 = coefs_row.nnz(s1);
431 const auto* c1_idxs = coefs_row.row_idx(s1);
432 const auto* c1_vals = coefs_row.row_values(s1);
434 auto target_blk_s1 = target_blk + s1 * npure_col;
437 for(
size_t s2=0; s2!=npure_col; ++s2) {
438 const auto nc2 = coefs_col.nnz(s2);
439 const auto* c2_idxs = coefs_col.row_idx(s2);
440 const auto* c2_vals = coefs_col.row_values(s2);
442 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
443 auto c1 = c1_idxs[ic1];
444 auto s1_c1_coeff = c1_vals[ic1];
446 auto source_blk_c1 = source_blk + c1 * ncart_col;
448 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
449 auto c2 = c2_idxs[ic2];
450 auto s2_c2_coeff = c2_vals[ic2];
452 target_blk_s1[s2] += source_blk_c1[c2] * s1_c1_coeff * s2_c2_coeff;
464 template <
typename Real>
465 void tform(
int l_row,
int l_col,
const Real* source_blk, Real* target_blk) {
466 const auto& coefs_row = SolidHarmonicsCoefficients<Real>::instance(l_row);
467 const auto& coefs_col = SolidHarmonicsCoefficients<Real>::instance(l_col);
469 const auto ncart_col = (l_col+1)*(l_col+2)/2;
470 const auto npure_row = 2*l_row+1;
471 const auto npure_col = 2*l_col+1;
472 std::fill(target_blk, target_blk + npure_row * npure_col, 0);
475 for(
auto s1=0; s1!=npure_row; ++s1) {
476 const auto nc1 = coefs_row.nnz(s1);
477 const auto* c1_idxs = coefs_row.row_idx(s1);
478 const auto* c1_vals = coefs_row.row_values(s1);
480 auto target_blk_s1 = target_blk + s1 * npure_col;
483 for(
auto s2=0; s2!=npure_col; ++s2) {
484 const auto nc2 = coefs_col.nnz(s2);
485 const auto* c2_idxs = coefs_col.row_idx(s2);
486 const auto* c2_vals = coefs_col.row_values(s2);
488 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
489 auto c1 = c1_idxs[ic1];
490 auto s1_c1_coeff = c1_vals[ic1];
492 auto source_blk_c1 = source_blk + c1 * ncart_col;
494 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
495 auto c2 = c2_idxs[ic2];
496 auto s2_c2_coeff = c2_vals[ic2];
498 target_blk_s1[s2] += source_blk_c1[c2] * s1_c1_coeff * s2_c2_coeff;
509 template <
typename Real>
510 void tform_cols(
size_t nrow,
int l_col,
const Real* source_blk, Real* target_blk) {
511 return transform_last(nrow, l_col, source_blk, target_blk);
512 const auto& coefs_col = SolidHarmonicsCoefficients<Real>::instance(l_col);
514 const auto ncart_col = (l_col+1)*(l_col+2)/2;
515 const auto npure_col = 2*l_col+1;
518 for(
auto r1=0ul; r1!=nrow; ++r1) {
520 auto source_blk_r1 = source_blk + r1 * ncart_col;
521 auto target_blk_r1 = target_blk + r1 * npure_col;
524 for(
auto s2=0; s2!=npure_col; ++s2) {
525 const auto nc2 = coefs_col.nnz(s2);
526 const auto* c2_idxs = coefs_col.row_idx(s2);
527 const auto* c2_vals = coefs_col.row_values(s2);
529 Real r1_s2_value = 0.0;
531 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
532 auto c2 = c2_idxs[ic2];
533 auto s2_c2_coeff = c2_vals[ic2];
535 r1_s2_value += source_blk_r1[c2] * s2_c2_coeff;
539 target_blk_r1[s2] = r1_s2_value;
548 template <
typename Real>
549 void tform_rows(
int l_row,
size_t ncol,
const Real* source_blk, Real* target_blk) {
550 return transform_first(l_row, ncol, source_blk, target_blk);
551 const auto& coefs_row = SolidHarmonicsCoefficients<Real>::instance(l_row);
553 const auto npure_row = 2*l_row+1;
556 for(
auto s1=0; s1!=npure_row; ++s1) {
557 const auto nc1 = coefs_row.nnz(s1);
558 const auto* c1_idxs = coefs_row.row_idx(s1);
559 const auto* c1_vals = coefs_row.row_values(s1);
561 auto target_blk_s1 = target_blk + s1 * ncol;
564 for(decltype(ncol) c2=0; c2!=ncol; ++c2) {
566 Real s1_c2_value = 0.0;
567 auto source_blk_c2_offset = source_blk + c2;
569 for(std::size_t ic1=0; ic1!=nc1; ++ic1) {
570 auto c1 = c1_idxs[ic1];
571 auto s1_c1_coeff = c1_vals[ic1];
573 s1_c2_value += source_blk_c2_offset[c1 * ncol] * s1_c1_coeff;
577 target_blk_s1[c2] = s1_c2_value;
585 template <
typename Real,
typename Shell>
586 void tform(
const Shell& shell_row,
const Shell& shell_col,
const Real* source_blk, Real* target_blk) {
587 const auto trow = shell_row.pure;
588 const auto tcol = shell_col.pure;
592 Real localscratch[500];
593 tform_cols(shell_row.cartesian_size(), shell_col.l, source_blk, &localscratch[0]);
594 tform_rows(shell_row.l, shell_col.size(), &localscratch[0], target_blk);
597 tform_rows(shell_row.l, shell_col.cartesian_size(), source_blk, target_blk);
600 tform_cols(shell_row.cartesian_size(), shell_col.l, source_blk, target_blk);
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
SafePtr< CTimeEntity< typename ProductType< T, U >::result > > operator*(const SafePtr< CTimeEntity< T > > &A, const SafePtr< CTimeEntity< U > > &B)
Creates product A*B.
Definition: entity.h:280