ergo
template_lapack_laln2.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LALN2_HEADER
38#define TEMPLATE_LAPACK_LALN2_HEADER
39
40
41template<class Treal>
42int template_lapack_laln2(const logical *ltrans, const integer *na, const integer *nw,
43 const Treal *smin, const Treal *ca, const Treal *a, const integer *lda,
44 const Treal *d1, const Treal *d2, const Treal *b, const integer *ldb,
45 const Treal *wr, const Treal *wi, Treal *x, const integer *ldx,
46 Treal *scale, Treal *xnorm, integer *info)
47{
48/* -- LAPACK auxiliary routine (version 3.0) --
49 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
50 Courant Institute, Argonne National Lab, and Rice University
51 October 31, 1992
52
53
54 Purpose
55 =======
56
57 DLALN2 solves a system of the form (ca A - w D ) X = s B
58 or (ca A' - w D) X = s B with possible scaling ("s") and
59 perturbation of A. (A' means A-transpose.)
60
61 A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
62 real diagonal matrix, w is a real or complex value, and X and B are
63 NA x 1 matrices -- real if w is real, complex if w is complex. NA
64 may be 1 or 2.
65
66 If w is complex, X and B are represented as NA x 2 matrices,
67 the first column of each being the real part and the second
68 being the imaginary part.
69
70 "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
71 so chosen that X can be computed without overflow. X is further
72 scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
73 than overflow.
74
75 If both singular values of (ca A - w D) are less than SMIN,
76 SMIN*identity will be used instead of (ca A - w D). If only one
77 singular value is less than SMIN, one element of (ca A - w D) will be
78 perturbed enough to make the smallest singular value roughly SMIN.
79 If both singular values are at least SMIN, (ca A - w D) will not be
80 perturbed. In any case, the perturbation will be at most some small
81 multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values
82 are computed by infinity-norm approximations, and thus will only be
83 correct to a factor of 2 or so.
84
85 Note: all input quantities are assumed to be smaller than overflow
86 by a reasonable factor. (See BIGNUM.)
87
88 Arguments
89 ==========
90
91 LTRANS (input) LOGICAL
92 =.TRUE.: A-transpose will be used.
93 =.FALSE.: A will be used (not transposed.)
94
95 NA (input) INTEGER
96 The size of the matrix A. It may (only) be 1 or 2.
97
98 NW (input) INTEGER
99 1 if "w" is real, 2 if "w" is complex. It may only be 1
100 or 2.
101
102 SMIN (input) DOUBLE PRECISION
103 The desired lower bound on the singular values of A. This
104 should be a safe distance away from underflow or overflow,
105 say, between (underflow/machine precision) and (machine
106 precision * overflow ). (See BIGNUM and ULP.)
107
108 CA (input) DOUBLE PRECISION
109 The coefficient c, which A is multiplied by.
110
111 A (input) DOUBLE PRECISION array, dimension (LDA,NA)
112 The NA x NA matrix A.
113
114 LDA (input) INTEGER
115 The leading dimension of A. It must be at least NA.
116
117 D1 (input) DOUBLE PRECISION
118 The 1,1 element in the diagonal matrix D.
119
120 D2 (input) DOUBLE PRECISION
121 The 2,2 element in the diagonal matrix D. Not used if NW=1.
122
123 B (input) DOUBLE PRECISION array, dimension (LDB,NW)
124 The NA x NW matrix B (right-hand side). If NW=2 ("w" is
125 complex), column 1 contains the real part of B and column 2
126 contains the imaginary part.
127
128 LDB (input) INTEGER
129 The leading dimension of B. It must be at least NA.
130
131 WR (input) DOUBLE PRECISION
132 The real part of the scalar "w".
133
134 WI (input) DOUBLE PRECISION
135 The imaginary part of the scalar "w". Not used if NW=1.
136
137 X (output) DOUBLE PRECISION array, dimension (LDX,NW)
138 The NA x NW matrix X (unknowns), as computed by DLALN2.
139 If NW=2 ("w" is complex), on exit, column 1 will contain
140 the real part of X and column 2 will contain the imaginary
141 part.
142
143 LDX (input) INTEGER
144 The leading dimension of X. It must be at least NA.
145
146 SCALE (output) DOUBLE PRECISION
147 The scale factor that B must be multiplied by to insure
148 that overflow does not occur when computing X. Thus,
149 (ca A - w D) X will be SCALE*B, not B (ignoring
150 perturbations of A.) It will be at most 1.
151
152 XNORM (output) DOUBLE PRECISION
153 The infinity-norm of X, when X is regarded as an NA x NW
154 real matrix.
155
156 INFO (output) INTEGER
157 An error flag. It will be set to zero if no error occurs,
158 a negative number if an argument is in error, or a positive
159 number if ca A - w D had to be perturbed.
160 The possible values are:
161 = 0: No error occurred, and (ca A - w D) did not have to be
162 perturbed.
163 = 1: (ca A - w D) had to be perturbed to make its smallest
164 (or only) singular value greater than SMIN.
165 NOTE: In the interests of speed, this routine does not
166 check the inputs for errors.
167
168 =====================================================================
169
170 Parameter adjustments */
171 /* Initialized data */
172 logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ };
173 logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ };
174 integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2,
175 4,3,2,1 };
176 /* System generated locals */
177 integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset;
178 Treal d__1, d__2, d__3, d__4, d__5, d__6;
179 Treal equiv_0[4], equiv_1[4];
180 /* Local variables */
181 Treal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s;
182 integer j;
183 Treal u22abs;
184 integer icmax;
185 Treal bnorm, cnorm, smini;
186#define ci (equiv_0)
187#define cr (equiv_1)
188 Treal bignum, bi1, bi2, br1, br2, smlnum, xi1, xi2, xr1, xr2,
189 ci21, ci22, cr21, cr22, li21, csi, ui11, lr21, ui12, ui22;
190#define civ (equiv_0)
191 Treal csr, ur11, ur12, ur22;
192#define crv (equiv_1)
193#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
194#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
195#define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]
196#define ci_ref(a_1,a_2) ci[(a_2)*2 + a_1 - 3]
197#define cr_ref(a_1,a_2) cr[(a_2)*2 + a_1 - 3]
198#define ipivot_ref(a_1,a_2) ipivot[(a_2)*4 + a_1 - 5]
199
200 a_dim1 = *lda;
201 a_offset = 1 + a_dim1 * 1;
202 a -= a_offset;
203 b_dim1 = *ldb;
204 b_offset = 1 + b_dim1 * 1;
205 b -= b_offset;
206 x_dim1 = *ldx;
207 x_offset = 1 + x_dim1 * 1;
208 x -= x_offset;
209
210 /* Function Body
211
212 Compute BIGNUM */
213
214 smlnum = 2. * template_lapack_lamch("Safe minimum", (Treal)0);
215 bignum = 1. / smlnum;
216 smini = maxMACRO(*smin,smlnum);
217
218/* Don't check for input errors */
219
220 *info = 0;
221
222/* Standard Initializations */
223
224 *scale = 1.;
225
226 if (*na == 1) {
227
228/* 1 x 1 (i.e., scalar) system C X = B */
229
230 if (*nw == 1) {
231
232/* Real 1x1 system.
233
234 C = ca A - w D */
235
236 csr = *ca * a_ref(1, 1) - *wr * *d1;
237 cnorm = absMACRO(csr);
238
239/* If | C | < SMINI, use C = SMINI */
240
241 if (cnorm < smini) {
242 csr = smini;
243 cnorm = smini;
244 *info = 1;
245 }
246
247/* Check scaling for X = B / C */
248
249 bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1));
250 if (cnorm < 1. && bnorm > 1.) {
251 if (bnorm > bignum * cnorm) {
252 *scale = 1. / bnorm;
253 }
254 }
255
256/* Compute X */
257
258 x_ref(1, 1) = b_ref(1, 1) * *scale / csr;
259 *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1));
260 } else {
261
262/* Complex 1x1 system (w is complex)
263
264 C = ca A - w D */
265
266 csr = *ca * a_ref(1, 1) - *wr * *d1;
267 csi = -(*wi) * *d1;
268 cnorm = absMACRO(csr) + absMACRO(csi);
269
270/* If | C | < SMINI, use C = SMINI */
271
272 if (cnorm < smini) {
273 csr = smini;
274 csi = 0.;
275 cnorm = smini;
276 *info = 1;
277 }
278
279/* Check scaling for X = B / C */
280
281 bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2),
282 absMACRO(d__2));
283 if (cnorm < 1. && bnorm > 1.) {
284 if (bnorm > bignum * cnorm) {
285 *scale = 1. / bnorm;
286 }
287 }
288
289/* Compute X */
290
291 d__1 = *scale * b_ref(1, 1);
292 d__2 = *scale * b_ref(1, 2);
293 template_lapack_ladiv(&d__1, &d__2, &csr, &csi, &x_ref(1, 1), &x_ref(1, 2));
294 *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1)) + (d__2 = x_ref(1, 2),
295 absMACRO(d__2));
296 }
297
298 } else {
299
300/* 2x2 System
301
302 Compute the real part of C = ca A - w D (or ca A' - w D ) */
303
304 cr_ref(1, 1) = *ca * a_ref(1, 1) - *wr * *d1;
305 cr_ref(2, 2) = *ca * a_ref(2, 2) - *wr * *d2;
306 if (*ltrans) {
307 cr_ref(1, 2) = *ca * a_ref(2, 1);
308 cr_ref(2, 1) = *ca * a_ref(1, 2);
309 } else {
310 cr_ref(2, 1) = *ca * a_ref(2, 1);
311 cr_ref(1, 2) = *ca * a_ref(1, 2);
312 }
313
314 if (*nw == 1) {
315
316/* Real 2x2 system (w is real)
317
318 Find the largest element in C */
319
320 cmax = 0.;
321 icmax = 0;
322
323 for (j = 1; j <= 4; ++j) {
324 if ((d__1 = crv[j - 1], absMACRO(d__1)) > cmax) {
325 cmax = (d__1 = crv[j - 1], absMACRO(d__1));
326 icmax = j;
327 }
328/* L10: */
329 }
330
331/* If norm(C) < SMINI, use SMINI*identity. */
332
333 if (cmax < smini) {
334/* Computing MAX */
335 d__3 = (d__1 = b_ref(1, 1), absMACRO(d__1)), d__4 = (d__2 = b_ref(
336 2, 1), absMACRO(d__2));
337 bnorm = maxMACRO(d__3,d__4);
338 if (smini < 1. && bnorm > 1.) {
339 if (bnorm > bignum * smini) {
340 *scale = 1. / bnorm;
341 }
342 }
343 temp = *scale / smini;
344 x_ref(1, 1) = temp * b_ref(1, 1);
345 x_ref(2, 1) = temp * b_ref(2, 1);
346 *xnorm = temp * bnorm;
347 *info = 1;
348 return 0;
349 }
350
351/* Gaussian elimination with complete pivoting. */
352
353 ur11 = crv[icmax - 1];
354 cr21 = crv[ipivot_ref(2, icmax) - 1];
355 ur12 = crv[ipivot_ref(3, icmax) - 1];
356 cr22 = crv[ipivot_ref(4, icmax) - 1];
357 ur11r = 1. / ur11;
358 lr21 = ur11r * cr21;
359 ur22 = cr22 - ur12 * lr21;
360
361/* If smaller pivot < SMINI, use SMINI */
362
363 if (absMACRO(ur22) < smini) {
364 ur22 = smini;
365 *info = 1;
366 }
367 if (rswap[icmax - 1]) {
368 br1 = b_ref(2, 1);
369 br2 = b_ref(1, 1);
370 } else {
371 br1 = b_ref(1, 1);
372 br2 = b_ref(2, 1);
373 }
374 br2 -= lr21 * br1;
375/* Computing MAX */
376 d__2 = (d__1 = br1 * (ur22 * ur11r), absMACRO(d__1)), d__3 = absMACRO(br2);
377 bbnd = maxMACRO(d__2,d__3);
378 if (bbnd > 1. && absMACRO(ur22) < 1.) {
379 if (bbnd >= bignum * absMACRO(ur22)) {
380 *scale = 1. / bbnd;
381 }
382 }
383
384 xr2 = br2 * *scale / ur22;
385 xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12);
386 if (zswap[icmax - 1]) {
387 x_ref(1, 1) = xr2;
388 x_ref(2, 1) = xr1;
389 } else {
390 x_ref(1, 1) = xr1;
391 x_ref(2, 1) = xr2;
392 }
393/* Computing MAX */
394 d__1 = absMACRO(xr1), d__2 = absMACRO(xr2);
395 *xnorm = maxMACRO(d__1,d__2);
396
397/* Further scaling if norm(A) norm(X) > overflow */
398
399 if (*xnorm > 1. && cmax > 1.) {
400 if (*xnorm > bignum / cmax) {
401 temp = cmax / bignum;
402 x_ref(1, 1) = temp * x_ref(1, 1);
403 x_ref(2, 1) = temp * x_ref(2, 1);
404 *xnorm = temp * *xnorm;
405 *scale = temp * *scale;
406 }
407 }
408 } else {
409
410/* Complex 2x2 system (w is complex)
411
412 Find the largest element in C */
413
414 ci_ref(1, 1) = -(*wi) * *d1;
415 ci_ref(2, 1) = 0.;
416 ci_ref(1, 2) = 0.;
417 ci_ref(2, 2) = -(*wi) * *d2;
418 cmax = 0.;
419 icmax = 0;
420
421 for (j = 1; j <= 4; ++j) {
422 if ((d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1], absMACRO(
423 d__2)) > cmax) {
424 cmax = (d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1]
425 , absMACRO(d__2));
426 icmax = j;
427 }
428/* L20: */
429 }
430
431/* If norm(C) < SMINI, use SMINI*identity. */
432
433 if (cmax < smini) {
434/* Computing MAX */
435 d__5 = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2),
436 absMACRO(d__2)), d__6 = (d__3 = b_ref(2, 1), absMACRO(d__3)) + (
437 d__4 = b_ref(2, 2), absMACRO(d__4));
438 bnorm = maxMACRO(d__5,d__6);
439 if (smini < 1. && bnorm > 1.) {
440 if (bnorm > bignum * smini) {
441 *scale = 1. / bnorm;
442 }
443 }
444 temp = *scale / smini;
445 x_ref(1, 1) = temp * b_ref(1, 1);
446 x_ref(2, 1) = temp * b_ref(2, 1);
447 x_ref(1, 2) = temp * b_ref(1, 2);
448 x_ref(2, 2) = temp * b_ref(2, 2);
449 *xnorm = temp * bnorm;
450 *info = 1;
451 return 0;
452 }
453
454/* Gaussian elimination with complete pivoting. */
455
456 ur11 = crv[icmax - 1];
457 ui11 = civ[icmax - 1];
458 cr21 = crv[ipivot_ref(2, icmax) - 1];
459 ci21 = civ[ipivot_ref(2, icmax) - 1];
460 ur12 = crv[ipivot_ref(3, icmax) - 1];
461 ui12 = civ[ipivot_ref(3, icmax) - 1];
462 cr22 = crv[ipivot_ref(4, icmax) - 1];
463 ci22 = civ[ipivot_ref(4, icmax) - 1];
464 if (icmax == 1 || icmax == 4) {
465
466/* Code when off-diagonals of pivoted C are real */
467
468 if (absMACRO(ur11) > absMACRO(ui11)) {
469 temp = ui11 / ur11;
470/* Computing 2nd power */
471 d__1 = temp;
472 ur11r = 1. / (ur11 * (d__1 * d__1 + 1.));
473 ui11r = -temp * ur11r;
474 } else {
475 temp = ur11 / ui11;
476/* Computing 2nd power */
477 d__1 = temp;
478 ui11r = -1. / (ui11 * (d__1 * d__1 + 1.));
479 ur11r = -temp * ui11r;
480 }
481 lr21 = cr21 * ur11r;
482 li21 = cr21 * ui11r;
483 ur12s = ur12 * ur11r;
484 ui12s = ur12 * ui11r;
485 ur22 = cr22 - ur12 * lr21;
486 ui22 = ci22 - ur12 * li21;
487 } else {
488
489/* Code when diagonals of pivoted C are real */
490
491 ur11r = 1. / ur11;
492 ui11r = 0.;
493 lr21 = cr21 * ur11r;
494 li21 = ci21 * ur11r;
495 ur12s = ur12 * ur11r;
496 ui12s = ui12 * ur11r;
497 ur22 = cr22 - ur12 * lr21 + ui12 * li21;
498 ui22 = -ur12 * li21 - ui12 * lr21;
499 }
500 u22abs = absMACRO(ur22) + absMACRO(ui22);
501
502/* If smaller pivot < SMINI, use SMINI */
503
504 if (u22abs < smini) {
505 ur22 = smini;
506 ui22 = 0.;
507 *info = 1;
508 }
509 if (rswap[icmax - 1]) {
510 br2 = b_ref(1, 1);
511 br1 = b_ref(2, 1);
512 bi2 = b_ref(1, 2);
513 bi1 = b_ref(2, 2);
514 } else {
515 br1 = b_ref(1, 1);
516 br2 = b_ref(2, 1);
517 bi1 = b_ref(1, 2);
518 bi2 = b_ref(2, 2);
519 }
520 br2 = br2 - lr21 * br1 + li21 * bi1;
521 bi2 = bi2 - li21 * br1 - lr21 * bi1;
522/* Computing MAX */
523 d__1 = (absMACRO(br1) + absMACRO(bi1)) * (u22abs * (absMACRO(ur11r) + absMACRO(ui11r))
524 ), d__2 = absMACRO(br2) + absMACRO(bi2);
525 bbnd = maxMACRO(d__1,d__2);
526 if (bbnd > 1. && u22abs < 1.) {
527 if (bbnd >= bignum * u22abs) {
528 *scale = 1. / bbnd;
529 br1 = *scale * br1;
530 bi1 = *scale * bi1;
531 br2 = *scale * br2;
532 bi2 = *scale * bi2;
533 }
534 }
535
536 template_lapack_ladiv(&br2, &bi2, &ur22, &ui22, &xr2, &xi2);
537 xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2;
538 xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2;
539 if (zswap[icmax - 1]) {
540 x_ref(1, 1) = xr2;
541 x_ref(2, 1) = xr1;
542 x_ref(1, 2) = xi2;
543 x_ref(2, 2) = xi1;
544 } else {
545 x_ref(1, 1) = xr1;
546 x_ref(2, 1) = xr2;
547 x_ref(1, 2) = xi1;
548 x_ref(2, 2) = xi2;
549 }
550/* Computing MAX */
551 d__1 = absMACRO(xr1) + absMACRO(xi1), d__2 = absMACRO(xr2) + absMACRO(xi2);
552 *xnorm = maxMACRO(d__1,d__2);
553
554/* Further scaling if norm(A) norm(X) > overflow */
555
556 if (*xnorm > 1. && cmax > 1.) {
557 if (*xnorm > bignum / cmax) {
558 temp = cmax / bignum;
559 x_ref(1, 1) = temp * x_ref(1, 1);
560 x_ref(2, 1) = temp * x_ref(2, 1);
561 x_ref(1, 2) = temp * x_ref(1, 2);
562 x_ref(2, 2) = temp * x_ref(2, 2);
563 *xnorm = temp * *xnorm;
564 *scale = temp * *scale;
565 }
566 }
567 }
568 }
569
570 return 0;
571
572/* End of DLALN2 */
573
574} /* dlaln2_ */
575
576#undef ipivot_ref
577#undef cr_ref
578#undef ci_ref
579#undef x_ref
580#undef b_ref
581#undef a_ref
582#undef crv
583#undef civ
584#undef cr
585#undef ci
586
587
588#endif
int integer
Definition: template_blas_common.h:40
#define absMACRO(x)
Definition: template_blas_common.h:47
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
#define TRUE_
Definition: template_lapack_common.h:42
#define FALSE_
Definition: template_lapack_common.h:43
int template_lapack_ladiv(const Treal *a, const Treal *b, const Treal *c__, const Treal *d__, Treal *p, Treal *q)
Definition: template_lapack_ladiv.h:42
#define civ
#define a_ref(a_1, a_2)
#define crv
#define b_ref(a_1, a_2)
#define ci_ref(a_1, a_2)
#define ipivot_ref(a_1, a_2)
#define cr_ref(a_1, a_2)
int template_lapack_laln2(const logical *ltrans, const integer *na, const integer *nw, const Treal *smin, const Treal *ca, const Treal *a, const integer *lda, const Treal *d1, const Treal *d2, const Treal *b, const integer *ldb, const Treal *wr, const Treal *wi, Treal *x, const integer *ldx, Treal *scale, Treal *xnorm, integer *info)
Definition: template_lapack_laln2.h:42
#define x_ref(a_1, a_2)
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202