ergo
template_lapack_lasq4.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LASQ4_HEADER
38#define TEMPLATE_LAPACK_LASQ4_HEADER
39
40template<class Treal>
41int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__,
42 integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1,
43 Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2,
44 Treal *tau, integer *ttype, Treal *g)
45{
46 /* System generated locals */
47 integer i__1;
48 Treal d__1, d__2;
49
50
51 /* Local variables */
52 Treal s = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
53 Treal a2, b1, b2;
54 integer i4, nn, np;
55 Treal gam, gap1, gap2;
56
57
58/* -- LAPACK routine (version 3.2) -- */
59
60/* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
61/* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
62/* -- Berkeley -- */
63/* -- November 2008 -- */
64
65/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
66/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
67
68/* .. Scalar Arguments .. */
69/* .. */
70/* .. Array Arguments .. */
71/* .. */
72
73/* Purpose */
74/* ======= */
75
76/* DLASQ4 computes an approximation TAU to the smallest eigenvalue */
77/* using values of d from the previous transform. */
78
79/* I0 (input) INTEGER */
80/* First index. */
81
82/* N0 (input) INTEGER */
83/* Last index. */
84
85/* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
86/* Z holds the qd array. */
87
88/* PP (input) INTEGER */
89/* PP=0 for ping, PP=1 for pong. */
90
91/* NOIN (input) INTEGER */
92/* The value of N0 at start of EIGTEST. */
93
94/* DMIN (input) DOUBLE PRECISION */
95/* Minimum value of d. */
96
97/* DMIN1 (input) DOUBLE PRECISION */
98/* Minimum value of d, excluding D( N0 ). */
99
100/* DMIN2 (input) DOUBLE PRECISION */
101/* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
102
103/* DN (input) DOUBLE PRECISION */
104/* d(N) */
105
106/* DN1 (input) DOUBLE PRECISION */
107/* d(N-1) */
108
109/* DN2 (input) DOUBLE PRECISION */
110/* d(N-2) */
111
112/* TAU (output) DOUBLE PRECISION */
113/* This is the shift. */
114
115/* TTYPE (output) INTEGER */
116/* Shift type. */
117
118/* G (input/output) REAL */
119/* G is passed as an argument in order to save its value between */
120/* calls to DLASQ4. */
121
122/* Further Details */
123/* =============== */
124/* CNST1 = 9/16 */
125
126/* ===================================================================== */
127
128/* .. Parameters .. */
129/* .. */
130/* .. Local Scalars .. */
131/* .. */
132/* .. Intrinsic Functions .. */
133/* .. */
134/* .. Executable Statements .. */
135
136/* A negative DMIN forces the shift to take that absolute value */
137/* TTYPE records the type of shift. */
138
139 /* Parameter adjustments */
140 --z__;
141
142 /* Function Body */
143 if (*dmin__ <= 0.) {
144 *tau = -(*dmin__);
145 *ttype = -1;
146 return 0;
147 }
148
149 nn = (*n0 << 2) + *pp;
150 if (*n0in == *n0) {
151
152/* No eigenvalues deflated. */
153
154 if (*dmin__ == *dn || *dmin__ == *dn1) {
155
156 b1 = template_blas_sqrt(z__[nn - 3]) * template_blas_sqrt(z__[nn - 5]);
157 b2 = template_blas_sqrt(z__[nn - 7]) * template_blas_sqrt(z__[nn - 9]);
158 a2 = z__[nn - 7] + z__[nn - 5];
159
160/* Cases 2 and 3. */
161
162 if (*dmin__ == *dn && *dmin1 == *dn1) {
163 gap2 = *dmin2 - a2 - *dmin2 * .25;
164 if (gap2 > 0. && gap2 > b2) {
165 gap1 = a2 - *dn - b2 / gap2 * b2;
166 } else {
167 gap1 = a2 - *dn - (b1 + b2);
168 }
169 if (gap1 > 0. && gap1 > b1) {
170/* Computing MAX */
171 d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
172 s = maxMACRO(d__1,d__2);
173 *ttype = -2;
174 } else {
175 s = 0.;
176 if (*dn > b1) {
177 s = *dn - b1;
178 }
179 if (a2 > b1 + b2) {
180/* Computing MIN */
181 d__1 = s, d__2 = a2 - (b1 + b2);
182 s = minMACRO(d__1,d__2);
183 }
184/* Computing MAX */
185 d__1 = s, d__2 = *dmin__ * .333;
186 s = maxMACRO(d__1,d__2);
187 *ttype = -3;
188 }
189 } else {
190
191/* Case 4. */
192
193 *ttype = -4;
194 s = *dmin__ * .25;
195 if (*dmin__ == *dn) {
196 gam = *dn;
197 a2 = 0.;
198 if (z__[nn - 5] > z__[nn - 7]) {
199 return 0;
200 }
201 b2 = z__[nn - 5] / z__[nn - 7];
202 np = nn - 9;
203 } else {
204 np = nn - (*pp << 1);
205 b2 = z__[np - 2];
206 gam = *dn1;
207 if (z__[np - 4] > z__[np - 2]) {
208 return 0;
209 }
210 a2 = z__[np - 4] / z__[np - 2];
211 if (z__[nn - 9] > z__[nn - 11]) {
212 return 0;
213 }
214 b2 = z__[nn - 9] / z__[nn - 11];
215 np = nn - 13;
216 }
217
218/* Approximate contribution to norm squared from I < NN-1. */
219
220 a2 += b2;
221 i__1 = (*i0 << 2) - 1 + *pp;
222 for (i4 = np; i4 >= i__1; i4 += -4) {
223 if (b2 == 0.) {
224 goto L20;
225 }
226 b1 = b2;
227 if (z__[i4] > z__[i4 - 2]) {
228 return 0;
229 }
230 b2 *= z__[i4] / z__[i4 - 2];
231 a2 += b2;
232 if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
233 goto L20;
234 }
235/* L10: */
236 }
237L20:
238 a2 *= 1.05;
239
240/* Rayleigh quotient residual bound. */
241
242 if (a2 < .563) {
243 s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
244 }
245 }
246 } else if (*dmin__ == *dn2) {
247
248/* Case 5. */
249
250 *ttype = -5;
251 s = *dmin__ * .25;
252
253/* Compute contribution to norm squared from I > NN-2. */
254
255 np = nn - (*pp << 1);
256 b1 = z__[np - 2];
257 b2 = z__[np - 6];
258 gam = *dn2;
259 if (z__[np - 8] > b2 || z__[np - 4] > b1) {
260 return 0;
261 }
262 a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
263
264/* Approximate contribution to norm squared from I < NN-2. */
265
266 if (*n0 - *i0 > 2) {
267 b2 = z__[nn - 13] / z__[nn - 15];
268 a2 += b2;
269 i__1 = (*i0 << 2) - 1 + *pp;
270 for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
271 if (b2 == 0.) {
272 goto L40;
273 }
274 b1 = b2;
275 if (z__[i4] > z__[i4 - 2]) {
276 return 0;
277 }
278 b2 *= z__[i4] / z__[i4 - 2];
279 a2 += b2;
280 if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
281 goto L40;
282 }
283/* L30: */
284 }
285L40:
286 a2 *= 1.05;
287 }
288
289 if (a2 < .563) {
290 s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
291 }
292 } else {
293
294/* Case 6, no information to guide us. */
295
296 if (*ttype == -6) {
297 *g += (1. - *g) * .333;
298 } else if (*ttype == -18) {
299 *g = .083250000000000005;
300 } else {
301 *g = .25;
302 }
303 s = *g * *dmin__;
304 *ttype = -6;
305 }
306
307 } else if (*n0in == *n0 + 1) {
308
309/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
310
311 if (*dmin1 == *dn1 && *dmin2 == *dn2) {
312
313/* Cases 7 and 8. */
314
315 *ttype = -7;
316 s = *dmin1 * .333;
317 if (z__[nn - 5] > z__[nn - 7]) {
318 return 0;
319 }
320 b1 = z__[nn - 5] / z__[nn - 7];
321 b2 = b1;
322 if (b2 == 0.) {
323 goto L60;
324 }
325 i__1 = (*i0 << 2) - 1 + *pp;
326 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
327 a2 = b1;
328 if (z__[i4] > z__[i4 - 2]) {
329 return 0;
330 }
331 b1 *= z__[i4] / z__[i4 - 2];
332 b2 += b1;
333 if (maxMACRO(b1,a2) * 100. < b2) {
334 goto L60;
335 }
336/* L50: */
337 }
338L60:
339 b2 = template_blas_sqrt(b2 * 1.05);
340/* Computing 2nd power */
341 d__1 = b2;
342 a2 = *dmin1 / (d__1 * d__1 + 1.);
343 gap2 = *dmin2 * .5 - a2;
344 if (gap2 > 0. && gap2 > b2 * a2) {
345/* Computing MAX */
346 d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
347 s = maxMACRO(d__1,d__2);
348 } else {
349/* Computing MAX */
350 d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
351 s = maxMACRO(d__1,d__2);
352 *ttype = -8;
353 }
354 } else {
355
356/* Case 9. */
357
358 s = *dmin1 * .25;
359 if (*dmin1 == *dn1) {
360 s = *dmin1 * .5;
361 }
362 *ttype = -9;
363 }
364
365 } else if (*n0in == *n0 + 2) {
366
367/* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
368
369/* Cases 10 and 11. */
370
371 if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
372 *ttype = -10;
373 s = *dmin2 * .333;
374 if (z__[nn - 5] > z__[nn - 7]) {
375 return 0;
376 }
377 b1 = z__[nn - 5] / z__[nn - 7];
378 b2 = b1;
379 if (b2 == 0.) {
380 goto L80;
381 }
382 i__1 = (*i0 << 2) - 1 + *pp;
383 for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
384 if (z__[i4] > z__[i4 - 2]) {
385 return 0;
386 }
387 b1 *= z__[i4] / z__[i4 - 2];
388 b2 += b1;
389 if (b1 * 100. < b2) {
390 goto L80;
391 }
392/* L70: */
393 }
394L80:
395 b2 = template_blas_sqrt(b2 * 1.05);
396/* Computing 2nd power */
397 d__1 = b2;
398 a2 = *dmin2 / (d__1 * d__1 + 1.);
399 gap2 = z__[nn - 7] + z__[nn - 9] - template_blas_sqrt(z__[nn - 11]) * template_blas_sqrt(z__[
400 nn - 9]) - a2;
401 if (gap2 > 0. && gap2 > b2 * a2) {
402/* Computing MAX */
403 d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
404 s = maxMACRO(d__1,d__2);
405 } else {
406/* Computing MAX */
407 d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
408 s = maxMACRO(d__1,d__2);
409 }
410 } else {
411 s = *dmin2 * .25;
412 *ttype = -11;
413 }
414 } else if (*n0in > *n0 + 2) {
415
416/* Case 12, more than two eigenvalues deflated. No information. */
417
418 s = 0.;
419 *ttype = -12;
420 }
421
422 *tau = s;
423 return 0;
424
425/* End of DLASQ4 */
426
427} /* dlasq4_ */
428
429#endif
Treal template_blas_sqrt(Treal x)
int integer
Definition: template_blas_common.h:40
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *tau, integer *ttype, Treal *g)
Definition: template_lapack_lasq4.h:41