ergo
template_lapack_larre.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LARRE_HEADER
38#define TEMPLATE_LAPACK_LARRE_HEADER
39
40
43
44
45template<class Treal>
46int template_lapack_larre(const char *range, const integer *n, Treal *vl,
47 Treal *vu, integer *il, integer *iu, Treal *d__, Treal
48 *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal *
49 spltol, integer *nsplit, integer *isplit, integer *m, Treal *w,
50 Treal *werr, Treal *wgap, integer *iblock, integer *indexw,
51 Treal *gers, Treal *pivmin, Treal *work, integer *
52 iwork, integer *info)
53{
54 /* System generated locals */
55 integer i__1, i__2;
56 Treal d__1, d__2, d__3;
57
58
59 /* Local variables */
60 integer i__, j;
61 Treal s1, s2;
62 integer mb = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
63 Treal gl;
64 integer in, mm;
65 Treal gu;
66 integer cnt;
67 Treal eps, tau, tmp, rtl;
68 integer cnt1, cnt2;
69 Treal tmp1, eabs;
70 integer iend, jblk;
71 Treal eold;
72 integer indl;
73 Treal dmax__, emax;
74 integer wend = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
75 integer idum, indu;
76 Treal rtol;
77 integer iseed[4];
78 Treal avgap, sigma;
79 integer iinfo;
80 logical norep;
81 integer ibegin;
82 logical forceb;
83 integer irange = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
84 Treal sgndef;
85 integer wbegin;
86 Treal safmin, spdiam;
87 logical usedqd;
88 Treal clwdth, isleft;
89 Treal isrght, bsrtol, dpivot;
90
91
92/* -- LAPACK auxiliary routine (version 3.2) -- */
93/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
94/* November 2006 */
95
96/* .. Scalar Arguments .. */
97/* .. */
98/* .. Array Arguments .. */
99/* .. */
100
101/* Purpose */
102/* ======= */
103
104/* To find the desired eigenvalues of a given real symmetric */
105/* tridiagonal matrix T, DLARRE sets any "small" off-diagonal */
106/* elements to zero, and for each unreduced block T_i, it finds */
107/* (a) a suitable shift at one end of the block's spectrum, */
108/* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */
109/* (c) eigenvalues of each L_i D_i L_i^T. */
110/* The representations and eigenvalues found are then used by */
111/* DSTEMR to compute the eigenvectors of T. */
112/* The accuracy varies depending on whether bisection is used to */
113/* find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */
114/* conpute all and then discard any unwanted one. */
115/* As an added benefit, DLARRE also outputs the n */
116/* Gerschgorin intervals for the matrices L_i D_i L_i^T. */
117
118/* Arguments */
119/* ========= */
120
121/* RANGE (input) CHARACTER */
122/* = 'A': ("All") all eigenvalues will be found. */
123/* = 'V': ("Value") all eigenvalues in the half-open interval */
124/* (VL, VU] will be found. */
125/* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
126/* entire matrix) will be found. */
127
128/* N (input) INTEGER */
129/* The order of the matrix. N > 0. */
130
131/* VL (input/output) DOUBLE PRECISION */
132/* VU (input/output) DOUBLE PRECISION */
133/* If RANGE='V', the lower and upper bounds for the eigenvalues. */
134/* Eigenvalues less than or equal to VL, or greater than VU, */
135/* will not be returned. VL < VU. */
136/* If RANGE='I' or ='A', DLARRE computes bounds on the desired */
137/* part of the spectrum. */
138
139/* IL (input) INTEGER */
140/* IU (input) INTEGER */
141/* If RANGE='I', the indices (in ascending order) of the */
142/* smallest and largest eigenvalues to be returned. */
143/* 1 <= IL <= IU <= N. */
144
145/* D (input/output) DOUBLE PRECISION array, dimension (N) */
146/* On entry, the N diagonal elements of the tridiagonal */
147/* matrix T. */
148/* On exit, the N diagonal elements of the diagonal */
149/* matrices D_i. */
150
151/* E (input/output) DOUBLE PRECISION array, dimension (N) */
152/* On entry, the first (N-1) entries contain the subdiagonal */
153/* elements of the tridiagonal matrix T; E(N) need not be set. */
154/* On exit, E contains the subdiagonal elements of the unit */
155/* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */
156/* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */
157
158/* E2 (input/output) DOUBLE PRECISION array, dimension (N) */
159/* On entry, the first (N-1) entries contain the SQUARES of the */
160/* subdiagonal elements of the tridiagonal matrix T; */
161/* E2(N) need not be set. */
162/* On exit, the entries E2( ISPLIT( I ) ), */
163/* 1 <= I <= NSPLIT, have been set to zero */
164
165/* RTOL1 (input) DOUBLE PRECISION */
166/* RTOL2 (input) DOUBLE PRECISION */
167/* Parameters for bisection. */
168/* An interval [LEFT,RIGHT] has converged if */
169/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
170
171/* SPLTOL (input) DOUBLE PRECISION */
172/* The threshold for splitting. */
173
174/* NSPLIT (output) INTEGER */
175/* The number of blocks T splits into. 1 <= NSPLIT <= N. */
176
177/* ISPLIT (output) INTEGER array, dimension (N) */
178/* The splitting points, at which T breaks up into blocks. */
179/* The first block consists of rows/columns 1 to ISPLIT(1), */
180/* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
181/* etc., and the NSPLIT-th consists of rows/columns */
182/* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
183
184/* M (output) INTEGER */
185/* The total number of eigenvalues (of all L_i D_i L_i^T) */
186/* found. */
187
188/* W (output) DOUBLE PRECISION array, dimension (N) */
189/* The first M elements contain the eigenvalues. The */
190/* eigenvalues of each of the blocks, L_i D_i L_i^T, are */
191/* sorted in ascending order ( DLARRE may use the */
192/* remaining N-M elements as workspace). */
193
194/* WERR (output) DOUBLE PRECISION array, dimension (N) */
195/* The error bound on the corresponding eigenvalue in W. */
196
197/* WGAP (output) DOUBLE PRECISION array, dimension (N) */
198/* The separation from the right neighbor eigenvalue in W. */
199/* The gap is only with respect to the eigenvalues of the same block */
200/* as each block has its own representation tree. */
201/* Exception: at the right end of a block we store the left gap */
202
203/* IBLOCK (output) INTEGER array, dimension (N) */
204/* The indices of the blocks (submatrices) associated with the */
205/* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
206/* W(i) belongs to the first block from the top, =2 if W(i) */
207/* belongs to the second block, etc. */
208
209/* INDEXW (output) INTEGER array, dimension (N) */
210/* The indices of the eigenvalues within each block (submatrix); */
211/* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
212/* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */
213
214/* GERS (output) DOUBLE PRECISION array, dimension (2*N) */
215/* The N Gerschgorin intervals (the i-th Gerschgorin interval */
216/* is (GERS(2*i-1), GERS(2*i)). */
217
218/* PIVMIN (output) DOUBLE PRECISION */
219/* The minimum pivot in the Sturm sequence for T. */
220
221/* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */
222/* Workspace. */
223
224/* IWORK (workspace) INTEGER array, dimension (5*N) */
225/* Workspace. */
226
227/* INFO (output) INTEGER */
228/* = 0: successful exit */
229/* > 0: A problem occured in DLARRE. */
230/* < 0: One of the called subroutines signaled an internal problem. */
231/* Needs inspection of the corresponding parameter IINFO */
232/* for further information. */
233
234/* =-1: Problem in DLARRD. */
235/* = 2: No base representation could be found in MAXTRY iterations. */
236/* Increasing MAXTRY and recompilation might be a remedy. */
237/* =-3: Problem in DLARRB when computing the refined root */
238/* representation for DLASQ2. */
239/* =-4: Problem in DLARRB when preforming bisection on the */
240/* desired part of the spectrum. */
241/* =-5: Problem in DLASQ2. */
242/* =-6: Problem in DLASQ2. */
243
244/* Further Details */
245/* The base representations are required to suffer very little */
246/* element growth and consequently define all their eigenvalues to */
247/* high relative accuracy. */
248/* =============== */
249
250/* Based on contributions by */
251/* Beresford Parlett, University of California, Berkeley, USA */
252/* Jim Demmel, University of California, Berkeley, USA */
253/* Inderjit Dhillon, University of Texas, Austin, USA */
254/* Osni Marques, LBNL/NERSC, USA */
255/* Christof Voemel, University of California, Berkeley, USA */
256
257/* ===================================================================== */
258
259/* .. Parameters .. */
260/* .. */
261/* .. Local Scalars .. */
262/* .. */
263/* .. Local Arrays .. */
264/* .. */
265/* .. External Functions .. */
266/* .. */
267/* .. External Subroutines .. */
268/* .. */
269/* .. Intrinsic Functions .. */
270/* .. */
271/* .. Executable Statements .. */
272
273 /* Parameter adjustments */
274
275
276 /* Table of constant values */
277
278 integer c__1 = 1;
279 integer c__2 = 2;
280
281
282 --iwork;
283 --work;
284 --gers;
285 --indexw;
286 --iblock;
287 --wgap;
288 --werr;
289 --w;
290 --isplit;
291 --e2;
292 --e;
293 --d__;
294
295 /* Initialization added by Elias to get rid of compiler warnings. */
296 mm = 0;
297 /* Function Body */
298 *info = 0;
299
300/* Decode RANGE */
301
302 if (template_blas_lsame(range, "A")) {
303 irange = 1;
304 } else if (template_blas_lsame(range, "V")) {
305 irange = 3;
306 } else if (template_blas_lsame(range, "I")) {
307 irange = 2;
308 }
309 *m = 0;
310/* Get machine constants */
311 safmin = template_lapack_lamch("S",(Treal)0);
312 eps = template_lapack_lamch("P",(Treal)0);
313/* Set parameters */
314 rtl = template_blas_sqrt(eps);
315 bsrtol = template_blas_sqrt(eps);
316/* Treat case of 1x1 matrix for quick return */
317 if (*n == 1) {
318 if (irange == 1 || ( irange == 3 && d__[1] > *vl && d__[1] <= *vu ) ||
319 ( irange == 2 && *il == 1 && *iu == 1 ) ) {
320 *m = 1;
321 w[1] = d__[1];
322/* The computation error of the eigenvalue is zero */
323 werr[1] = 0.;
324 wgap[1] = 0.;
325 iblock[1] = 1;
326 indexw[1] = 1;
327 gers[1] = d__[1];
328 gers[2] = d__[1];
329 }
330/* store the shift for the initial RRR, which is zero in this case */
331 e[1] = 0.;
332 return 0;
333 }
334/* General case: tridiagonal matrix of order > 1 */
335
336/* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */
337/* Compute maximum off-diagonal entry and pivmin. */
338 gl = d__[1];
339 gu = d__[1];
340 eold = 0.;
341 emax = 0.;
342 e[*n] = 0.;
343 i__1 = *n;
344 for (i__ = 1; i__ <= i__1; ++i__) {
345 werr[i__] = 0.;
346 wgap[i__] = 0.;
347 eabs = (d__1 = e[i__], absMACRO(d__1));
348 if (eabs >= emax) {
349 emax = eabs;
350 }
351 tmp1 = eabs + eold;
352 gers[(i__ << 1) - 1] = d__[i__] - tmp1;
353/* Computing MIN */
354 d__1 = gl, d__2 = gers[(i__ << 1) - 1];
355 gl = minMACRO(d__1,d__2);
356 gers[i__ * 2] = d__[i__] + tmp1;
357/* Computing MAX */
358 d__1 = gu, d__2 = gers[i__ * 2];
359 gu = maxMACRO(d__1,d__2);
360 eold = eabs;
361/* L5: */
362 }
363/* The minimum pivot allowed in the Sturm sequence for T */
364/* Computing MAX */
365/* Computing 2nd power */
366 d__3 = emax;
367 d__1 = 1., d__2 = d__3 * d__3;
368 *pivmin = safmin * maxMACRO(d__1,d__2);
369/* Compute spectral diameter. The Gerschgorin bounds give an */
370/* estimate that is wrong by at most a factor of SQRT(2) */
371 spdiam = gu - gl;
372/* Compute splitting points */
373 template_lapack_larra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], &
374 iinfo);
375/* Can force use of bisection instead of faster DQDS. */
376/* Option left in the code for future multisection work. */
377 forceb = FALSE_;
378/* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */
379/* explicitly wants bisection. */
380 usedqd = irange == 1 && ! forceb;
381 if (irange == 1 && ! forceb) {
382/* Set interval [VL,VU] that contains all eigenvalues */
383 *vl = gl;
384 *vu = gu;
385 } else {
386/* We call DLARRD to find crude approximations to the eigenvalues */
387/* in the desired range. In case IRANGE = INDRNG, we also obtain the */
388/* interval (VL,VU] that contains all the wanted eigenvalues. */
389/* An interval [LEFT,RIGHT] has converged if */
390/* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */
391/* DLARRD needs a WORK of size 4*N, IWORK of size 3*N */
392 template_lapack_larrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[
393 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1],
394 vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo);
395 if (iinfo != 0) {
396 *info = -1;
397 return 0;
398 }
399/* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */
400 i__1 = *n;
401 for (i__ = mm + 1; i__ <= i__1; ++i__) {
402 w[i__] = 0.;
403 werr[i__] = 0.;
404 iblock[i__] = 0;
405 indexw[i__] = 0;
406/* L14: */
407 }
408 }
409/* ** */
410/* Loop over unreduced blocks */
411 ibegin = 1;
412 wbegin = 1;
413 i__1 = *nsplit;
414 for (jblk = 1; jblk <= i__1; ++jblk) {
415 iend = isplit[jblk];
416 in = iend - ibegin + 1;
417/* 1 X 1 block */
418 if (in == 1) {
419 if (irange == 1 || ( irange == 3 && d__[ibegin] > *vl && d__[ibegin]
420 <= *vu ) || ( irange == 2 && iblock[wbegin] == jblk ) ) {
421 ++(*m);
422 w[*m] = d__[ibegin];
423 werr[*m] = 0.;
424/* The gap for a single block doesn't matter for the later */
425/* algorithm and is assigned an arbitrary large value */
426 wgap[*m] = 0.;
427 iblock[*m] = jblk;
428 indexw[*m] = 1;
429 ++wbegin;
430 }
431/* E( IEND ) holds the shift for the initial RRR */
432 e[iend] = 0.;
433 ibegin = iend + 1;
434 goto L170;
435 }
436
437/* Blocks of size larger than 1x1 */
438
439/* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */
440 e[iend] = 0.;
441
442/* Find local outer bounds GL,GU for the block */
443 gl = d__[ibegin];
444 gu = d__[ibegin];
445 i__2 = iend;
446 for (i__ = ibegin; i__ <= i__2; ++i__) {
447/* Computing MIN */
448 d__1 = gers[(i__ << 1) - 1];
449 gl = minMACRO(d__1,gl);
450/* Computing MAX */
451 d__1 = gers[i__ * 2];
452 gu = maxMACRO(d__1,gu);
453/* L15: */
454 }
455 spdiam = gu - gl;
456 if (! (irange == 1 && ! forceb)) {
457/* Count the number of eigenvalues in the current block. */
458 mb = 0;
459 i__2 = mm;
460 for (i__ = wbegin; i__ <= i__2; ++i__) {
461 if (iblock[i__] == jblk) {
462 ++mb;
463 } else {
464 goto L21;
465 }
466/* L20: */
467 }
468L21:
469 if (mb == 0) {
470/* No eigenvalue in the current block lies in the desired range */
471/* E( IEND ) holds the shift for the initial RRR */
472 e[iend] = 0.;
473 ibegin = iend + 1;
474 goto L170;
475 } else {
476/* Decide whether dqds or bisection is more efficient */
477 usedqd = (Treal) mb > in * .5 && ! forceb;
478 wend = wbegin + mb - 1;
479/* Calculate gaps for the current block */
480/* In later stages, when representations for individual */
481/* eigenvalues are different, we use SIGMA = E( IEND ). */
482 sigma = 0.;
483 i__2 = wend - 1;
484 for (i__ = wbegin; i__ <= i__2; ++i__) {
485/* Computing MAX */
486 d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] +
487 werr[i__]);
488 wgap[i__] = maxMACRO(d__1,d__2);
489/* L30: */
490 }
491/* Computing MAX */
492 d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
493 wgap[wend] = maxMACRO(d__1,d__2);
494/* Find local index of the first and last desired evalue. */
495 indl = indexw[wbegin];
496 indu = indexw[wend];
497 }
498 }
499 if ( ( irange == 1 && ! forceb ) || usedqd) {
500/* Case of DQDS */
501/* Find approximations to the extremal eigenvalues of the block */
502 template_lapack_larrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
503 rtl, &tmp, &tmp1, &iinfo);
504 if (iinfo != 0) {
505 *info = -1;
506 return 0;
507 }
508/* Computing MAX */
509 d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1,
510 absMACRO(d__1));
511 isleft = maxMACRO(d__2,d__3);
512 template_lapack_larrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
513 rtl, &tmp, &tmp1, &iinfo);
514 if (iinfo != 0) {
515 *info = -1;
516 return 0;
517 }
518/* Computing MIN */
519 d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1,
520 absMACRO(d__1));
521 isrght = minMACRO(d__2,d__3);
522/* Improve the estimate of the spectral diameter */
523 spdiam = isrght - isleft;
524 } else {
525/* Case of bisection */
526/* Find approximations to the wanted extremal eigenvalues */
527/* Computing MAX */
528 d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 =
529 w[wbegin] - werr[wbegin], absMACRO(d__1));
530 isleft = maxMACRO(d__2,d__3);
531/* Computing MIN */
532 d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[
533 wend] + werr[wend], absMACRO(d__1));
534 isrght = minMACRO(d__2,d__3);
535 }
536/* Decide whether the base representation for the current block */
537/* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */
538/* should be on the left or the right end of the current block. */
539/* The strategy is to shift to the end which is "more populated" */
540/* Furthermore, decide whether to use DQDS for the computation of */
541/* the eigenvalue approximations at the end of DLARRE or bisection. */
542/* dqds is chosen if all eigenvalues are desired or the number of */
543/* eigenvalues to be computed is large compared to the blocksize. */
544 if (irange == 1 && ! forceb) {
545/* If all the eigenvalues have to be computed, we use dqd */
546 usedqd = TRUE_;
547/* INDL is the local index of the first eigenvalue to compute */
548 indl = 1;
549 indu = in;
550/* MB = number of eigenvalues to compute */
551 mb = in;
552 wend = wbegin + mb - 1;
553/* Define 1/4 and 3/4 points of the spectrum */
554 s1 = isleft + spdiam * .25;
555 s2 = isrght - spdiam * .25;
556 } else {
557/* DLARRD has computed IBLOCK and INDEXW for each eigenvalue */
558/* approximation. */
559/* choose sigma */
560 if (usedqd) {
561 s1 = isleft + spdiam * .25;
562 s2 = isrght - spdiam * .25;
563 } else {
564 tmp = minMACRO(isrght,*vu) - maxMACRO(isleft,*vl);
565 s1 = maxMACRO(isleft,*vl) + tmp * .25;
566 s2 = minMACRO(isrght,*vu) - tmp * .25;
567 }
568 }
569/* Compute the negcount at the 1/4 and 3/4 points */
570 if (mb > 1) {
571 template_lapack_larrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, &
572 cnt, &cnt1, &cnt2, &iinfo);
573 }
574 if (mb == 1) {
575 sigma = gl;
576 sgndef = 1.;
577 } else if (cnt1 - indl >= indu - cnt2) {
578 if (irange == 1 && ! forceb) {
579 sigma = maxMACRO(isleft,gl);
580 } else if (usedqd) {
581/* use Gerschgorin bound as shift to get pos def matrix */
582/* for dqds */
583 sigma = isleft;
584 } else {
585/* use approximation of the first desired eigenvalue of the */
586/* block as shift */
587 sigma = maxMACRO(isleft,*vl);
588 }
589 sgndef = 1.;
590 } else {
591 if (irange == 1 && ! forceb) {
592 sigma = minMACRO(isrght,gu);
593 } else if (usedqd) {
594/* use Gerschgorin bound as shift to get neg def matrix */
595/* for dqds */
596 sigma = isrght;
597 } else {
598/* use approximation of the first desired eigenvalue of the */
599/* block as shift */
600 sigma = minMACRO(isrght,*vu);
601 }
602 sgndef = -1.;
603 }
604/* An initial SIGMA has been chosen that will be used for computing */
605/* T - SIGMA I = L D L^T */
606/* Define the increment TAU of the shift in case the initial shift */
607/* needs to be refined to obtain a factorization with not too much */
608/* element growth. */
609 if (usedqd) {
610/* The initial SIGMA was to the outer end of the spectrum */
611/* the matrix is definite and we need not retreat. */
612 tau = spdiam * eps * *n + *pivmin * 2.;
613 } else {
614 if (mb > 1) {
615 clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin];
616 avgap = (d__1 = clwdth / (Treal) (wend - wbegin), absMACRO(
617 d__1));
618 if (sgndef == 1.) {
619/* Computing MAX */
620 d__1 = wgap[wbegin];
621 tau = maxMACRO(d__1,avgap) * .5;
622/* Computing MAX */
623 d__1 = tau, d__2 = werr[wbegin];
624 tau = maxMACRO(d__1,d__2);
625 } else {
626/* Computing MAX */
627 d__1 = wgap[wend - 1];
628 tau = maxMACRO(d__1,avgap) * .5;
629/* Computing MAX */
630 d__1 = tau, d__2 = werr[wend];
631 tau = maxMACRO(d__1,d__2);
632 }
633 } else {
634 tau = werr[wbegin];
635 }
636 }
637
638 for (idum = 1; idum <= 6; ++idum) {
639/* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */
640/* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */
641/* pivots in WORK(2*IN+1:3*IN) */
642 dpivot = d__[ibegin] - sigma;
643 work[1] = dpivot;
644 dmax__ = absMACRO(work[1]);
645 j = ibegin;
646 i__2 = in - 1;
647 for (i__ = 1; i__ <= i__2; ++i__) {
648 work[(in << 1) + i__] = 1. / work[i__];
649 tmp = e[j] * work[(in << 1) + i__];
650 work[in + i__] = tmp;
651 dpivot = d__[j + 1] - sigma - tmp * e[j];
652 work[i__ + 1] = dpivot;
653/* Computing MAX */
654 d__1 = dmax__, d__2 = absMACRO(dpivot);
655 dmax__ = maxMACRO(d__1,d__2);
656 ++j;
657/* L70: */
658 }
659/* check for element growth */
660 if (dmax__ > spdiam * 64.) {
661 norep = TRUE_;
662 } else {
663 norep = FALSE_;
664 }
665 if (usedqd && ! norep) {
666/* Ensure the definiteness of the representation */
667/* All entries of D (of L D L^T) must have the same sign */
668 i__2 = in;
669 for (i__ = 1; i__ <= i__2; ++i__) {
670 tmp = sgndef * work[i__];
671 if (tmp < 0.) {
672 norep = TRUE_;
673 }
674/* L71: */
675 }
676 }
677 if (norep) {
678/* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */
679/* shift which makes the matrix definite. So we should end up */
680/* here really only in the case of IRANGE = VALRNG or INDRNG. */
681 if (idum == 5) {
682 if (sgndef == 1.) {
683/* The fudged Gerschgorin shift should succeed */
684 sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.;
685 } else {
686 sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.;
687 }
688 } else {
689 sigma -= sgndef * tau;
690 tau *= 2.;
691 }
692 } else {
693/* an initial RRR is found */
694 goto L83;
695 }
696/* L80: */
697 }
698/* if the program reaches this point, no base representation could be */
699/* found in MAXTRY iterations. */
700 *info = 2;
701 return 0;
702L83:
703/* At this point, we have found an initial base representation */
704/* T - SIGMA I = L D L^T with not too much element growth. */
705/* Store the shift. */
706 e[iend] = sigma;
707/* Store D and L. */
708 template_blas_copy(&in, &work[1], &c__1, &d__[ibegin], &c__1);
709 i__2 = in - 1;
710 template_blas_copy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1);
711 if (mb > 1) {
712
713/* Perturb each entry of the base representation by a small */
714/* (but random) relative amount to overcome difficulties with */
715/* glued matrices. */
716
717 for (i__ = 1; i__ <= 4; ++i__) {
718 iseed[i__ - 1] = 1;
719/* L122: */
720 }
721 i__2 = (in << 1) - 1;
722 template_lapack_larnv(&c__2, iseed, &i__2, &work[1]);
723 i__2 = in - 1;
724 for (i__ = 1; i__ <= i__2; ++i__) {
725 d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.;
726 e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.;
727/* L125: */
728 }
729 d__[iend] *= eps * 4. * work[in] + 1.;
730
731 }
732
733/* Don't update the Gerschgorin intervals because keeping track */
734/* of the updates would be too much work in DLARRV. */
735/* We update W instead and use it to locate the proper Gerschgorin */
736/* intervals. */
737/* Compute the required eigenvalues of L D L' by bisection or dqds */
738 if (! usedqd) {
739/* If DLARRD has been used, shift the eigenvalue approximations */
740/* according to their representation. This is necessary for */
741/* a uniform DLARRV since dqds computes eigenvalues of the */
742/* shifted representation. In DLARRV, W will always hold the */
743/* UNshifted eigenvalue approximation. */
744 i__2 = wend;
745 for (j = wbegin; j <= i__2; ++j) {
746 w[j] -= sigma;
747 werr[j] += (d__1 = w[j], absMACRO(d__1)) * eps;
748/* L134: */
749 }
750/* call DLARRB to reduce eigenvalue error of the approximations */
751/* from DLARRD */
752 i__2 = iend - 1;
753 for (i__ = ibegin; i__ <= i__2; ++i__) {
754/* Computing 2nd power */
755 d__1 = e[i__];
756 work[i__] = d__[i__] * (d__1 * d__1);
757/* L135: */
758 }
759/* use bisection to find EV from INDL to INDU */
760 i__2 = indl - 1;
761 template_lapack_larrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1,
762 rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], &
763 work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, &
764 iinfo);
765 if (iinfo != 0) {
766 *info = -4;
767 return 0;
768 }
769/* DLARRB computes all gaps correctly except for the last one */
770/* Record distance to VU/GU */
771/* Computing MAX */
772 d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
773 wgap[wend] = maxMACRO(d__1,d__2);
774 i__2 = indu;
775 for (i__ = indl; i__ <= i__2; ++i__) {
776 ++(*m);
777 iblock[*m] = jblk;
778 indexw[*m] = i__;
779/* L138: */
780 }
781 } else {
782/* Call dqds to get all eigs (and then possibly delete unwanted */
783/* eigenvalues). */
784/* Note that dqds finds the eigenvalues of the L D L^T representation */
785/* of T to high relative accuracy. High relative accuracy */
786/* might be lost when the shift of the RRR is subtracted to obtain */
787/* the eigenvalues of T. However, T is not guaranteed to define its */
788/* eigenvalues to high relative accuracy anyway. */
789/* Set RTOL to the order of the tolerance used in DLASQ2 */
790/* This is an ESTIMATED error, the worst case bound is 4*N*EPS */
791/* which is usually too large and requires unnecessary work to be */
792/* done by bisection when computing the eigenvectors */
793 rtol = template_blas_log((Treal) in) * 4. * eps;
794 j = ibegin;
795 i__2 = in - 1;
796 for (i__ = 1; i__ <= i__2; ++i__) {
797 work[(i__ << 1) - 1] = (d__1 = d__[j], absMACRO(d__1));
798 work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1];
799 ++j;
800/* L140: */
801 }
802 work[(in << 1) - 1] = (d__1 = d__[iend], absMACRO(d__1));
803 work[in * 2] = 0.;
804 template_lapack_lasq2(&in, &work[1], &iinfo);
805 if (iinfo != 0) {
806/* If IINFO = -5 then an index is part of a tight cluster */
807/* and should be changed. The index is in IWORK(1) and the */
808/* gap is in WORK(N+1) */
809 *info = -5;
810 return 0;
811 } else {
812/* Test that all eigenvalues are positive as expected */
813 i__2 = in;
814 for (i__ = 1; i__ <= i__2; ++i__) {
815 if (work[i__] < 0.) {
816 *info = -6;
817 return 0;
818 }
819/* L149: */
820 }
821 }
822 if (sgndef > 0.) {
823 i__2 = indu;
824 for (i__ = indl; i__ <= i__2; ++i__) {
825 ++(*m);
826 w[*m] = work[in - i__ + 1];
827 iblock[*m] = jblk;
828 indexw[*m] = i__;
829/* L150: */
830 }
831 } else {
832 i__2 = indu;
833 for (i__ = indl; i__ <= i__2; ++i__) {
834 ++(*m);
835 w[*m] = -work[i__];
836 iblock[*m] = jblk;
837 indexw[*m] = i__;
838/* L160: */
839 }
840 }
841 i__2 = *m;
842 for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
843/* the value of RTOL below should be the tolerance in DLASQ2 */
844 werr[i__] = rtol * (d__1 = w[i__], absMACRO(d__1));
845/* L165: */
846 }
847 i__2 = *m - 1;
848 for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
849/* compute the right gap between the intervals */
850/* Computing MAX */
851 d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[
852 i__]);
853 wgap[i__] = maxMACRO(d__1,d__2);
854/* L166: */
855 }
856/* Computing MAX */
857 d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]);
858 wgap[*m] = maxMACRO(d__1,d__2);
859 }
860/* proceed with next block */
861 ibegin = iend + 1;
862 wbegin = wend + 1;
863L170:
864 ;
865 }
866
867 return 0;
868
869/* end of DLARRE */
870
871} /* dlarre_ */
872
873#endif
static const real gu
Definition fun-pz81.c:68
Treal template_blas_sqrt(Treal x)
Treal template_blas_log(Treal x)
logical template_blas_lsame(const char *ca, const char *cb)
Definition template_blas_common.cc:46
int integer
Definition template_blas_common.h:40
#define absMACRO(x)
Definition template_blas_common.h:47
#define minMACRO(a, b)
Definition template_blas_common.h:46
#define maxMACRO(a, b)
Definition template_blas_common.h:45
bool logical
Definition template_blas_common.h:41
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition template_blas_copy.h:42
#define TRUE_
Definition template_lapack_common.h:42
#define FALSE_
Definition template_lapack_common.h:43
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition template_lapack_lamch.h:202
int template_lapack_larnv(const integer *idist, integer *iseed, const integer *n, Treal *x)
Definition template_lapack_larnv.h:42
int template_lapack_larra(const integer *n, Treal *d__, Treal *e, Treal *e2, Treal *spltol, Treal *tnrm, integer *nsplit, integer *isplit, integer *info)
Definition template_lapack_larra.h:41
int template_lapack_larrb(integer *n, Treal *d__, Treal *lld, integer *ifirst, integer *ilast, Treal *rtol1, Treal *rtol2, integer *offset, Treal *w, Treal *wgap, Treal *werr, Treal *work, integer *iwork, Treal *pivmin, Treal *spdiam, integer *twist, integer *info)
Definition template_lapack_larrb.h:45
int template_lapack_larrc(const char *jobt, const integer *n, const Treal *vl, const Treal *vu, Treal *d__, Treal *e, Treal *pivmin, integer *eigcnt, integer *lcnt, integer *rcnt, integer *info)
Definition template_lapack_larrc.h:41
int template_lapack_larrd(const char *range, const char *order, const integer *n, Treal *vl, Treal *vu, integer *il, integer *iu, Treal *gers, Treal *reltol, Treal *d__, Treal *e, Treal *e2, Treal *pivmin, integer *nsplit, integer *isplit, integer *m, Treal *w, Treal *werr, Treal *wl, Treal *wu, integer *iblock, integer *indexw, Treal *work, integer *iwork, integer *info)
Definition template_lapack_larrd.h:41
int template_lapack_larre(const char *range, const integer *n, Treal *vl, Treal *vu, integer *il, integer *iu, Treal *d__, Treal *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal *spltol, integer *nsplit, integer *isplit, integer *m, Treal *w, Treal *werr, Treal *wgap, integer *iblock, integer *indexw, Treal *gers, Treal *pivmin, Treal *work, integer *iwork, integer *info)
Definition template_lapack_larre.h:46
int template_lapack_larrk(integer *n, integer *iw, Treal *gl, Treal *gu, Treal *d__, Treal *e2, Treal *pivmin, Treal *reltol, Treal *w, Treal *werr, integer *info)
Definition template_lapack_larrk.h:41
int template_lapack_lasq2(integer *n, Treal *z__, integer *info)
Definition template_lapack_lasq2.h:45