ergo
template_lapack_larrv.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LARRV_HEADER
38#define TEMPLATE_LAPACK_LARRV_HEADER
39
40
42
43
44template<class Treal>
45int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu,
46 Treal *d__, Treal *l, Treal *pivmin, integer *isplit,
47 integer *m, integer *dol, integer *dou, Treal *minrgp,
48 Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr,
49 Treal *wgap, integer *iblock, integer *indexw, Treal *gers,
50 Treal *z__, const integer *ldz, integer *isuppz, Treal *work,
51 integer *iwork, integer *info)
52{
53 /* System generated locals */
54 integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
55 Treal d__1, d__2;
56 logical L__1;
57
58
59 /* Local variables */
60 integer minwsize, i__, j, k, p, q, miniwsize, ii;
61 Treal gl;
62 integer im, in;
63 Treal gu, gap, eps, tau, tol, tmp;
64 integer zto;
65 Treal ztz;
66 integer iend, jblk;
67 Treal lgap;
68 integer done;
69 Treal rgap, left;
70 integer wend, iter;
71 Treal bstw = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
72 integer itmp1;
73 integer indld;
74 Treal fudge;
75 integer idone;
76 Treal sigma;
77 integer iinfo, iindr;
78 Treal resid;
79 logical eskip;
80 Treal right;
81 integer nclus, zfrom;
82 Treal rqtol;
83 integer iindc1, iindc2;
84 logical stp2ii;
85 Treal lambda;
86 integer ibegin, indeig;
87 logical needbs;
88 integer indlld;
89 Treal sgndef, mingma;
90 integer oldien, oldncl, wbegin;
91 Treal spdiam;
92 integer negcnt;
93 integer oldcls;
94 Treal savgap;
95 integer ndepth;
96 Treal ssigma;
97 logical usedbs;
98 integer iindwk, offset;
99 Treal gaptol;
100 integer newcls, oldfst, indwrk, windex, oldlst;
101 logical usedrq;
102 integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
103 Treal bstres = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
104 integer newsiz, zusedu, zusedw;
105 Treal nrminv, rqcorr;
106 logical tryrqc;
107 integer isupmx;
108
109
110/* -- LAPACK auxiliary routine (version 3.2) -- */
111/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
112/* November 2006 */
113
114/* .. Scalar Arguments .. */
115/* .. */
116/* .. Array Arguments .. */
117/* .. */
118
119/* Purpose */
120/* ======= */
121
122/* DLARRV computes the eigenvectors of the tridiagonal matrix */
123/* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
124/* The input eigenvalues should have been computed by DLARRE. */
125
126/* Arguments */
127/* ========= */
128
129/* N (input) INTEGER */
130/* The order of the matrix. N >= 0. */
131
132/* VL (input) DOUBLE PRECISION */
133/* VU (input) DOUBLE PRECISION */
134/* Lower and upper bounds of the interval that contains the desired */
135/* eigenvalues. VL < VU. Needed to compute gaps on the left or right */
136/* end of the extremal eigenvalues in the desired RANGE. */
137
138/* D (input/output) DOUBLE PRECISION array, dimension (N) */
139/* On entry, the N diagonal elements of the diagonal matrix D. */
140/* On exit, D may be overwritten. */
141
142/* L (input/output) DOUBLE PRECISION array, dimension (N) */
143/* On entry, the (N-1) subdiagonal elements of the unit */
144/* bidiagonal matrix L are in elements 1 to N-1 of L */
145/* (if the matrix is not splitted.) At the end of each block */
146/* is stored the corresponding shift as given by DLARRE. */
147/* On exit, L is overwritten. */
148
149/* PIVMIN (in) DOUBLE PRECISION */
150/* The minimum pivot allowed in the Sturm sequence. */
151
152/* ISPLIT (input) INTEGER array, dimension (N) */
153/* The splitting points, at which T breaks up into blocks. */
154/* The first block consists of rows/columns 1 to */
155/* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
156/* through ISPLIT( 2 ), etc. */
157
158/* M (input) INTEGER */
159/* The total number of input eigenvalues. 0 <= M <= N. */
160
161/* DOL (input) INTEGER */
162/* DOU (input) INTEGER */
163/* If the user wants to compute only selected eigenvectors from all */
164/* the eigenvalues supplied, he can specify an index range DOL:DOU. */
165/* Or else the setting DOL=1, DOU=M should be applied. */
166/* Note that DOL and DOU refer to the order in which the eigenvalues */
167/* are stored in W. */
168/* If the user wants to compute only selected eigenpairs, then */
169/* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
170/* computed eigenvectors. All other columns of Z are set to zero. */
171
172/* MINRGP (input) DOUBLE PRECISION */
173
174/* RTOL1 (input) DOUBLE PRECISION */
175/* RTOL2 (input) DOUBLE PRECISION */
176/* Parameters for bisection. */
177/* An interval [LEFT,RIGHT] has converged if */
178/* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
179
180/* W (input/output) DOUBLE PRECISION array, dimension (N) */
181/* The first M elements of W contain the APPROXIMATE eigenvalues for */
182/* which eigenvectors are to be computed. The eigenvalues */
183/* should be grouped by split-off block and ordered from */
184/* smallest to largest within the block ( The output array */
185/* W from DLARRE is expected here ). Furthermore, they are with */
186/* respect to the shift of the corresponding root representation */
187/* for their block. On exit, W holds the eigenvalues of the */
188/* UNshifted matrix. */
189
190/* WERR (input/output) DOUBLE PRECISION array, dimension (N) */
191/* The first M elements contain the semiwidth of the uncertainty */
192/* interval of the corresponding eigenvalue in W */
193
194/* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */
195/* The separation from the right neighbor eigenvalue in W. */
196
197/* IBLOCK (input) INTEGER array, dimension (N) */
198/* The indices of the blocks (submatrices) associated with the */
199/* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
200/* W(i) belongs to the first block from the top, =2 if W(i) */
201/* belongs to the second block, etc. */
202
203/* INDEXW (input) INTEGER array, dimension (N) */
204/* The indices of the eigenvalues within each block (submatrix); */
205/* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
206/* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
207
208/* GERS (input) DOUBLE PRECISION array, dimension (2*N) */
209/* The N Gerschgorin intervals (the i-th Gerschgorin interval */
210/* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
211/* be computed from the original UNshifted matrix. */
212
213/* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
214/* If INFO = 0, the first M columns of Z contain the */
215/* orthonormal eigenvectors of the matrix T */
216/* corresponding to the input eigenvalues, with the i-th */
217/* column of Z holding the eigenvector associated with W(i). */
218/* Note: the user must ensure that at least max(1,M) columns are */
219/* supplied in the array Z. */
220
221/* LDZ (input) INTEGER */
222/* The leading dimension of the array Z. LDZ >= 1, and if */
223/* JOBZ = 'V', LDZ >= max(1,N). */
224
225/* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */
226/* The support of the eigenvectors in Z, i.e., the indices */
227/* indicating the nonzero elements in Z. The I-th eigenvector */
228/* is nonzero only in elements ISUPPZ( 2*I-1 ) through */
229/* ISUPPZ( 2*I ). */
230
231/* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */
232
233/* IWORK (workspace) INTEGER array, dimension (7*N) */
234
235/* INFO (output) INTEGER */
236/* = 0: successful exit */
237
238/* > 0: A problem occured in DLARRV. */
239/* < 0: One of the called subroutines signaled an internal problem. */
240/* Needs inspection of the corresponding parameter IINFO */
241/* for further information. */
242
243/* =-1: Problem in DLARRB when refining a child's eigenvalues. */
244/* =-2: Problem in DLARRF when computing the RRR of a child. */
245/* When a child is inside a tight cluster, it can be difficult */
246/* to find an RRR. A partial remedy from the user's point of */
247/* view is to make the parameter MINRGP smaller and recompile. */
248/* However, as the orthogonality of the computed vectors is */
249/* proportional to 1/MINRGP, the user should be aware that */
250/* he might be trading in precision when he decreases MINRGP. */
251/* =-3: Problem in DLARRB when refining a single eigenvalue */
252/* after the Rayleigh correction was rejected. */
253/* = 5: The Rayleigh Quotient Iteration failed to converge to */
254/* full accuracy in MAXITR steps. */
255
256/* Further Details */
257/* =============== */
258
259/* Based on contributions by */
260/* Beresford Parlett, University of California, Berkeley, USA */
261/* Jim Demmel, University of California, Berkeley, USA */
262/* Inderjit Dhillon, University of Texas, Austin, USA */
263/* Osni Marques, LBNL/NERSC, USA */
264/* Christof Voemel, University of California, Berkeley, USA */
265
266/* ===================================================================== */
267
268/* .. Parameters .. */
269/* .. */
270/* .. Local Scalars .. */
271/* .. */
272/* .. External Functions .. */
273/* .. */
274/* .. External Subroutines .. */
275/* .. */
276/* .. Intrinsic Functions .. */
277/* .. */
278/* .. Executable Statements .. */
279/* .. */
280/* The first N entries of WORK are reserved for the eigenvalues */
281
282/* Table of constant values */
283
284 Treal c_b5 = 0.;
285 integer c__1 = 1;
286 integer c__2 = 2;
287
288
289 /* Parameter adjustments */
290 --d__;
291 --l;
292 --isplit;
293 --w;
294 --werr;
295 --wgap;
296 --iblock;
297 --indexw;
298 --gers;
299 z_dim1 = *ldz;
300 z_offset = 1 + z_dim1;
301 z__ -= z_offset;
302 --isuppz;
303 --work;
304 --iwork;
305
306 /* Function Body */
307 indld = *n + 1;
308 indlld = (*n << 1) + 1;
309 indwrk = *n * 3 + 1;
310 minwsize = *n * 12;
311 i__1 = minwsize;
312 for (i__ = 1; i__ <= i__1; ++i__) {
313 work[i__] = 0.;
314/* L5: */
315 }
316/* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
317/* factorization used to compute the FP vector */
318 iindr = 0;
319/* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
320/* layer and the one above. */
321 iindc1 = *n;
322 iindc2 = *n << 1;
323 iindwk = *n * 3 + 1;
324 miniwsize = *n * 7;
325 i__1 = miniwsize;
326 for (i__ = 1; i__ <= i__1; ++i__) {
327 iwork[i__] = 0;
328/* L10: */
329 }
330 zusedl = 1;
331 if (*dol > 1) {
332/* Set lower bound for use of Z */
333 zusedl = *dol - 1;
334 }
335 zusedu = *m;
336 if (*dou < *m) {
337/* Set lower bound for use of Z */
338 zusedu = *dou + 1;
339 }
340/* The width of the part of Z that is used */
341 zusedw = zusedu - zusedl + 1;
342 template_lapack_laset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
343 eps = template_lapack_lamch("Precision",(Treal)0);
344 rqtol = eps * 2.;
345
346/* Set expert flags for standard code. */
347 tryrqc = TRUE_;
348 if (*dol == 1 && *dou == *m) {
349 } else {
350/* Only selected eigenpairs are computed. Since the other evalues */
351/* are not refined by RQ iteration, bisection has to compute to full */
352/* accuracy. */
353 *rtol1 = eps * 4.;
354 *rtol2 = eps * 4.;
355 }
356/* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
357/* desired eigenvalues. The support of the nonzero eigenvector */
358/* entries is contained in the interval IBEGIN:IEND. */
359/* Remark that if k eigenpairs are desired, then the eigenvectors */
360/* are stored in k contiguous columns of Z. */
361/* DONE is the number of eigenvectors already computed */
362 done = 0;
363 ibegin = 1;
364 wbegin = 1;
365 i__1 = iblock[*m];
366 for (jblk = 1; jblk <= i__1; ++jblk) {
367 iend = isplit[jblk];
368 sigma = l[iend];
369/* Find the eigenvectors of the submatrix indexed IBEGIN */
370/* through IEND. */
371 wend = wbegin - 1;
372L15:
373 if (wend < *m) {
374 if (iblock[wend + 1] == jblk) {
375 ++wend;
376 goto L15;
377 }
378 }
379 if (wend < wbegin) {
380 ibegin = iend + 1;
381 goto L170;
382 } else if (wend < *dol || wbegin > *dou) {
383 ibegin = iend + 1;
384 wbegin = wend + 1;
385 goto L170;
386 }
387/* Find local spectral diameter of the block */
388 gl = gers[(ibegin << 1) - 1];
389 gu = gers[ibegin * 2];
390 i__2 = iend;
391 for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
392/* Computing MIN */
393 d__1 = gers[(i__ << 1) - 1];
394 gl = minMACRO(d__1,gl);
395/* Computing MAX */
396 d__1 = gers[i__ * 2];
397 gu = maxMACRO(d__1,gu);
398/* L20: */
399 }
400 spdiam = gu - gl;
401/* OLDIEN is the last index of the previous block */
402 oldien = ibegin - 1;
403/* Calculate the size of the current block */
404 in = iend - ibegin + 1;
405/* The number of eigenvalues in the current block */
406 im = wend - wbegin + 1;
407/* This is for a 1x1 block */
408 if (ibegin == iend) {
409 ++done;
410 z__[ibegin + wbegin * z_dim1] = 1.;
411 isuppz[(wbegin << 1) - 1] = ibegin;
412 isuppz[wbegin * 2] = ibegin;
413 w[wbegin] += sigma;
414 work[wbegin] = w[wbegin];
415 ibegin = iend + 1;
416 ++wbegin;
417 goto L170;
418 }
419/* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
420/* Note that these can be approximations, in this case, the corresp. */
421/* entries of WERR give the size of the uncertainty interval. */
422/* The eigenvalue approximations will be refined when necessary as */
423/* high relative accuracy is required for the computation of the */
424/* corresponding eigenvectors. */
425 template_blas_copy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
426/* We store in W the eigenvalue approximations w.r.t. the original */
427/* matrix T. */
428 i__2 = im;
429 for (i__ = 1; i__ <= i__2; ++i__) {
430 w[wbegin + i__ - 1] += sigma;
431/* L30: */
432 }
433/* NDEPTH is the current depth of the representation tree */
434 ndepth = 0;
435/* PARITY is either 1 or 0 */
436 parity = 1;
437/* NCLUS is the number of clusters for the next level of the */
438/* representation tree, we start with NCLUS = 1 for the root */
439 nclus = 1;
440 iwork[iindc1 + 1] = 1;
441 iwork[iindc1 + 2] = im;
442/* IDONE is the number of eigenvectors already computed in the current */
443/* block */
444 idone = 0;
445/* loop while( IDONE.LT.IM ) */
446/* generate the representation tree for the current block and */
447/* compute the eigenvectors */
448L40:
449 if (idone < im) {
450/* This is a crude protection against infinitely deep trees */
451 if (ndepth > *m) {
452 *info = -2;
453 return 0;
454 }
455/* breadth first processing of the current level of the representation */
456/* tree: OLDNCL = number of clusters on current level */
457 oldncl = nclus;
458/* reset NCLUS to count the number of child clusters */
459 nclus = 0;
460
461 parity = 1 - parity;
462 if (parity == 0) {
463 oldcls = iindc1;
464 newcls = iindc2;
465 } else {
466 oldcls = iindc2;
467 newcls = iindc1;
468 }
469/* Process the clusters on the current level */
470 i__2 = oldncl;
471 for (i__ = 1; i__ <= i__2; ++i__) {
472 j = oldcls + (i__ << 1);
473/* OLDFST, OLDLST = first, last index of current cluster. */
474/* cluster indices start with 1 and are relative */
475/* to WBEGIN when accessing W, WGAP, WERR, Z */
476 oldfst = iwork[j - 1];
477 oldlst = iwork[j];
478 if (ndepth > 0) {
479/* Retrieve relatively robust representation (RRR) of cluster */
480/* that has been computed at the previous level */
481/* The RRR is stored in Z and overwritten once the eigenvectors */
482/* have been computed or when the cluster is refined */
483 if (*dol == 1 && *dou == *m) {
484/* Get representation from location of the leftmost evalue */
485/* of the cluster */
486 j = wbegin + oldfst - 1;
487 } else {
488 if (wbegin + oldfst - 1 < *dol) {
489/* Get representation from the left end of Z array */
490 j = *dol - 1;
491 } else if (wbegin + oldfst - 1 > *dou) {
492/* Get representation from the right end of Z array */
493 j = *dou;
494 } else {
495 j = wbegin + oldfst - 1;
496 }
497 }
498 template_blas_copy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
499, &c__1);
500 i__3 = in - 1;
501 template_blas_copy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
502 ibegin], &c__1);
503 sigma = z__[iend + (j + 1) * z_dim1];
504/* Set the corresponding entries in Z to zero */
505 template_lapack_laset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j
506 * z_dim1], ldz);
507 }
508/* Compute DL and DLL of current RRR */
509 i__3 = iend - 1;
510 for (j = ibegin; j <= i__3; ++j) {
511 tmp = d__[j] * l[j];
512 work[indld - 1 + j] = tmp;
513 work[indlld - 1 + j] = tmp * l[j];
514/* L50: */
515 }
516 if (ndepth > 0) {
517/* P and Q are index of the first and last eigenvalue to compute */
518/* within the current block */
519 p = indexw[wbegin - 1 + oldfst];
520 q = indexw[wbegin - 1 + oldlst];
521/* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
522/* thru' Q-OFFSET elements of these arrays are to be used. */
523/* OFFSET = P-OLDFST */
524 offset = indexw[wbegin] - 1;
525/* perform limited bisection (if necessary) to get approximate */
526/* eigenvalues to the precision needed. */
527 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p,
528 &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
529 wbegin], &werr[wbegin], &work[indwrk], &iwork[
530 iindwk], pivmin, &spdiam, &in, &iinfo);
531 if (iinfo != 0) {
532 *info = -1;
533 return 0;
534 }
535/* We also recompute the extremal gaps. W holds all eigenvalues */
536/* of the unshifted matrix and must be used for computation */
537/* of WGAP, the entries of WORK might stem from RRRs with */
538/* different shifts. The gaps from WBEGIN-1+OLDFST to */
539/* WBEGIN-1+OLDLST are correctly computed in DLARRB. */
540/* However, we only allow the gaps to become greater since */
541/* this is what should happen when we decrease WERR */
542 if (oldfst > 1) {
543/* Computing MAX */
544 d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin +
545 oldfst - 1] - werr[wbegin + oldfst - 1] - w[
546 wbegin + oldfst - 2] - werr[wbegin + oldfst -
547 2];
548 wgap[wbegin + oldfst - 2] = maxMACRO(d__1,d__2);
549 }
550 if (wbegin + oldlst - 1 < wend) {
551/* Computing MAX */
552 d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin +
553 oldlst] - werr[wbegin + oldlst] - w[wbegin +
554 oldlst - 1] - werr[wbegin + oldlst - 1];
555 wgap[wbegin + oldlst - 1] = maxMACRO(d__1,d__2);
556 }
557/* Each time the eigenvalues in WORK get refined, we store */
558/* the newly found approximation with all shifts applied in W */
559 i__3 = oldlst;
560 for (j = oldfst; j <= i__3; ++j) {
561 w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
562/* L53: */
563 }
564 }
565/* Process the current node. */
566 newfst = oldfst;
567 i__3 = oldlst;
568 for (j = oldfst; j <= i__3; ++j) {
569 if (j == oldlst) {
570/* we are at the right end of the cluster, this is also the */
571/* boundary of the child cluster */
572 newlst = j;
573 } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
574 wbegin + j - 1], absMACRO(d__1))) {
575/* the right relative gap is big enough, the child cluster */
576/* (NEWFST,..,NEWLST) is well separated from the following */
577 newlst = j;
578 } else {
579/* inside a child cluster, the relative gap is not */
580/* big enough. */
581 goto L140;
582 }
583/* Compute size of child cluster found */
584 newsiz = newlst - newfst + 1;
585/* NEWFTT is the place in Z where the new RRR or the computed */
586/* eigenvector is to be stored */
587 if (*dol == 1 && *dou == *m) {
588/* Store representation at location of the leftmost evalue */
589/* of the cluster */
590 newftt = wbegin + newfst - 1;
591 } else {
592 if (wbegin + newfst - 1 < *dol) {
593/* Store representation at the left end of Z array */
594 newftt = *dol - 1;
595 } else if (wbegin + newfst - 1 > *dou) {
596/* Store representation at the right end of Z array */
597 newftt = *dou;
598 } else {
599 newftt = wbegin + newfst - 1;
600 }
601 }
602 if (newsiz > 1) {
603
604/* Current child is not a singleton but a cluster. */
605/* Compute and store new representation of child. */
606
607
608/* Compute left and right cluster gap. */
609
610/* LGAP and RGAP are not computed from WORK because */
611/* the eigenvalue approximations may stem from RRRs */
612/* different shifts. However, W hold all eigenvalues */
613/* of the unshifted matrix. Still, the entries in WGAP */
614/* have to be computed from WORK since the entries */
615/* in W might be of the same order so that gaps are not */
616/* exhibited correctly for very close eigenvalues. */
617 if (newfst == 1) {
618/* Computing MAX */
619 d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
620 lgap = maxMACRO(d__1,d__2);
621 } else {
622 lgap = wgap[wbegin + newfst - 2];
623 }
624 rgap = wgap[wbegin + newlst - 1];
625
626/* Compute left- and rightmost eigenvalue of child */
627/* to high precision in order to shift as close */
628/* as possible and obtain as large relative gaps */
629/* as possible */
630
631 for (k = 1; k <= 2; ++k) {
632 if (k == 1) {
633 p = indexw[wbegin - 1 + newfst];
634 } else {
635 p = indexw[wbegin - 1 + newlst];
636 }
637 offset = indexw[wbegin] - 1;
638 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
639 - 1], &p, &p, &rqtol, &rqtol, &offset, &
640 work[wbegin], &wgap[wbegin], &werr[wbegin]
641, &work[indwrk], &iwork[iindwk], pivmin, &
642 spdiam, &in, &iinfo);
643/* L55: */
644 }
645
646 if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1
647 > *dou) {
648/* if the cluster contains no desired eigenvalues */
649/* skip the computation of that branch of the rep. tree */
650
651/* We could skip before the refinement of the extremal */
652/* eigenvalues of the child, but then the representation */
653/* tree could be different from the one when nothing is */
654/* skipped. For this reason we skip at this place. */
655 idone = idone + newlst - newfst + 1;
656 goto L139;
657 }
658
659/* Compute RRR of child cluster. */
660/* Note that the new RRR is stored in Z */
661
662/* DLARRF needs LWORK = 2*N */
663 template_lapack_larrf(&in, &d__[ibegin], &l[ibegin], &work[indld +
664 ibegin - 1], &newfst, &newlst, &work[wbegin],
665 &wgap[wbegin], &werr[wbegin], &spdiam, &lgap,
666 &rgap, pivmin, &tau, &z__[ibegin + newftt *
667 z_dim1], &z__[ibegin + (newftt + 1) * z_dim1],
668 &work[indwrk], &iinfo);
669 if (iinfo == 0) {
670/* a new RRR for the cluster was found by DLARRF */
671/* update shift and store it */
672 ssigma = sigma + tau;
673 z__[iend + (newftt + 1) * z_dim1] = ssigma;
674/* WORK() are the midpoints and WERR() the semi-width */
675/* Note that the entries in W are unchanged. */
676 i__4 = newlst;
677 for (k = newfst; k <= i__4; ++k) {
678 fudge = eps * 3. * (d__1 = work[wbegin + k -
679 1], absMACRO(d__1));
680 work[wbegin + k - 1] -= tau;
681 fudge += eps * 4. * (d__1 = work[wbegin + k -
682 1], absMACRO(d__1));
683/* Fudge errors */
684 werr[wbegin + k - 1] += fudge;
685/* Gaps are not fudged. Provided that WERR is small */
686/* when eigenvalues are close, a zero gap indicates */
687/* that a new representation is needed for resolving */
688/* the cluster. A fudge could lead to a wrong decision */
689/* of judging eigenvalues 'separated' which in */
690/* reality are not. This could have a negative impact */
691/* on the orthogonality of the computed eigenvectors. */
692/* L116: */
693 }
694 ++nclus;
695 k = newcls + (nclus << 1);
696 iwork[k - 1] = newfst;
697 iwork[k] = newlst;
698 } else {
699 *info = -2;
700 return 0;
701 }
702 } else {
703
704/* Compute eigenvector of singleton */
705
706 iter = 0;
707
708 tol = template_blas_log((Treal) in) * 4. * eps;
709
710 k = newfst;
711 windex = wbegin + k - 1;
712/* Computing MAX */
713 i__4 = windex - 1;
714 windmn = maxMACRO(i__4,1);
715/* Computing MIN */
716 i__4 = windex + 1;
717 windpl = minMACRO(i__4,*m);
718 lambda = work[windex];
719 ++done;
720/* Check if eigenvector computation is to be skipped */
721 if (windex < *dol || windex > *dou) {
722 eskip = TRUE_;
723 goto L125;
724 } else {
725 eskip = FALSE_;
726 }
727 left = work[windex] - werr[windex];
728 right = work[windex] + werr[windex];
729 indeig = indexw[windex];
730/* Note that since we compute the eigenpairs for a child, */
731/* all eigenvalue approximations are w.r.t the same shift. */
732/* In this case, the entries in WORK should be used for */
733/* computing the gaps since they exhibit even very small */
734/* differences in the eigenvalues, as opposed to the */
735/* entries in W which might "look" the same. */
736 if (k == 1) {
737/* In the case RANGE='I' and with not much initial */
738/* accuracy in LAMBDA and VL, the formula */
739/* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
740/* can lead to an overestimation of the left gap and */
741/* thus to inadequately early RQI 'convergence'. */
742/* Prevent this by forcing a small left gap. */
743/* Computing MAX */
744 d__1 = absMACRO(left), d__2 = absMACRO(right);
745 lgap = eps * maxMACRO(d__1,d__2);
746 } else {
747 lgap = wgap[windmn];
748 }
749 if (k == im) {
750/* In the case RANGE='I' and with not much initial */
751/* accuracy in LAMBDA and VU, the formula */
752/* can lead to an overestimation of the right gap and */
753/* thus to inadequately early RQI 'convergence'. */
754/* Prevent this by forcing a small right gap. */
755/* Computing MAX */
756 d__1 = absMACRO(left), d__2 = absMACRO(right);
757 rgap = eps * maxMACRO(d__1,d__2);
758 } else {
759 rgap = wgap[windex];
760 }
761 gap = minMACRO(lgap,rgap);
762 if (k == 1 || k == im) {
763/* The eigenvector support can become wrong */
764/* because significant entries could be cut off due to a */
765/* large GAPTOL parameter in LAR1V. Prevent this. */
766 gaptol = 0.;
767 } else {
768 gaptol = gap * eps;
769 }
770 isupmn = in;
771 isupmx = 1;
772/* Update WGAP so that it holds the minimum gap */
773/* to the left or the right. This is crucial in the */
774/* case where bisection is used to ensure that the */
775/* eigenvalue is refined up to the required precision. */
776/* The correct value is restored afterwards. */
777 savgap = wgap[windex];
778 wgap[windex] = gap;
779/* We want to use the Rayleigh Quotient Correction */
780/* as often as possible since it converges quadratically */
781/* when we are close enough to the desired eigenvalue. */
782/* However, the Rayleigh Quotient can have the wrong sign */
783/* and lead us away from the desired eigenvalue. In this */
784/* case, the best we can do is to use bisection. */
785 usedbs = FALSE_;
786 usedrq = FALSE_;
787/* Bisection is initially turned off unless it is forced */
788 needbs = ! tryrqc;
789L120:
790/* Check if bisection should be used to refine eigenvalue */
791 if (needbs) {
792/* Take the bisection as new iterate */
793 usedbs = TRUE_;
794 itmp1 = iwork[iindr + windex];
795 offset = indexw[wbegin] - 1;
796 d__1 = eps * 2.;
797 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
798 - 1], &indeig, &indeig, &c_b5, &d__1, &
799 offset, &work[wbegin], &wgap[wbegin], &
800 werr[wbegin], &work[indwrk], &iwork[
801 iindwk], pivmin, &spdiam, &itmp1, &iinfo);
802 if (iinfo != 0) {
803 *info = -3;
804 return 0;
805 }
806 lambda = work[windex];
807/* Reset twist index from inaccurate LAMBDA to */
808/* force computation of true MINGMA */
809 iwork[iindr + windex] = 0;
810 }
811/* Given LAMBDA, compute the eigenvector. */
812 L__1 = ! usedbs;
813 template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
814 ibegin], &work[indld + ibegin - 1], &work[
815 indlld + ibegin - 1], pivmin, &gaptol, &z__[
816 ibegin + windex * z_dim1], &L__1, &negcnt, &
817 ztz, &mingma, &iwork[iindr + windex], &isuppz[
818 (windex << 1) - 1], &nrminv, &resid, &rqcorr,
819 &work[indwrk]);
820 if (iter == 0) {
821 bstres = resid;
822 bstw = lambda;
823 } else if (resid < bstres) {
824 bstres = resid;
825 bstw = lambda;
826 }
827/* Computing MIN */
828 i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
829 isupmn = minMACRO(i__4,i__5);
830/* Computing MAX */
831 i__4 = isupmx, i__5 = isuppz[windex * 2];
832 isupmx = maxMACRO(i__4,i__5);
833 ++iter;
834/* sin alpha <= |resid|/gap */
835/* Note that both the residual and the gap are */
836/* proportional to the matrix, so ||T|| doesn't play */
837/* a role in the quotient */
838
839/* Convergence test for Rayleigh-Quotient iteration */
840/* (omitted when Bisection has been used) */
841
842 if (resid > tol * gap && absMACRO(rqcorr) > rqtol * absMACRO(
843 lambda) && ! usedbs) {
844/* We need to check that the RQCORR update doesn't */
845/* move the eigenvalue away from the desired one and */
846/* towards a neighbor. -> protection with bisection */
847 if (indeig <= negcnt) {
848/* The wanted eigenvalue lies to the left */
849 sgndef = -1.;
850 } else {
851/* The wanted eigenvalue lies to the right */
852 sgndef = 1.;
853 }
854/* We only use the RQCORR if it improves the */
855/* the iterate reasonably. */
856 if (rqcorr * sgndef >= 0. && lambda + rqcorr <=
857 right && lambda + rqcorr >= left) {
858 usedrq = TRUE_;
859/* Store new midpoint of bisection interval in WORK */
860 if (sgndef == 1.) {
861/* The current LAMBDA is on the left of the true */
862/* eigenvalue */
863 left = lambda;
864/* We prefer to assume that the error estimate */
865/* is correct. We could make the interval not */
866/* as a bracket but to be modified if the RQCORR */
867/* chooses to. In this case, the RIGHT side should */
868/* be modified as follows: */
869/* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
870 } else {
871/* The current LAMBDA is on the right of the true */
872/* eigenvalue */
873 right = lambda;
874/* See comment about assuming the error estimate is */
875/* correct above. */
876/* LEFT = MIN(LEFT, LAMBDA + RQCORR) */
877 }
878 work[windex] = (right + left) * .5;
879/* Take RQCORR since it has the correct sign and */
880/* improves the iterate reasonably */
881 lambda += rqcorr;
882/* Update width of error interval */
883 werr[windex] = (right - left) * .5;
884 } else {
885 needbs = TRUE_;
886 }
887 if (right - left < rqtol * absMACRO(lambda)) {
888/* The eigenvalue is computed to bisection accuracy */
889/* compute eigenvector and stop */
890 usedbs = TRUE_;
891 goto L120;
892 } else if (iter < 10) {
893 goto L120;
894 } else if (iter == 10) {
895 needbs = TRUE_;
896 goto L120;
897 } else {
898 *info = 5;
899 return 0;
900 }
901 } else {
902 stp2ii = FALSE_;
903 if (usedrq && usedbs && bstres <= resid) {
904 lambda = bstw;
905 stp2ii = TRUE_;
906 }
907 if (stp2ii) {
908/* improve error angle by second step */
909 L__1 = ! usedbs;
910 template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin]
911, &l[ibegin], &work[indld + ibegin -
912 1], &work[indlld + ibegin - 1],
913 pivmin, &gaptol, &z__[ibegin + windex
914 * z_dim1], &L__1, &negcnt, &ztz, &
915 mingma, &iwork[iindr + windex], &
916 isuppz[(windex << 1) - 1], &nrminv, &
917 resid, &rqcorr, &work[indwrk]);
918 }
919 work[windex] = lambda;
920 }
921
922/* Compute FP-vector support w.r.t. whole matrix */
923
924 isuppz[(windex << 1) - 1] += oldien;
925 isuppz[windex * 2] += oldien;
926 zfrom = isuppz[(windex << 1) - 1];
927 zto = isuppz[windex * 2];
928 isupmn += oldien;
929 isupmx += oldien;
930/* Ensure vector is ok if support in the RQI has changed */
931 if (isupmn < zfrom) {
932 i__4 = zfrom - 1;
933 for (ii = isupmn; ii <= i__4; ++ii) {
934 z__[ii + windex * z_dim1] = 0.;
935/* L122: */
936 }
937 }
938 if (isupmx > zto) {
939 i__4 = isupmx;
940 for (ii = zto + 1; ii <= i__4; ++ii) {
941 z__[ii + windex * z_dim1] = 0.;
942/* L123: */
943 }
944 }
945 i__4 = zto - zfrom + 1;
946 template_blas_scal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1],
947 &c__1);
948L125:
949/* Update W */
950 w[windex] = lambda + sigma;
951/* Recompute the gaps on the left and right */
952/* But only allow them to become larger and not */
953/* smaller (which can only happen through "bad" */
954/* cancellation and doesn't reflect the theory */
955/* where the initial gaps are underestimated due */
956/* to WERR being too crude.) */
957 if (! eskip) {
958 if (k > 1) {
959/* Computing MAX */
960 d__1 = wgap[windmn], d__2 = w[windex] - werr[
961 windex] - w[windmn] - werr[windmn];
962 wgap[windmn] = maxMACRO(d__1,d__2);
963 }
964 if (windex < wend) {
965/* Computing MAX */
966 d__1 = savgap, d__2 = w[windpl] - werr[windpl]
967 - w[windex] - werr[windex];
968 wgap[windex] = maxMACRO(d__1,d__2);
969 }
970 }
971 ++idone;
972 }
973/* here ends the code for the current child */
974
975L139:
976/* Proceed to any remaining child nodes */
977 newfst = j + 1;
978L140:
979 ;
980 }
981/* L150: */
982 }
983 ++ndepth;
984 goto L40;
985 }
986 ibegin = iend + 1;
987 wbegin = wend + 1;
988L170:
989 ;
990 }
991
992 return 0;
993
994/* End of DLARRV */
995
996} /* dlarrv_ */
997
998#endif
static const real gu
Definition: fun-pz81.c:68
Treal template_blas_log(Treal x)
int integer
Definition: template_blas_common.h:40
#define absMACRO(x)
Definition: template_blas_common.h:47
#define minMACRO(a, b)
Definition: template_blas_common.h:46
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:42
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
#define TRUE_
Definition: template_lapack_common.h:42
#define FALSE_
Definition: template_lapack_common.h:43
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal *lambda, Treal *d__, Treal *l, Treal *ld, Treal *lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, Treal *rqcorr, Treal *work)
Definition: template_lapack_lar1v.h:41
int template_lapack_larrb(integer *n, Treal *d__, Treal *lld, integer *ifirst, integer *ilast, Treal *rtol1, Treal *rtol2, integer *offset, Treal *w, Treal *wgap, Treal *werr, Treal *work, integer *iwork, Treal *pivmin, Treal *spdiam, integer *twist, integer *info)
Definition: template_lapack_larrb.h:45
int template_lapack_larrf(integer *n, Treal *d__, Treal *l, Treal *ld, integer *clstrt, integer *clend, Treal *w, Treal *wgap, Treal *werr, Treal *spdiam, Treal *clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, Treal *dplus, Treal *lplus, Treal *work, integer *info)
Definition: template_lapack_larrf.h:42
int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu, Treal *d__, Treal *l, Treal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, Treal *minrgp, Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr, Treal *wgap, integer *iblock, integer *indexw, Treal *gers, Treal *z__, const integer *ldz, integer *isuppz, Treal *work, integer *iwork, integer *info)
Definition: template_lapack_larrv.h:45
int template_lapack_laset(const char *uplo, const integer *m, const integer *n, const Treal *alpha, const Treal *beta, Treal *a, const integer *lda)
Definition: template_lapack_laset.h:42