ergo
template_lapack_sytrd.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_SYTRD_HEADER
38#define TEMPLATE_LAPACK_SYTRD_HEADER
39
41
42template<class Treal>
43int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer *
44 lda, Treal *d__, Treal *e, Treal *tau, Treal *
45 work, const integer *lwork, integer *info)
46{
47/* -- LAPACK routine (version 3.0) --
48 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
49 Courant Institute, Argonne National Lab, and Rice University
50 June 30, 1999
51
52
53 Purpose
54 =======
55
56 DSYTRD reduces a real symmetric matrix A to real symmetric
57 tridiagonal form T by an orthogonal similarity transformation:
58 Q**T * A * Q = T.
59
60 Arguments
61 =========
62
63 UPLO (input) CHARACTER*1
64 = 'U': Upper triangle of A is stored;
65 = 'L': Lower triangle of A is stored.
66
67 N (input) INTEGER
68 The order of the matrix A. N >= 0.
69
70 A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
71 On entry, the symmetric matrix A. If UPLO = 'U', the leading
72 N-by-N upper triangular part of A contains the upper
73 triangular part of the matrix A, and the strictly lower
74 triangular part of A is not referenced. If UPLO = 'L', the
75 leading N-by-N lower triangular part of A contains the lower
76 triangular part of the matrix A, and the strictly upper
77 triangular part of A is not referenced.
78 On exit, if UPLO = 'U', the diagonal and first superdiagonal
79 of A are overwritten by the corresponding elements of the
80 tridiagonal matrix T, and the elements above the first
81 superdiagonal, with the array TAU, represent the orthogonal
82 matrix Q as a product of elementary reflectors; if UPLO
83 = 'L', the diagonal and first subdiagonal of A are over-
84 written by the corresponding elements of the tridiagonal
85 matrix T, and the elements below the first subdiagonal, with
86 the array TAU, represent the orthogonal matrix Q as a product
87 of elementary reflectors. See Further Details.
88
89 LDA (input) INTEGER
90 The leading dimension of the array A. LDA >= max(1,N).
91
92 D (output) DOUBLE PRECISION array, dimension (N)
93 The diagonal elements of the tridiagonal matrix T:
94 D(i) = A(i,i).
95
96 E (output) DOUBLE PRECISION array, dimension (N-1)
97 The off-diagonal elements of the tridiagonal matrix T:
98 E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
99
100 TAU (output) DOUBLE PRECISION array, dimension (N-1)
101 The scalar factors of the elementary reflectors (see Further
102 Details).
103
104 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
105 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
106
107 LWORK (input) INTEGER
108 The dimension of the array WORK. LWORK >= 1.
109 For optimum performance LWORK >= N*NB, where NB is the
110 optimal blocksize.
111
112 If LWORK = -1, then a workspace query is assumed; the routine
113 only calculates the optimal size of the WORK array, returns
114 this value as the first entry of the WORK array, and no error
115 message related to LWORK is issued by XERBLA.
116
117 INFO (output) INTEGER
118 = 0: successful exit
119 < 0: if INFO = -i, the i-th argument had an illegal value
120
121 Further Details
122 ===============
123
124 If UPLO = 'U', the matrix Q is represented as a product of elementary
125 reflectors
126
127 Q = H(n-1) . . . H(2) H(1).
128
129 Each H(i) has the form
130
131 H(i) = I - tau * v * v'
132
133 where tau is a real scalar, and v is a real vector with
134 v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
135 A(1:i-1,i+1), and tau in TAU(i).
136
137 If UPLO = 'L', the matrix Q is represented as a product of elementary
138 reflectors
139
140 Q = H(1) H(2) . . . H(n-1).
141
142 Each H(i) has the form
143
144 H(i) = I - tau * v * v'
145
146 where tau is a real scalar, and v is a real vector with
147 v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
148 and tau in TAU(i).
149
150 The contents of A on exit are illustrated by the following examples
151 with n = 5:
152
153 if UPLO = 'U': if UPLO = 'L':
154
155 ( d e v2 v3 v4 ) ( d )
156 ( d e v3 v4 ) ( e d )
157 ( d e v4 ) ( v1 e d )
158 ( d e ) ( v1 v2 e d )
159 ( d ) ( v1 v2 v3 e d )
160
161 where d and e denote diagonal and off-diagonal elements of T, and vi
162 denotes an element of the vector defining H(i).
163
164 =====================================================================
165
166
167 Test the input parameters
168
169 Parameter adjustments */
170 /* Table of constant values */
171 integer c__1 = 1;
172 integer c_n1 = -1;
173 integer c__3 = 3;
174 integer c__2 = 2;
175 Treal c_b22 = -1.;
176 Treal c_b23 = 1.;
177
178 /* System generated locals */
179 integer a_dim1, a_offset, i__1, i__2, i__3;
180 /* Local variables */
181 integer i__, j;
182 integer nbmin, iinfo;
183 logical upper;
184 integer nb, kk, nx;
185 integer ldwork, lwkopt;
186 logical lquery;
187 integer iws;
188#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
189
190
191 a_dim1 = *lda;
192 a_offset = 1 + a_dim1 * 1;
193 a -= a_offset;
194 --d__;
195 --e;
196 --tau;
197 --work;
198
199 /* Initialization added by Elias to get rid of compiler warnings. */
200 lwkopt = 0;
201 /* Function Body */
202 *info = 0;
203 upper = template_blas_lsame(uplo, "U");
204 lquery = *lwork == -1;
205 if (! upper && ! template_blas_lsame(uplo, "L")) {
206 *info = -1;
207 } else if (*n < 0) {
208 *info = -2;
209 } else if (*lda < maxMACRO(1,*n)) {
210 *info = -4;
211 } else if (*lwork < 1 && ! lquery) {
212 *info = -9;
213 }
214
215 if (*info == 0) {
216
217/* Determine the block size. */
218
219 nb = template_lapack_ilaenv(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
220 (ftnlen)1);
221 lwkopt = *n * nb;
222 work[1] = (Treal) lwkopt;
223 }
224
225 if (*info != 0) {
226 i__1 = -(*info);
227 template_blas_erbla("SYTRD ", &i__1);
228 return 0;
229 } else if (lquery) {
230 return 0;
231 }
232
233/* Quick return if possible */
234
235 if (*n == 0) {
236 work[1] = 1.;
237 return 0;
238 }
239
240 nx = *n;
241 iws = 1;
242 if (nb > 1 && nb < *n) {
243
244/* Determine when to cross over from blocked to unblocked code
245 (last block is always handled by unblocked code).
246
247 Computing MAX */
248 i__1 = nb, i__2 = template_lapack_ilaenv(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, &
249 c_n1, (ftnlen)6, (ftnlen)1);
250 nx = maxMACRO(i__1,i__2);
251 if (nx < *n) {
252
253/* Determine if workspace is large enough for blocked code. */
254
255 ldwork = *n;
256 iws = ldwork * nb;
257 if (*lwork < iws) {
258
259/* Not enough workspace to use optimal NB: determine the
260 minimum value of NB, and reduce NB or force use of
261 unblocked code by setting NX = N.
262
263 Computing MAX */
264 i__1 = *lwork / ldwork;
265 nb = maxMACRO(i__1,1);
266 nbmin = template_lapack_ilaenv(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1,
267 (ftnlen)6, (ftnlen)1);
268 if (nb < nbmin) {
269 nx = *n;
270 }
271 }
272 } else {
273 nx = *n;
274 }
275 } else {
276 nb = 1;
277 }
278
279 if (upper) {
280
281/* Reduce the upper triangle of A.
282 Columns 1:kk are handled by the unblocked method. */
283
284 kk = *n - (*n - nx + nb - 1) / nb * nb;
285 i__1 = kk + 1;
286 i__2 = -nb;
287 for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
288 i__2) {
289
290/* Reduce columns i:i+nb-1 to tridiagonal form and form the
291 matrix W which is needed to update the unreduced part of
292 the matrix */
293
294 i__3 = i__ + nb - 1;
295 template_lapack_latrd(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
296 work[1], &ldwork);
297
298/* Update the unreduced submatrix A(1:i-1,1:i-1), using an
299 update of the form: A := A - V*W' - W*V' */
300
301 i__3 = i__ - 1;
302 template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__),
303 lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda);
304
305/* Copy superdiagonal elements back into A, and diagonal
306 elements into D */
307
308 i__3 = i__ + nb - 1;
309 for (j = i__; j <= i__3; ++j) {
310 a_ref(j - 1, j) = e[j - 1];
311 d__[j] = a_ref(j, j);
312/* L10: */
313 }
314/* L20: */
315 }
316
317/* Use unblocked code to reduce the last or only block */
318
319 template_lapack_sytd2(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
320 } else {
321
322/* Reduce the lower triangle of A */
323
324 i__2 = *n - nx;
325 i__1 = nb;
326 for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
327
328/* Reduce columns i:i+nb-1 to tridiagonal form and form the
329 matrix W which is needed to update the unreduced part of
330 the matrix */
331
332 i__3 = *n - i__ + 1;
333 template_lapack_latrd(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[
334 i__], &work[1], &ldwork);
335
336/* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
337 an update of the form: A := A - V*W' - W*V' */
338
339 i__3 = *n - i__ - nb + 1;
340 template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb,
341 i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ +
342 nb, i__ + nb), lda);
343
344/* Copy subdiagonal elements back into A, and diagonal
345 elements into D */
346
347 i__3 = i__ + nb - 1;
348 for (j = i__; j <= i__3; ++j) {
349 a_ref(j + 1, j) = e[j];
350 d__[j] = a_ref(j, j);
351/* L30: */
352 }
353/* L40: */
354 }
355
356/* Use unblocked code to reduce the last or only block */
357
358 i__1 = *n - i__ + 1;
359 template_lapack_sytd2(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[
360 i__], &iinfo);
361 }
362
363 work[1] = (Treal) lwkopt;
364 return 0;
365
366/* End of DSYTRD */
367
368} /* dsytrd_ */
369
370#undef a_ref
371
372
373#endif
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
int integer
Definition: template_blas_common.h:40
int ftnlen
Definition: template_blas_common.h:42
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
int template_blas_syr2k(const char *uplo, const char *trans, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_syr2k.h:42
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:281
int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *a, const integer *lda, Treal *e, Treal *tau, Treal *w, const integer *ldw)
Definition: template_lapack_latrd.h:42
int template_lapack_sytd2(const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *d__, Treal *e, Treal *tau, integer *info)
Definition: template_lapack_sytd2.h:43
#define a_ref(a_1, a_2)
int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *d__, Treal *e, Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_sytrd.h:43