ergo
template_lapack_orgqr.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_ORGQR_HEADER
38#define TEMPLATE_LAPACK_ORGQR_HEADER
39
41
42template<class Treal>
44 const integer *m,
45 const integer *n,
46 const integer *k,
47 Treal * a,
48 const integer *lda,
49 const Treal *tau,
50 Treal *work,
51 const integer *lwork,
52 integer *info
53 )
54{
55/* -- LAPACK routine (version 3.0) --
56 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
57 Courant Institute, Argonne National Lab, and Rice University
58 June 30, 1999
59
60
61 Purpose
62 =======
63
64 DORGQR generates an M-by-N real matrix Q with orthonormal columns,
65 which is defined as the first N columns of a product of K elementary
66 reflectors of order M
67
68 Q = H(1) H(2) . . . H(k)
69
70 as returned by DGEQRF.
71
72 Arguments
73 =========
74
75 M (input) INTEGER
76 The number of rows of the matrix Q. M >= 0.
77
78 N (input) INTEGER
79 The number of columns of the matrix Q. M >= N >= 0.
80
81 K (input) INTEGER
82 The number of elementary reflectors whose product defines the
83 matrix Q. N >= K >= 0.
84
85 A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
86 On entry, the i-th column must contain the vector which
87 defines the elementary reflector H(i), for i = 1,2,...,k, as
88 returned by DGEQRF in the first k columns of its array
89 argument A.
90 On exit, the M-by-N matrix Q.
91
92 LDA (input) INTEGER
93 The first dimension of the array A. LDA >= max(1,M).
94
95 TAU (input) DOUBLE PRECISION array, dimension (K)
96 TAU(i) must contain the scalar factor of the elementary
97 reflector H(i), as returned by DGEQRF.
98
99 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
100 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
101
102 LWORK (input) INTEGER
103 The dimension of the array WORK. LWORK >= max(1,N).
104 For optimum performance LWORK >= N*NB, where NB is the
105 optimal blocksize.
106
107 If LWORK = -1, then a workspace query is assumed; the routine
108 only calculates the optimal size of the WORK array, returns
109 this value as the first entry of the WORK array, and no error
110 message related to LWORK is issued by XERBLA.
111
112 INFO (output) INTEGER
113 = 0: successful exit
114 < 0: if INFO = -i, the i-th argument has an illegal value
115
116 =====================================================================
117
118
119 Test the input arguments
120
121 Parameter adjustments */
122 /* Table of constant values */
123 integer c__1 = 1;
124 integer c_n1 = -1;
125 integer c__3 = 3;
126 integer c__2 = 2;
127
128 /* System generated locals */
129 integer a_dim1, a_offset, i__1, i__2, i__3;
130 /* Local variables */
131 integer i__, j, l, nbmin, iinfo;
132 integer ib, nb, ki, kk;
133 integer nx;
134 integer ldwork, lwkopt;
135 logical lquery;
136 integer iws;
137#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
138
139
140 a_dim1 = *lda;
141 a_offset = 1 + a_dim1 * 1;
142 a -= a_offset;
143 --tau;
144 --work;
145
146 /* Initialization added by Elias to get rid of compiler warnings. */
147 ki = 0;
148 /* Function Body */
149 *info = 0;
150 nb = template_lapack_ilaenv(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
151 lwkopt = maxMACRO(1,*n) * nb;
152 work[1] = (Treal) lwkopt;
153 lquery = *lwork == -1;
154 if (*m < 0) {
155 *info = -1;
156 } else if (*n < 0 || *n > *m) {
157 *info = -2;
158 } else if (*k < 0 || *k > *n) {
159 *info = -3;
160 } else if (*lda < maxMACRO(1,*m)) {
161 *info = -5;
162 } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
163 *info = -8;
164 }
165 if (*info != 0) {
166 i__1 = -(*info);
167 template_blas_erbla("ORGQR ", &i__1);
168 return 0;
169 } else if (lquery) {
170 return 0;
171 }
172
173/* Quick return if possible */
174
175 if (*n <= 0) {
176 work[1] = 1.;
177 return 0;
178 }
179
180 nbmin = 2;
181 nx = 0;
182 iws = *n;
183 if (nb > 1 && nb < *k) {
184
185/* Determine when to cross over from blocked to unblocked code.
186
187 Computing MAX */
188 i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQR", " ", m, n, k, &c_n1, (
189 ftnlen)6, (ftnlen)1);
190 nx = maxMACRO(i__1,i__2);
191 if (nx < *k) {
192
193/* Determine if workspace is large enough for blocked code. */
194
195 ldwork = *n;
196 iws = ldwork * nb;
197 if (*lwork < iws) {
198
199/* Not enough workspace to use optimal NB: reduce NB and
200 determine the minimum value of NB. */
201
202 nb = *lwork / ldwork;
203/* Computing MAX */
204 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQR", " ", m, n, k, &c_n1,
205 (ftnlen)6, (ftnlen)1);
206 nbmin = maxMACRO(i__1,i__2);
207 }
208 }
209 }
210
211 if (nb >= nbmin && nb < *k && nx < *k) {
212
213/* Use blocked code after the last block.
214 The first kk columns are handled by the block method. */
215
216 ki = (*k - nx - 1) / nb * nb;
217/* Computing MIN */
218 i__1 = *k, i__2 = ki + nb;
219 kk = minMACRO(i__1,i__2);
220
221/* Set A(1:kk,kk+1:n) to zero. */
222
223 i__1 = *n;
224 for (j = kk + 1; j <= i__1; ++j) {
225 i__2 = kk;
226 for (i__ = 1; i__ <= i__2; ++i__) {
227 a_ref(i__, j) = 0.;
228/* L10: */
229 }
230/* L20: */
231 }
232 } else {
233 kk = 0;
234 }
235
236/* Use unblocked code for the last or only block. */
237
238 if (kk < *n) {
239 i__1 = *m - kk;
240 i__2 = *n - kk;
241 i__3 = *k - kk;
242 template_lapack_org2r(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1]
243 , &work[1], &iinfo);
244 }
245
246 if (kk > 0) {
247
248/* Use blocked code */
249
250 i__1 = -nb;
251 for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
252/* Computing MIN */
253 i__2 = nb, i__3 = *k - i__ + 1;
254 ib = minMACRO(i__2,i__3);
255 if (i__ + ib <= *n) {
256
257/* Form the triangular factor of the block reflector
258 H = H(i) H(i+1) . . . H(i+ib-1) */
259
260 i__2 = *m - i__ + 1;
261 template_lapack_larft("Forward", "Columnwise", &i__2, &ib, &a_ref(i__, i__),
262 lda, &tau[i__], &work[1], &ldwork);
263
264/* Apply H to A(i:m,i+ib:n) from the left */
265
266 i__2 = *m - i__ + 1;
267 i__3 = *n - i__ - ib + 1;
268 template_lapack_larfb("Left", "No transpose", "Forward", "Columnwise", &
269 i__2, &i__3, &ib, &a_ref(i__, i__), lda, &work[1], &
270 ldwork, &a_ref(i__, i__ + ib), lda, &work[ib + 1], &
271 ldwork);
272 }
273
274/* Apply H to rows i:m of current block */
275
276 i__2 = *m - i__ + 1;
277 template_lapack_org2r(&i__2, &ib, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[
278 1], &iinfo);
279
280/* Set rows 1:i-1 of current block to zero */
281
282 i__2 = i__ + ib - 1;
283 for (j = i__; j <= i__2; ++j) {
284 i__3 = i__ - 1;
285 for (l = 1; l <= i__3; ++l) {
286 a_ref(l, j) = 0.;
287/* L30: */
288 }
289/* L40: */
290 }
291/* L50: */
292 }
293 }
294
295 work[1] = (Treal) iws;
296 return 0;
297
298/* End of DORGQR */
299
300} /* dorgqr_ */
301
302#undef a_ref
303
304
305#endif
int template_blas_erbla(const char *srname, integer *info)
Definition template_blas_common.cc:146
int integer
Definition template_blas_common.h:40
int ftnlen
Definition template_blas_common.h:42
#define minMACRO(a, b)
Definition template_blas_common.h:46
#define maxMACRO(a, b)
Definition template_blas_common.h:45
bool logical
Definition template_blas_common.h:41
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition template_lapack_common.cc:281
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition template_lapack_larfb.h:42
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition template_lapack_larft.h:42
int template_lapack_org2r(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, integer *info)
Definition template_lapack_org2r.h:42
#define a_ref(a_1, a_2)
int template_lapack_orgqr(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition template_lapack_orgqr.h:43