LAPACK  3.9.0
LAPACK: Linear Algebra PACKage

◆ zhetrf_aa()

subroutine zhetrf_aa ( character  UPLO,
integer  N,
complex*16, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
complex*16, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

ZHETRF_AA

Download ZHETRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZHETRF_AA computes the factorization of a complex hermitian matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**H*T*U  or  A = L*T*L**H

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a hermitian tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is COMPLEX*16 array, dimension (LDA,N)
          On entry, the hermitian matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
November 2017

Definition at line 134 of file zhetrf_aa.f.

134 *
135 * -- LAPACK computational routine (version 3.8.0) --
136 * -- LAPACK is a software package provided by Univ. of Tennessee, --
137 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
138 * November 2017
139 *
140  IMPLICIT NONE
141 *
142 * .. Scalar Arguments ..
143  CHARACTER UPLO
144  INTEGER N, LDA, LWORK, INFO
145 * ..
146 * .. Array Arguments ..
147  INTEGER IPIV( * )
148  COMPLEX*16 A( LDA, * ), WORK( * )
149 * ..
150 *
151 * =====================================================================
152 * .. Parameters ..
153  COMPLEX*16 ZERO, ONE
154  parameter( zero = (0.0d+0, 0.0d+0), one = (1.0d+0, 0.0d+0) )
155 *
156 * .. Local Scalars ..
157  LOGICAL LQUERY, UPPER
158  INTEGER J, LWKOPT
159  INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
160  COMPLEX*16 ALPHA
161 * ..
162 * .. External Functions ..
163  LOGICAL LSAME
164  INTEGER ILAENV
165  EXTERNAL lsame, ilaenv
166 * ..
167 * .. External Subroutines ..
168  EXTERNAL zlahef_aa, zgemm, zgemv, zcopy, zscal, zswap, xerbla
169 * ..
170 * .. Intrinsic Functions ..
171  INTRINSIC dble, dconjg, max
172 * ..
173 * .. Executable Statements ..
174 *
175 * Determine the block size
176 *
177  nb = ilaenv( 1, 'ZHETRF_AA', uplo, n, -1, -1, -1 )
178 *
179 * Test the input parameters.
180 *
181  info = 0
182  upper = lsame( uplo, 'U' )
183  lquery = ( lwork.EQ.-1 )
184  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
185  info = -1
186  ELSE IF( n.LT.0 ) THEN
187  info = -2
188  ELSE IF( lda.LT.max( 1, n ) ) THEN
189  info = -4
190  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
191  info = -7
192  END IF
193 *
194  IF( info.EQ.0 ) THEN
195  lwkopt = (nb+1)*n
196  work( 1 ) = lwkopt
197  END IF
198 *
199  IF( info.NE.0 ) THEN
200  CALL xerbla( 'ZHETRF_AA', -info )
201  RETURN
202  ELSE IF( lquery ) THEN
203  RETURN
204  END IF
205 *
206 * Quick return
207 *
208  IF ( n.EQ.0 ) THEN
209  RETURN
210  ENDIF
211  ipiv( 1 ) = 1
212  IF ( n.EQ.1 ) THEN
213  a( 1, 1 ) = dble( a( 1, 1 ) )
214  RETURN
215  END IF
216 *
217 * Adjust block size based on the workspace size
218 *
219  IF( lwork.LT.((1+nb)*n) ) THEN
220  nb = ( lwork-n ) / n
221  END IF
222 *
223  IF( upper ) THEN
224 *
225 * .....................................................
226 * Factorize A as U**H*D*U using the upper triangle of A
227 * .....................................................
228 *
229 * copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
230 *
231  CALL zcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
232 *
233 * J is the main loop index, increasing from 1 to N in steps of
234 * JB, where JB is the number of columns factorized by ZLAHEF;
235 * JB is either NB, or N-J+1 for the last block
236 *
237  j = 0
238  10 CONTINUE
239  IF( j.GE.n )
240  $ GO TO 20
241 *
242 * each step of the main loop
243 * J is the last column of the previous panel
244 * J1 is the first column of the current panel
245 * K1 identifies if the previous column of the panel has been
246 * explicitly stored, e.g., K1=1 for the first panel, and
247 * K1=0 for the rest
248 *
249  j1 = j + 1
250  jb = min( n-j1+1, nb )
251  k1 = max(1, j)-j
252 *
253 * Panel factorization
254 *
255  CALL zlahef_aa( uplo, 2-k1, n-j, jb,
256  $ a( max(1, j), j+1 ), lda,
257  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
258 *
259 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
260 *
261  DO j2 = j+2, min(n, j+jb+1)
262  ipiv( j2 ) = ipiv( j2 ) + j
263  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
264  CALL zswap( j1-k1-2, a( 1, j2 ), 1,
265  $ a( 1, ipiv(j2) ), 1 )
266  END IF
267  END DO
268  j = j + jb
269 *
270 * Trailing submatrix update, where
271 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
272 * WORK stores the current block of the auxiriarly matrix H
273 *
274  IF( j.LT.n ) THEN
275 *
276 * if the first panel and JB=1 (NB=1), then nothing to do
277 *
278  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
279 *
280 * Merge rank-1 update with BLAS-3 update
281 *
282  alpha = dconjg( a( j, j+1 ) )
283  a( j, j+1 ) = one
284  CALL zcopy( n-j, a( j-1, j+1 ), lda,
285  $ work( (j+1-j1+1)+jb*n ), 1 )
286  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
287 *
288 * K1 identifies if the previous column of the panel has been
289 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
290 * and K1=1 and K2=0 for the rest
291 *
292  IF( j1.GT.1 ) THEN
293 *
294 * Not first panel
295 *
296  k2 = 1
297  ELSE
298 *
299 * First panel
300 *
301  k2 = 0
302 *
303 * First update skips the first column
304 *
305  jb = jb - 1
306  END IF
307 *
308  DO j2 = j+1, n, nb
309  nj = min( nb, n-j2+1 )
310 *
311 * Update (J2, J2) diagonal block with ZGEMV
312 *
313  j3 = j2
314  DO mj = nj-1, 1, -1
315  CALL zgemm( 'Conjugate transpose', 'Transpose',
316  $ 1, mj, jb+1,
317  $ -one, a( j1-k2, j3 ), lda,
318  $ work( (j3-j1+1)+k1*n ), n,
319  $ one, a( j3, j3 ), lda )
320  j3 = j3 + 1
321  END DO
322 *
323 * Update off-diagonal block of J2-th block row with ZGEMM
324 *
325  CALL zgemm( 'Conjugate transpose', 'Transpose',
326  $ nj, n-j3+1, jb+1,
327  $ -one, a( j1-k2, j2 ), lda,
328  $ work( (j3-j1+1)+k1*n ), n,
329  $ one, a( j2, j3 ), lda )
330  END DO
331 *
332 * Recover T( J, J+1 )
333 *
334  a( j, j+1 ) = dconjg( alpha )
335  END IF
336 *
337 * WORK(J+1, 1) stores H(J+1, 1)
338 *
339  CALL zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
340  END IF
341  GO TO 10
342  ELSE
343 *
344 * .....................................................
345 * Factorize A as L*D*L**H using the lower triangle of A
346 * .....................................................
347 *
348 * copy first column A(1:N, 1) into H(1:N, 1)
349 * (stored in WORK(1:N))
350 *
351  CALL zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
352 *
353 * J is the main loop index, increasing from 1 to N in steps of
354 * JB, where JB is the number of columns factorized by ZLAHEF;
355 * JB is either NB, or N-J+1 for the last block
356 *
357  j = 0
358  11 CONTINUE
359  IF( j.GE.n )
360  $ GO TO 20
361 *
362 * each step of the main loop
363 * J is the last column of the previous panel
364 * J1 is the first column of the current panel
365 * K1 identifies if the previous column of the panel has been
366 * explicitly stored, e.g., K1=1 for the first panel, and
367 * K1=0 for the rest
368 *
369  j1 = j+1
370  jb = min( n-j1+1, nb )
371  k1 = max(1, j)-j
372 *
373 * Panel factorization
374 *
375  CALL zlahef_aa( uplo, 2-k1, n-j, jb,
376  $ a( j+1, max(1, j) ), lda,
377  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
378 *
379 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
380 *
381  DO j2 = j+2, min(n, j+jb+1)
382  ipiv( j2 ) = ipiv( j2 ) + j
383  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
384  CALL zswap( j1-k1-2, a( j2, 1 ), lda,
385  $ a( ipiv(j2), 1 ), lda )
386  END IF
387  END DO
388  j = j + jb
389 *
390 * Trailing submatrix update, where
391 * A(J2+1, J1-1) stores L(J2+1, J1) and
392 * WORK(J2+1, 1) stores H(J2+1, 1)
393 *
394  IF( j.LT.n ) THEN
395 *
396 * if the first panel and JB=1 (NB=1), then nothing to do
397 *
398  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
399 *
400 * Merge rank-1 update with BLAS-3 update
401 *
402  alpha = dconjg( a( j+1, j ) )
403  a( j+1, j ) = one
404  CALL zcopy( n-j, a( j+1, j-1 ), 1,
405  $ work( (j+1-j1+1)+jb*n ), 1 )
406  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
407 *
408 * K1 identifies if the previous column of the panel has been
409 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
410 * and K1=1 and K2=0 for the rest
411 *
412  IF( j1.GT.1 ) THEN
413 *
414 * Not first panel
415 *
416  k2 = 1
417  ELSE
418 *
419 * First panel
420 *
421  k2 = 0
422 *
423 * First update skips the first column
424 *
425  jb = jb - 1
426  END IF
427 *
428  DO j2 = j+1, n, nb
429  nj = min( nb, n-j2+1 )
430 *
431 * Update (J2, J2) diagonal block with ZGEMV
432 *
433  j3 = j2
434  DO mj = nj-1, 1, -1
435  CALL zgemm( 'No transpose', 'Conjugate transpose',
436  $ mj, 1, jb+1,
437  $ -one, work( (j3-j1+1)+k1*n ), n,
438  $ a( j3, j1-k2 ), lda,
439  $ one, a( j3, j3 ), lda )
440  j3 = j3 + 1
441  END DO
442 *
443 * Update off-diagonal block of J2-th block column with ZGEMM
444 *
445  CALL zgemm( 'No transpose', 'Conjugate transpose',
446  $ n-j3+1, nj, jb+1,
447  $ -one, work( (j3-j1+1)+k1*n ), n,
448  $ a( j2, j1-k2 ), lda,
449  $ one, a( j3, j2 ), lda )
450  END DO
451 *
452 * Recover T( J+1, J )
453 *
454  a( j+1, j ) = dconjg( alpha )
455  END IF
456 *
457 * WORK(J+1, 1) stores H(J+1, 1)
458 *
459  CALL zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
460  END IF
461  GO TO 11
462  END IF
463 *
464  20 CONTINUE
465  RETURN
466 *
467 * End of ZHETRF_AA
468 *
Here is the call graph for this function:
Here is the caller graph for this function:
zlahef_aa
subroutine zlahef_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK)
ZLAHEF_AA
Definition: zlahef_aa.f:146
zgemv
subroutine zgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
ZGEMV
Definition: zgemv.f:160
zcopy
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:83
zgemm
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:189
xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
lsame
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
zswap
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:83
ilaenv
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: tstiee.f:83
zscal
subroutine zscal(N, ZA, ZX, INCX)
ZSCAL
Definition: zscal.f:80