LAPACK  3.9.0
LAPACK: Linear Algebra PACKage

◆ ssytrf_aa()

subroutine ssytrf_aa ( character  UPLO,
integer  N,
real, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
real, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

SSYTRF_AA

Download SSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSYTRF_AA computes the factorization of a real symmetric matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**T*T*U  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is REAL array, dimension (LDA,N)
          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK.  LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
November 2017

Definition at line 134 of file ssytrf_aa.f.

134 *
135 * -- LAPACK computational routine (version 3.8.0) --
136 * -- LAPACK is a software package provided by Univ. of Tennessee, --
137 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
138 * November 2017
139 *
140  IMPLICIT NONE
141 *
142 * .. Scalar Arguments ..
143  CHARACTER UPLO
144  INTEGER N, LDA, LWORK, INFO
145 * ..
146 * .. Array Arguments ..
147  INTEGER IPIV( * )
148  REAL A( LDA, * ), WORK( * )
149 * ..
150 *
151 * =====================================================================
152 * .. Parameters ..
153  REAL ZERO, ONE
154  parameter( zero = 0.0e+0, one = 1.0e+0 )
155 *
156 * .. Local Scalars ..
157  LOGICAL LQUERY, UPPER
158  INTEGER J, LWKOPT
159  INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
160  REAL ALPHA
161 * ..
162 * .. External Functions ..
163  LOGICAL LSAME
164  INTEGER ILAENV
165  EXTERNAL lsame, ilaenv
166 * ..
167 * .. External Subroutines ..
168  EXTERNAL slasyf_aa, sgemv, sscal, scopy, sswap, sgemm,
169  $ xerbla
170 * ..
171 * .. Intrinsic Functions ..
172  INTRINSIC max
173 * ..
174 * .. Executable Statements ..
175 *
176 * Determine the block size
177 *
178  nb = ilaenv( 1, 'SSYTRF_AA', uplo, n, -1, -1, -1 )
179 *
180 * Test the input parameters.
181 *
182  info = 0
183  upper = lsame( uplo, 'U' )
184  lquery = ( lwork.EQ.-1 )
185  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
186  info = -1
187  ELSE IF( n.LT.0 ) THEN
188  info = -2
189  ELSE IF( lda.LT.max( 1, n ) ) THEN
190  info = -4
191  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
192  info = -7
193  END IF
194 *
195  IF( info.EQ.0 ) THEN
196  lwkopt = (nb+1)*n
197  work( 1 ) = lwkopt
198  END IF
199 *
200  IF( info.NE.0 ) THEN
201  CALL xerbla( 'SSYTRF_AA', -info )
202  RETURN
203  ELSE IF( lquery ) THEN
204  RETURN
205  END IF
206 *
207 * Quick return
208 *
209  IF ( n.EQ.0 ) THEN
210  RETURN
211  ENDIF
212  ipiv( 1 ) = 1
213  IF ( n.EQ.1 ) THEN
214  RETURN
215  END IF
216 *
217 * Adjust block size based on the workspace size
218 *
219  IF( lwork.LT.((1+nb)*n) ) THEN
220  nb = ( lwork-n ) / n
221  END IF
222 *
223  IF( upper ) THEN
224 *
225 * .....................................................
226 * Factorize A as U**T*D*U using the upper triangle of A
227 * .....................................................
228 *
229 * Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
230 *
231  CALL scopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
232 *
233 * J is the main loop index, increasing from 1 to N in steps of
234 * JB, where JB is the number of columns factorized by SLASYF;
235 * JB is either NB, or N-J+1 for the last block
236 *
237  j = 0
238  10 CONTINUE
239  IF( j.GE.n )
240  $ GO TO 20
241 *
242 * each step of the main loop
243 * J is the last column of the previous panel
244 * J1 is the first column of the current panel
245 * K1 identifies if the previous column of the panel has been
246 * explicitly stored, e.g., K1=1 for the first panel, and
247 * K1=0 for the rest
248 *
249  j1 = j + 1
250  jb = min( n-j1+1, nb )
251  k1 = max(1, j)-j
252 *
253 * Panel factorization
254 *
255  CALL slasyf_aa( uplo, 2-k1, n-j, jb,
256  $ a( max(1, j), j+1 ), lda,
257  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
258 *
259 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
260 *
261  DO j2 = j+2, min(n, j+jb+1)
262  ipiv( j2 ) = ipiv( j2 ) + j
263  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
264  CALL sswap( j1-k1-2, a( 1, j2 ), 1,
265  $ a( 1, ipiv(j2) ), 1 )
266  END IF
267  END DO
268  j = j + jb
269 *
270 * Trailing submatrix update, where
271 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
272 * WORK stores the current block of the auxiriarly matrix H
273 *
274  IF( j.LT.n ) THEN
275 *
276 * If first panel and JB=1 (NB=1), then nothing to do
277 *
278  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
279 *
280 * Merge rank-1 update with BLAS-3 update
281 *
282  alpha = a( j, j+1 )
283  a( j, j+1 ) = one
284  CALL scopy( n-j, a( j-1, j+1 ), lda,
285  $ work( (j+1-j1+1)+jb*n ), 1 )
286  CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
287 *
288 * K1 identifies if the previous column of the panel has been
289 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
290 * while K1=0 and K2=1 for the rest
291 *
292  IF( j1.GT.1 ) THEN
293 *
294 * Not first panel
295 *
296  k2 = 1
297  ELSE
298 *
299 * First panel
300 *
301  k2 = 0
302 *
303 * First update skips the first column
304 *
305  jb = jb - 1
306  END IF
307 *
308  DO j2 = j+1, n, nb
309  nj = min( nb, n-j2+1 )
310 *
311 * Update (J2, J2) diagonal block with SGEMV
312 *
313  j3 = j2
314  DO mj = nj-1, 1, -1
315  CALL sgemv( 'No transpose', mj, jb+1,
316  $ -one, work( j3-j1+1+k1*n ), n,
317  $ a( j1-k2, j3 ), 1,
318  $ one, a( j3, j3 ), lda )
319  j3 = j3 + 1
320  END DO
321 *
322 * Update off-diagonal block of J2-th block row with SGEMM
323 *
324  CALL sgemm( 'Transpose', 'Transpose',
325  $ nj, n-j3+1, jb+1,
326  $ -one, a( j1-k2, j2 ), lda,
327  $ work( j3-j1+1+k1*n ), n,
328  $ one, a( j2, j3 ), lda )
329  END DO
330 *
331 * Recover T( J, J+1 )
332 *
333  a( j, j+1 ) = alpha
334  END IF
335 *
336 * WORK(J+1, 1) stores H(J+1, 1)
337 *
338  CALL scopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
339  END IF
340  GO TO 10
341  ELSE
342 *
343 * .....................................................
344 * Factorize A as L*D*L**T using the lower triangle of A
345 * .....................................................
346 *
347 * copy first column A(1:N, 1) into H(1:N, 1)
348 * (stored in WORK(1:N))
349 *
350  CALL scopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
351 *
352 * J is the main loop index, increasing from 1 to N in steps of
353 * JB, where JB is the number of columns factorized by SLASYF;
354 * JB is either NB, or N-J+1 for the last block
355 *
356  j = 0
357  11 CONTINUE
358  IF( j.GE.n )
359  $ GO TO 20
360 *
361 * each step of the main loop
362 * J is the last column of the previous panel
363 * J1 is the first column of the current panel
364 * K1 identifies if the previous column of the panel has been
365 * explicitly stored, e.g., K1=1 for the first panel, and
366 * K1=0 for the rest
367 *
368  j1 = j+1
369  jb = min( n-j1+1, nb )
370  k1 = max(1, j)-j
371 *
372 * Panel factorization
373 *
374  CALL slasyf_aa( uplo, 2-k1, n-j, jb,
375  $ a( j+1, max(1, j) ), lda,
376  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
377 *
378 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
379 *
380  DO j2 = j+2, min(n, j+jb+1)
381  ipiv( j2 ) = ipiv( j2 ) + j
382  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
383  CALL sswap( j1-k1-2, a( j2, 1 ), lda,
384  $ a( ipiv(j2), 1 ), lda )
385  END IF
386  END DO
387  j = j + jb
388 *
389 * Trailing submatrix update, where
390 * A(J2+1, J1-1) stores L(J2+1, J1) and
391 * WORK(J2+1, 1) stores H(J2+1, 1)
392 *
393  IF( j.LT.n ) THEN
394 *
395 * if first panel and JB=1 (NB=1), then nothing to do
396 *
397  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
398 *
399 * Merge rank-1 update with BLAS-3 update
400 *
401  alpha = a( j+1, j )
402  a( j+1, j ) = one
403  CALL scopy( n-j, a( j+1, j-1 ), 1,
404  $ work( (j+1-j1+1)+jb*n ), 1 )
405  CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
406 *
407 * K1 identifies if the previous column of the panel has been
408 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
409 * while K1=0 and K2=1 for the rest
410 *
411  IF( j1.GT.1 ) THEN
412 *
413 * Not first panel
414 *
415  k2 = 1
416  ELSE
417 *
418 * First panel
419 *
420  k2 = 0
421 *
422 * First update skips the first column
423 *
424  jb = jb - 1
425  END IF
426 *
427  DO j2 = j+1, n, nb
428  nj = min( nb, n-j2+1 )
429 *
430 * Update (J2, J2) diagonal block with SGEMV
431 *
432  j3 = j2
433  DO mj = nj-1, 1, -1
434  CALL sgemv( 'No transpose', mj, jb+1,
435  $ -one, work( j3-j1+1+k1*n ), n,
436  $ a( j3, j1-k2 ), lda,
437  $ one, a( j3, j3 ), 1 )
438  j3 = j3 + 1
439  END DO
440 *
441 * Update off-diagonal block in J2-th block column with SGEMM
442 *
443  CALL sgemm( 'No transpose', 'Transpose',
444  $ n-j3+1, nj, jb+1,
445  $ -one, work( j3-j1+1+k1*n ), n,
446  $ a( j2, j1-k2 ), lda,
447  $ one, a( j3, j2 ), lda )
448  END DO
449 *
450 * Recover T( J+1, J )
451 *
452  a( j+1, j ) = alpha
453  END IF
454 *
455 * WORK(J+1, 1) stores H(J+1, 1)
456 *
457  CALL scopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
458  END IF
459  GO TO 11
460  END IF
461 *
462  20 CONTINUE
463  RETURN
464 *
465 * End of SSYTRF_AA
466 *
Here is the call graph for this function:
Here is the caller graph for this function:
sswap
subroutine sswap(N, SX, INCX, SY, INCY)
SSWAP
Definition: sswap.f:84
sgemm
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
sgemv
subroutine sgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
SGEMV
Definition: sgemv.f:158
scopy
subroutine scopy(N, SX, INCX, SY, INCY)
SCOPY
Definition: scopy.f:84
sscal
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:81
xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
lsame
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
ilaenv
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: tstiee.f:83
slasyf_aa
subroutine slasyf_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK)
SLASYF_AA
Definition: slasyf_aa.f:146