172 INTEGER N, LDA, LTB, LWORK, INFO
175 INTEGER IPIV( * ), IPIV2( * )
176 COMPLEX A( LDA, * ), TB( * ), WORK( * )
182 parameter( czero = ( 0.0e+0, 0.0e+0 ),
183 $ cone = ( 1.0e+0, 0.0e+0 ) )
186 LOGICAL UPPER, TQUERY, WQUERY
187 INTEGER I, J, K, I1, I2, TD
188 INTEGER LDTB, NB, KB, JB, NT, IINFO
208 upper =
lsame( uplo,
'U' )
209 wquery = ( lwork.EQ.-1 )
210 tquery = ( ltb.EQ.-1 )
211 IF( .NOT.upper .AND. .NOT.
lsame( uplo,
'L' ) )
THEN
213 ELSE IF( n.LT.0 )
THEN
215 ELSE IF( lda.LT.max( 1, n ) )
THEN
217 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
219 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
224 CALL xerbla(
'CSYTRF_AA_2STAGE', -info )
230 nb =
ilaenv( 1,
'CSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
239 IF( tquery .OR. wquery )
THEN
252 IF( ldtb .LT. 3*nb+1 )
THEN
255 IF( lwork .LT. nb*n )
THEN
289 IF( i .EQ. (j-1) )
THEN
294 CALL cgemm(
'NoTranspose',
'NoTranspose',
296 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
297 $ a( (i-1)*nb+1, j*nb+1 ), lda,
298 $ czero, work( i*nb+1 ), n )
306 CALL cgemm(
'NoTranspose',
'NoTranspose',
308 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
310 $ a( (i-2)*nb+1, j*nb+1 ), lda,
311 $ czero, work( i*nb+1 ), n )
317 CALL clacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
318 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
321 CALL cgemm(
'Transpose',
'NoTranspose',
323 $ -cone, a( 1, j*nb+1 ), lda,
325 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
327 CALL cgemm(
'Transpose',
'NoTranspose',
329 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
330 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
331 $ czero, work( 1 ), n )
332 CALL cgemm(
'NoTranspose',
'NoTranspose',
334 $ -cone, work( 1 ), n,
335 $ a( (j-2)*nb+1, j*nb+1 ), lda,
336 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
343 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
344 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
351 CALL ctrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
352 $ a( (j-1)*nb+1, j*nb+1 ), lda,
353 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
354 CALL ctrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
355 $ a( (j-1)*nb+1, j*nb+1 ), lda,
356 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
365 CALL cgemm(
'NoTranspose',
'NoTranspose',
367 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
368 $ a( (j-1)*nb+1, j*nb+1 ), lda,
369 $ czero, work( j*nb+1 ), n )
371 CALL cgemm(
'NoTranspose',
'NoTranspose',
373 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
375 $ a( (j-2)*nb+1, j*nb+1 ), lda,
376 $ czero, work( j*nb+1 ), n )
381 CALL cgemm(
'Transpose',
'NoTranspose',
382 $ nb, n-(j+1)*nb, j*nb,
383 $ -cone, work( nb+1 ), n,
384 $ a( 1, (j+1)*nb+1 ), lda,
385 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
391 CALL ccopy( n-(j+1)*nb,
392 $ a( j*nb+k, (j+1)*nb+1 ), lda,
393 $ work( 1+(k-1)*n ), 1 )
398 CALL cgetrf( n-(j+1)*nb, nb,
400 $ ipiv( (j+1)*nb+1 ), iinfo )
408 CALL ccopy( n-(j+1)*nb,
409 $ work( 1+(k-1)*n ), 1,
410 $ a( j*nb+k, (j+1)*nb+1 ), lda )
415 kb = min(nb, n-(j+1)*nb)
416 CALL claset(
'Full', kb, nb, czero, czero,
417 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
418 CALL clacpy(
'Upper', kb, nb,
420 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
422 CALL ctrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
423 $ a( (j-1)*nb+1, j*nb+1 ), lda,
424 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
432 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
433 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
436 CALL claset(
'Lower', kb, nb, czero, cone,
437 $ a( j*nb+1, (j+1)*nb+1), lda )
443 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
446 i2 = ipiv( (j+1)*nb+k )
449 CALL cswap( k-1, a( (j+1)*nb+1, i1 ), 1,
450 $ a( (j+1)*nb+1, i2 ), 1 )
453 $
CALL cswap( i2-i1-1, a( i1, i1+1 ), lda,
457 $
CALL cswap( n-i2, a( i1, i2+1 ), lda,
458 $ a( i2, i2+1 ), lda )
461 a( i1, i1 ) = a( i2, i2 )
465 CALL cswap( j*nb, a( 1, i1 ), 1,
486 IF( i .EQ. (j-1) )
THEN
491 CALL cgemm(
'NoTranspose',
'Transpose',
493 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
494 $ a( j*nb+1, (i-1)*nb+1 ), lda,
495 $ czero, work( i*nb+1 ), n )
498 IF( i .EQ. (j-1) )
THEN
503 CALL cgemm(
'NoTranspose',
'Transpose',
505 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
507 $ a( j*nb+1, (i-2)*nb+1 ), lda,
508 $ czero, work( i*nb+1 ), n )
514 CALL clacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
515 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
518 CALL cgemm(
'NoTranspose',
'NoTranspose',
520 $ -cone, a( j*nb+1, 1 ), lda,
522 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
524 CALL cgemm(
'NoTranspose',
'NoTranspose',
526 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
527 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
528 $ czero, work( 1 ), n )
529 CALL cgemm(
'NoTranspose',
'Transpose',
531 $ -cone, work( 1 ), n,
532 $ a( j*nb+1, (j-2)*nb+1 ), lda,
533 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
540 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
541 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
548 CALL ctrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
549 $ a( j*nb+1, (j-1)*nb+1 ), lda,
550 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
551 CALL ctrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
552 $ a( j*nb+1, (j-1)*nb+1 ), lda,
553 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
560 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
561 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
571 CALL cgemm(
'NoTranspose',
'Transpose',
573 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
574 $ a( j*nb+1, (j-1)*nb+1 ), lda,
575 $ czero, work( j*nb+1 ), n )
577 CALL cgemm(
'NoTranspose',
'Transpose',
579 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
581 $ a( j*nb+1, (j-2)*nb+1 ), lda,
582 $ czero, work( j*nb+1 ), n )
587 CALL cgemm(
'NoTranspose',
'NoTranspose',
588 $ n-(j+1)*nb, nb, j*nb,
589 $ -cone, a( (j+1)*nb+1, 1 ), lda,
591 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
596 CALL cgetrf( n-(j+1)*nb, nb,
597 $ a( (j+1)*nb+1, j*nb+1 ), lda,
598 $ ipiv( (j+1)*nb+1 ), iinfo )
605 kb = min(nb, n-(j+1)*nb)
606 CALL claset(
'Full', kb, nb, czero, czero,
607 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
608 CALL clacpy(
'Upper', kb, nb,
609 $ a( (j+1)*nb+1, j*nb+1 ), lda,
610 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
612 CALL ctrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
613 $ a( j*nb+1, (j-1)*nb+1 ), lda,
614 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
622 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
623 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
626 CALL claset(
'Upper', kb, nb, czero, cone,
627 $ a( (j+1)*nb+1, j*nb+1 ), lda )
633 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
636 i2 = ipiv( (j+1)*nb+k )
639 CALL cswap( k-1, a( i1, (j+1)*nb+1 ), lda,
640 $ a( i2, (j+1)*nb+1 ), lda )
643 $
CALL cswap( i2-i1-1, a( i1+1, i1 ), 1,
644 $ a( i2, i1+1 ), lda )
647 $
CALL cswap( n-i2, a( i2+1, i1 ), 1,
651 a( i1, i1 ) = a( i2, i2 )
655 CALL cswap( j*nb, a( i1, 1 ), lda,
670 CALL cgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )