LAPACK  3.9.0
LAPACK: Linear Algebra PACKage

◆ zgesvj()

subroutine zgesvj ( character*1  JOBA,
character*1  JOBU,
character*1  JOBV,
integer  M,
integer  N,
complex*16, dimension( lda, * )  A,
integer  LDA,
double precision, dimension( n )  SVA,
integer  MV,
complex*16, dimension( ldv, * )  V,
integer  LDV,
complex*16, dimension( lwork )  CWORK,
integer  LWORK,
double precision, dimension( lrwork )  RWORK,
integer  LRWORK,
integer  INFO 
)

ZGESVJ

Download ZGESVJ + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZGESVJ computes the singular value decomposition (SVD) of a complex
 M-by-N matrix A, where M >= N. The SVD of A is written as
                                    [++]   [xx]   [x0]   [xx]
              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx]
                                    [++]   [xx]
 where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
 matrix, and V is an N-by-N unitary matrix. The diagonal elements
 of SIGMA are the singular values of A. The columns of U and V are the
 left and the right singular vectors of A, respectively.
Parameters
[in]JOBA
          JOBA is CHARACTER*1
          Specifies the structure of A.
          = 'L': The input matrix A is lower triangular;
          = 'U': The input matrix A is upper triangular;
          = 'G': The input matrix A is general M-by-N matrix, M >= N.
[in]JOBU
          JOBU is CHARACTER*1
          Specifies whether to compute the left singular vectors
          (columns of U):
          = 'U' or 'F': The left singular vectors corresponding to the nonzero
                 singular values are computed and returned in the leading
                 columns of A. See more details in the description of A.
                 The default numerical orthogonality threshold is set to
                 approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=DLAMCH('E').
          = 'C': Analogous to JOBU='U', except that user can control the
                 level of numerical orthogonality of the computed left
                 singular vectors. TOL can be set to TOL = CTOL*EPS, where
                 CTOL is given on input in the array WORK.
                 No CTOL smaller than ONE is allowed. CTOL greater
                 than 1 / EPS is meaningless. The option 'C'
                 can be used if M*EPS is satisfactory orthogonality
                 of the computed left singular vectors, so CTOL=M could
                 save few sweeps of Jacobi rotations.
                 See the descriptions of A and WORK(1).
          = 'N': The matrix U is not computed. However, see the
                 description of A.
[in]JOBV
          JOBV is CHARACTER*1
          Specifies whether to compute the right singular vectors, that
          is, the matrix V:
          = 'V' or 'J': the matrix V is computed and returned in the array V
          = 'A':  the Jacobi rotations are applied to the MV-by-N
                  array V. In other words, the right singular vector
                  matrix V is not computed explicitly; instead it is
                  applied to an MV-by-N matrix initially stored in the
                  first MV rows of V.
          = 'N':  the matrix V is not computed and the array V is not
                  referenced
[in]M
          M is INTEGER
          The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0.
[in]N
          N is INTEGER
          The number of columns of the input matrix A.
          M >= N >= 0.
[in,out]A
          A is COMPLEX*16 array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
          On exit,
          If JOBU = 'U' .OR. JOBU = 'C':
                 If INFO = 0 :
                 RANKA orthonormal columns of U are returned in the
                 leading RANKA columns of the array A. Here RANKA <= N
                 is the number of computed singular values of A that are
                 above the underflow threshold DLAMCH('S'). The singular
                 vectors corresponding to underflowed or zero singular
                 values are not computed. The value of RANKA is returned
                 in the array RWORK as RANKA=NINT(RWORK(2)). Also see the
                 descriptions of SVA and RWORK. The computed columns of U
                 are mutually numerically orthogonal up to approximately
                 TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'),
                 see the description of JOBU.
                 If INFO > 0,
                 the procedure ZGESVJ did not converge in the given number
                 of iterations (sweeps). In that case, the computed
                 columns of U may not be orthogonal up to TOL. The output
                 U (stored in A), SIGMA (given by the computed singular
                 values in SVA(1:N)) and V is still a decomposition of the
                 input matrix A in the sense that the residual
                 || A - SCALE * U * SIGMA * V^* ||_2 / ||A||_2 is small.
          If JOBU = 'N':
                 If INFO = 0 :
                 Note that the left singular vectors are 'for free' in the
                 one-sided Jacobi SVD algorithm. However, if only the
                 singular values are needed, the level of numerical
                 orthogonality of U is not an issue and iterations are
                 stopped when the columns of the iterated matrix are
                 numerically orthogonal up to approximately M*EPS. Thus,
                 on exit, A contains the columns of U scaled with the
                 corresponding singular values.
                 If INFO > 0:
                 the procedure ZGESVJ did not converge in the given number
                 of iterations (sweeps).
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]SVA
          SVA is DOUBLE PRECISION array, dimension (N)
          On exit,
          If INFO = 0 :
          depending on the value SCALE = RWORK(1), we have:
                 If SCALE = ONE:
                 SVA(1:N) contains the computed singular values of A.
                 During the computation SVA contains the Euclidean column
                 norms of the iterated matrices in the array A.
                 If SCALE .NE. ONE:
                 The singular values of A are SCALE*SVA(1:N), and this
                 factored representation is due to the fact that some of the
                 singular values of A might underflow or overflow.

          If INFO > 0:
          the procedure ZGESVJ did not converge in the given number of
          iterations (sweeps) and SCALE*SVA(1:N) may not be accurate.
[in]MV
          MV is INTEGER
          If JOBV = 'A', then the product of Jacobi rotations in ZGESVJ
          is applied to the first MV rows of V. See the description of JOBV.
[in,out]V
          V is COMPLEX*16 array, dimension (LDV,N)
          If JOBV = 'V', then V contains on exit the N-by-N matrix of
                         the right singular vectors;
          If JOBV = 'A', then V contains the product of the computed right
                         singular vector matrix and the initial matrix in
                         the array V.
          If JOBV = 'N', then V is not referenced.
[in]LDV
          LDV is INTEGER
          The leading dimension of the array V, LDV >= 1.
          If JOBV = 'V', then LDV >= max(1,N).
          If JOBV = 'A', then LDV >= max(1,MV) .
[in,out]CWORK
          CWORK is COMPLEX*16 array, dimension (max(1,LWORK))
          Used as workspace.
          If on entry LWORK = -1, then a workspace query is assumed and
          no computation is done; CWORK(1) is set to the minial (and optimal)
          length of CWORK.
[in]LWORK
          LWORK is INTEGER.
          Length of CWORK, LWORK >= M+N.
[in,out]RWORK
          RWORK is DOUBLE PRECISION array, dimension (max(6,LRWORK))
          On entry,
          If JOBU = 'C' :
          RWORK(1) = CTOL, where CTOL defines the threshold for convergence.
                    The process stops if all columns of A are mutually
                    orthogonal up to CTOL*EPS, EPS=DLAMCH('E').
                    It is required that CTOL >= ONE, i.e. it is not
                    allowed to force the routine to obtain orthogonality
                    below EPSILON.
          On exit,
          RWORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N)
                    are the computed singular values of A.
                    (See description of SVA().)
          RWORK(2) = NINT(RWORK(2)) is the number of the computed nonzero
                    singular values.
          RWORK(3) = NINT(RWORK(3)) is the number of the computed singular
                    values that are larger than the underflow threshold.
          RWORK(4) = NINT(RWORK(4)) is the number of sweeps of Jacobi
                    rotations needed for numerical convergence.
          RWORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep.
                    This is useful information in cases when ZGESVJ did
                    not converge, as it can be used to estimate whether
                    the output is still useful and for post festum analysis.
          RWORK(6) = the largest absolute value over all sines of the
                    Jacobi rotation angles in the last sweep. It can be
                    useful for a post festum analysis.
         If on entry LRWORK = -1, then a workspace query is assumed and
         no computation is done; RWORK(1) is set to the minial (and optimal)
         length of RWORK.
[in]LRWORK
         LRWORK is INTEGER
         Length of RWORK, LRWORK >= MAX(6,N).
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, then the i-th argument had an illegal value
          > 0:  ZGESVJ did not converge in the maximal allowed number
                (NSWEEP=30) of sweeps. The output may still be useful.
                See the description of RWORK.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
June 2016
Further Details:
 The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane
 rotations. In the case of underflow of the tangent of the Jacobi angle, a
 modified Jacobi transformation of Drmac [3] is used. Pivot strategy uses
 column interchanges of de Rijk [1]. The relative accuracy of the computed
 singular values and the accuracy of the computed singular vectors (in
 angle metric) is as guaranteed by the theory of Demmel and Veselic [2].
 The condition number that determines the accuracy in the full rank case
 is essentially min_{D=diag} kappa(A*D), where kappa(.) is the
 spectral condition number. The best performance of this Jacobi SVD
 procedure is achieved if used in an  accelerated version of Drmac and
 Veselic [4,5], and it is the kernel routine in the SIGMA library [6].
 Some tunning parameters (marked with [TP]) are available for the
 implementer.
 The computational range for the nonzero singular values is the  machine
 number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even
 denormalized singular values can be computed with the corresponding
 gradual loss of accurate digits.
Contributor:
  ============

  Zlatko Drmac (Zagreb, Croatia)
References:
 [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the
    singular value decomposition on a vector computer.
    SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371.
 [2] J. Demmel and K. Veselic: Jacobi method is more accurate than QR.
 [3] Z. Drmac: Implementation of Jacobi rotations for accurate singular
    value computation in floating point arithmetic.
    SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222.
 [4] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
    SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
    LAPACK Working note 169.
 [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
    SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
    LAPACK Working note 170.
 [6] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
    QSVD, (H,K)-SVD computations.
    Department of Mathematics, University of Zagreb, 2008, 2015.
Bugs, examples and comments:
  ===========================
  Please report all bugs and send interesting test examples and comments to
  drmac@math.hr. Thank you.

Definition at line 353 of file zgesvj.f.

353 *
354 * -- LAPACK computational routine (version 3.8.0) --
355 * -- LAPACK is a software package provided by Univ. of Tennessee, --
356 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
357 * June 2016
358 *
359  IMPLICIT NONE
360 * .. Scalar Arguments ..
361  INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N
362  CHARACTER*1 JOBA, JOBU, JOBV
363 * ..
364 * .. Array Arguments ..
365  COMPLEX*16 A( LDA, * ), V( LDV, * ), CWORK( LWORK )
366  DOUBLE PRECISION RWORK( LRWORK ), SVA( N )
367 * ..
368 *
369 * =====================================================================
370 *
371 * .. Local Parameters ..
372  DOUBLE PRECISION ZERO, HALF, ONE
373  parameter( zero = 0.0d0, half = 0.5d0, one = 1.0d0)
374  COMPLEX*16 CZERO, CONE
375  parameter( czero = (0.0d0, 0.0d0), cone = (1.0d0, 0.0d0) )
376  INTEGER NSWEEP
377  parameter( nsweep = 30 )
378 * ..
379 * .. Local Scalars ..
380  COMPLEX*16 AAPQ, OMPQ
381  DOUBLE PRECISION AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG,
382  $ BIGTHETA, CS, CTOL, EPSLN, MXAAPQ,
383  $ MXSINJ, ROOTBIG, ROOTEPS, ROOTSFMIN, ROOTTOL,
384  $ SKL, SFMIN, SMALL, SN, T, TEMP1, THETA, THSIGN, TOL
385  INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1,
386  $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, N2, N34,
387  $ N4, NBL, NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND
388  LOGICAL APPLV, GOSCALE, LOWER, LQUERY, LSVEC, NOSCALE, ROTOK,
389  $ RSVEC, UCTOL, UPPER
390 * ..
391 * ..
392 * .. Intrinsic Functions ..
393  INTRINSIC abs, max, min, conjg, dble, sign, sqrt
394 * ..
395 * .. External Functions ..
396 * ..
397 * from BLAS
398  DOUBLE PRECISION DZNRM2
399  COMPLEX*16 ZDOTC
400  EXTERNAL zdotc, dznrm2
401  INTEGER IDAMAX
402  EXTERNAL idamax
403 * from LAPACK
404  DOUBLE PRECISION DLAMCH
405  EXTERNAL dlamch
406  LOGICAL LSAME
407  EXTERNAL lsame
408 * ..
409 * .. External Subroutines ..
410 * ..
411 * from BLAS
412  EXTERNAL zcopy, zrot, zdscal, zswap, zaxpy
413 * from LAPACK
414  EXTERNAL dlascl, zlascl, zlaset, zlassq, xerbla
415  EXTERNAL zgsvj0, zgsvj1
416 * ..
417 * .. Executable Statements ..
418 *
419 * Test the input arguments
420 *
421  lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
422  uctol = lsame( jobu, 'C' )
423  rsvec = lsame( jobv, 'V' ) .OR. lsame( jobv, 'J' )
424  applv = lsame( jobv, 'A' )
425  upper = lsame( joba, 'U' )
426  lower = lsame( joba, 'L' )
427 *
428  lquery = ( lwork .EQ. -1 ) .OR. ( lrwork .EQ. -1 )
429  IF( .NOT.( upper .OR. lower .OR. lsame( joba, 'G' ) ) ) THEN
430  info = -1
431  ELSE IF( .NOT.( lsvec .OR. uctol .OR. lsame( jobu, 'N' ) ) ) THEN
432  info = -2
433  ELSE IF( .NOT.( rsvec .OR. applv .OR. lsame( jobv, 'N' ) ) ) THEN
434  info = -3
435  ELSE IF( m.LT.0 ) THEN
436  info = -4
437  ELSE IF( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
438  info = -5
439  ELSE IF( lda.LT.m ) THEN
440  info = -7
441  ELSE IF( mv.LT.0 ) THEN
442  info = -9
443  ELSE IF( ( rsvec .AND. ( ldv.LT.n ) ) .OR.
444  $ ( applv .AND. ( ldv.LT.mv ) ) ) THEN
445  info = -11
446  ELSE IF( uctol .AND. ( rwork( 1 ).LE.one ) ) THEN
447  info = -12
448  ELSE IF( ( lwork.LT.( m+n ) ) .AND. ( .NOT.lquery ) ) THEN
449  info = -13
450  ELSE IF( ( lrwork.LT.max( n, 6 ) ) .AND. ( .NOT.lquery ) ) THEN
451  info = -15
452  ELSE
453  info = 0
454  END IF
455 *
456 * #:(
457  IF( info.NE.0 ) THEN
458  CALL xerbla( 'ZGESVJ', -info )
459  RETURN
460  ELSE IF ( lquery ) THEN
461  cwork(1) = m + n
462  rwork(1) = max( n, 6 )
463  RETURN
464  END IF
465 *
466 * #:) Quick return for void matrix
467 *
468  IF( ( m.EQ.0 ) .OR. ( n.EQ.0 ) )RETURN
469 *
470 * Set numerical parameters
471 * The stopping criterion for Jacobi rotations is
472 *
473 * max_{i<>j}|A(:,i)^* * A(:,j)| / (||A(:,i)||*||A(:,j)||) < CTOL*EPS
474 *
475 * where EPS is the round-off and CTOL is defined as follows:
476 *
477  IF( uctol ) THEN
478 * ... user controlled
479  ctol = rwork( 1 )
480  ELSE
481 * ... default
482  IF( lsvec .OR. rsvec .OR. applv ) THEN
483  ctol = sqrt( dble( m ) )
484  ELSE
485  ctol = dble( m )
486  END IF
487  END IF
488 * ... and the machine dependent parameters are
489 *[!] (Make sure that SLAMCH() works properly on the target machine.)
490 *
491  epsln = dlamch( 'Epsilon' )
492  rooteps = sqrt( epsln )
493  sfmin = dlamch( 'SafeMinimum' )
494  rootsfmin = sqrt( sfmin )
495  small = sfmin / epsln
496  big = dlamch( 'Overflow' )
497 * BIG = ONE / SFMIN
498  rootbig = one / rootsfmin
499 * LARGE = BIG / SQRT( DBLE( M*N ) )
500  bigtheta = one / rooteps
501 *
502  tol = ctol*epsln
503  roottol = sqrt( tol )
504 *
505  IF( dble( m )*epsln.GE.one ) THEN
506  info = -4
507  CALL xerbla( 'ZGESVJ', -info )
508  RETURN
509  END IF
510 *
511 * Initialize the right singular vector matrix.
512 *
513  IF( rsvec ) THEN
514  mvl = n
515  CALL zlaset( 'A', mvl, n, czero, cone, v, ldv )
516  ELSE IF( applv ) THEN
517  mvl = mv
518  END IF
519  rsvec = rsvec .OR. applv
520 *
521 * Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N )
522 *(!) If necessary, scale A to protect the largest singular value
523 * from overflow. It is possible that saving the largest singular
524 * value destroys the information about the small ones.
525 * This initial scaling is almost minimal in the sense that the
526 * goal is to make sure that no column norm overflows, and that
527 * SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries
528 * in A are detected, the procedure returns with INFO=-6.
529 *
530  skl = one / sqrt( dble( m )*dble( n ) )
531  noscale = .true.
532  goscale = .true.
533 *
534  IF( lower ) THEN
535 * the input matrix is M-by-N lower triangular (trapezoidal)
536  DO 1874 p = 1, n
537  aapp = zero
538  aaqq = one
539  CALL zlassq( m-p+1, a( p, p ), 1, aapp, aaqq )
540  IF( aapp.GT.big ) THEN
541  info = -6
542  CALL xerbla( 'ZGESVJ', -info )
543  RETURN
544  END IF
545  aaqq = sqrt( aaqq )
546  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
547  sva( p ) = aapp*aaqq
548  ELSE
549  noscale = .false.
550  sva( p ) = aapp*( aaqq*skl )
551  IF( goscale ) THEN
552  goscale = .false.
553  DO 1873 q = 1, p - 1
554  sva( q ) = sva( q )*skl
555  1873 CONTINUE
556  END IF
557  END IF
558  1874 CONTINUE
559  ELSE IF( upper ) THEN
560 * the input matrix is M-by-N upper triangular (trapezoidal)
561  DO 2874 p = 1, n
562  aapp = zero
563  aaqq = one
564  CALL zlassq( p, a( 1, p ), 1, aapp, aaqq )
565  IF( aapp.GT.big ) THEN
566  info = -6
567  CALL xerbla( 'ZGESVJ', -info )
568  RETURN
569  END IF
570  aaqq = sqrt( aaqq )
571  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
572  sva( p ) = aapp*aaqq
573  ELSE
574  noscale = .false.
575  sva( p ) = aapp*( aaqq*skl )
576  IF( goscale ) THEN
577  goscale = .false.
578  DO 2873 q = 1, p - 1
579  sva( q ) = sva( q )*skl
580  2873 CONTINUE
581  END IF
582  END IF
583  2874 CONTINUE
584  ELSE
585 * the input matrix is M-by-N general dense
586  DO 3874 p = 1, n
587  aapp = zero
588  aaqq = one
589  CALL zlassq( m, a( 1, p ), 1, aapp, aaqq )
590  IF( aapp.GT.big ) THEN
591  info = -6
592  CALL xerbla( 'ZGESVJ', -info )
593  RETURN
594  END IF
595  aaqq = sqrt( aaqq )
596  IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
597  sva( p ) = aapp*aaqq
598  ELSE
599  noscale = .false.
600  sva( p ) = aapp*( aaqq*skl )
601  IF( goscale ) THEN
602  goscale = .false.
603  DO 3873 q = 1, p - 1
604  sva( q ) = sva( q )*skl
605  3873 CONTINUE
606  END IF
607  END IF
608  3874 CONTINUE
609  END IF
610 *
611  IF( noscale )skl = one
612 *
613 * Move the smaller part of the spectrum from the underflow threshold
614 *(!) Start by determining the position of the nonzero entries of the
615 * array SVA() relative to ( SFMIN, BIG ).
616 *
617  aapp = zero
618  aaqq = big
619  DO 4781 p = 1, n
620  IF( sva( p ).NE.zero )aaqq = min( aaqq, sva( p ) )
621  aapp = max( aapp, sva( p ) )
622  4781 CONTINUE
623 *
624 * #:) Quick return for zero matrix
625 *
626  IF( aapp.EQ.zero ) THEN
627  IF( lsvec )CALL zlaset( 'G', m, n, czero, cone, a, lda )
628  rwork( 1 ) = one
629  rwork( 2 ) = zero
630  rwork( 3 ) = zero
631  rwork( 4 ) = zero
632  rwork( 5 ) = zero
633  rwork( 6 ) = zero
634  RETURN
635  END IF
636 *
637 * #:) Quick return for one-column matrix
638 *
639  IF( n.EQ.1 ) THEN
640  IF( lsvec )CALL zlascl( 'G', 0, 0, sva( 1 ), skl, m, 1,
641  $ a( 1, 1 ), lda, ierr )
642  rwork( 1 ) = one / skl
643  IF( sva( 1 ).GE.sfmin ) THEN
644  rwork( 2 ) = one
645  ELSE
646  rwork( 2 ) = zero
647  END IF
648  rwork( 3 ) = zero
649  rwork( 4 ) = zero
650  rwork( 5 ) = zero
651  rwork( 6 ) = zero
652  RETURN
653  END IF
654 *
655 * Protect small singular values from underflow, and try to
656 * avoid underflows/overflows in computing Jacobi rotations.
657 *
658  sn = sqrt( sfmin / epsln )
659  temp1 = sqrt( big / dble( n ) )
660  IF( ( aapp.LE.sn ) .OR. ( aaqq.GE.temp1 ) .OR.
661  $ ( ( sn.LE.aaqq ) .AND. ( aapp.LE.temp1 ) ) ) THEN
662  temp1 = min( big, temp1 / aapp )
663 * AAQQ = AAQQ*TEMP1
664 * AAPP = AAPP*TEMP1
665  ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.LE.temp1 ) ) THEN
666  temp1 = min( sn / aaqq, big / (aapp*sqrt( dble(n)) ) )
667 * AAQQ = AAQQ*TEMP1
668 * AAPP = AAPP*TEMP1
669  ELSE IF( ( aaqq.GE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
670  temp1 = max( sn / aaqq, temp1 / aapp )
671 * AAQQ = AAQQ*TEMP1
672 * AAPP = AAPP*TEMP1
673  ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
674  temp1 = min( sn / aaqq, big / ( sqrt( dble( n ) )*aapp ) )
675 * AAQQ = AAQQ*TEMP1
676 * AAPP = AAPP*TEMP1
677  ELSE
678  temp1 = one
679  END IF
680 *
681 * Scale, if necessary
682 *
683  IF( temp1.NE.one ) THEN
684  CALL dlascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr )
685  END IF
686  skl = temp1*skl
687  IF( skl.NE.one ) THEN
688  CALL zlascl( joba, 0, 0, one, skl, m, n, a, lda, ierr )
689  skl = one / skl
690  END IF
691 *
692 * Row-cyclic Jacobi SVD algorithm with column pivoting
693 *
694  emptsw = ( n*( n-1 ) ) / 2
695  notrot = 0
696 
697  DO 1868 q = 1, n
698  cwork( q ) = cone
699  1868 CONTINUE
700 *
701 *
702 *
703  swband = 3
704 *[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective
705 * if ZGESVJ is used as a computational routine in the preconditioned
706 * Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure
707 * works on pivots inside a band-like region around the diagonal.
708 * The boundaries are determined dynamically, based on the number of
709 * pivots above a threshold.
710 *
711  kbl = min( 8, n )
712 *[TP] KBL is a tuning parameter that defines the tile size in the
713 * tiling of the p-q loops of pivot pairs. In general, an optimal
714 * value of KBL depends on the matrix dimensions and on the
715 * parameters of the computer's memory.
716 *
717  nbl = n / kbl
718  IF( ( nbl*kbl ).NE.n )nbl = nbl + 1
719 *
720  blskip = kbl**2
721 *[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
722 *
723  rowskip = min( 5, kbl )
724 *[TP] ROWSKIP is a tuning parameter.
725 *
726  lkahead = 1
727 *[TP] LKAHEAD is a tuning parameter.
728 *
729 * Quasi block transformations, using the lower (upper) triangular
730 * structure of the input matrix. The quasi-block-cycling usually
731 * invokes cubic convergence. Big part of this cycle is done inside
732 * canonical subspaces of dimensions less than M.
733 *
734  IF( ( lower .OR. upper ) .AND. ( n.GT.max( 64, 4*kbl ) ) ) THEN
735 *[TP] The number of partition levels and the actual partition are
736 * tuning parameters.
737  n4 = n / 4
738  n2 = n / 2
739  n34 = 3*n4
740  IF( applv ) THEN
741  q = 0
742  ELSE
743  q = 1
744  END IF
745 *
746  IF( lower ) THEN
747 *
748 * This works very well on lower triangular matrices, in particular
749 * in the framework of the preconditioned Jacobi SVD (xGEJSV).
750 * The idea is simple:
751 * [+ 0 0 0] Note that Jacobi transformations of [0 0]
752 * [+ + 0 0] [0 0]
753 * [+ + x 0] actually work on [x 0] [x 0]
754 * [+ + x x] [x x]. [x x]
755 *
756  CALL zgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,
757  $ cwork( n34+1 ), sva( n34+1 ), mvl,
758  $ v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,
759  $ 2, cwork( n+1 ), lwork-n, ierr )
760 
761  CALL zgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,
762  $ cwork( n2+1 ), sva( n2+1 ), mvl,
763  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,
764  $ cwork( n+1 ), lwork-n, ierr )
765 
766  CALL zgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,
767  $ cwork( n2+1 ), sva( n2+1 ), mvl,
768  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
769  $ cwork( n+1 ), lwork-n, ierr )
770 
771  CALL zgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,
772  $ cwork( n4+1 ), sva( n4+1 ), mvl,
773  $ v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,
774  $ cwork( n+1 ), lwork-n, ierr )
775 *
776  CALL zgsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v, ldv,
777  $ epsln, sfmin, tol, 1, cwork( n+1 ), lwork-n,
778  $ ierr )
779 *
780  CALL zgsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,
781  $ ldv, epsln, sfmin, tol, 1, cwork( n+1 ),
782  $ lwork-n, ierr )
783 *
784 *
785  ELSE IF( upper ) THEN
786 *
787 *
788  CALL zgsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v, ldv,
789  $ epsln, sfmin, tol, 2, cwork( n+1 ), lwork-n,
790  $ ierr )
791 *
792  CALL zgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda, cwork( n4+1 ),
793  $ sva( n4+1 ), mvl, v( n4*q+1, n4+1 ), ldv,
794  $ epsln, sfmin, tol, 1, cwork( n+1 ), lwork-n,
795  $ ierr )
796 *
797  CALL zgsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl, v,
798  $ ldv, epsln, sfmin, tol, 1, cwork( n+1 ),
799  $ lwork-n, ierr )
800 *
801  CALL zgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,
802  $ cwork( n2+1 ), sva( n2+1 ), mvl,
803  $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
804  $ cwork( n+1 ), lwork-n, ierr )
805 
806  END IF
807 *
808  END IF
809 *
810 * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
811 *
812  DO 1993 i = 1, nsweep
813 *
814 * .. go go go ...
815 *
816  mxaapq = zero
817  mxsinj = zero
818  iswrot = 0
819 *
820  notrot = 0
821  pskipped = 0
822 *
823 * Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs
824 * 1 <= p < q <= N. This is the first step toward a blocked implementation
825 * of the rotations. New implementation, based on block transformations,
826 * is under development.
827 *
828  DO 2000 ibr = 1, nbl
829 *
830  igl = ( ibr-1 )*kbl + 1
831 *
832  DO 1002 ir1 = 0, min( lkahead, nbl-ibr )
833 *
834  igl = igl + ir1*kbl
835 *
836  DO 2001 p = igl, min( igl+kbl-1, n-1 )
837 *
838 * .. de Rijk's pivoting
839 *
840  q = idamax( n-p+1, sva( p ), 1 ) + p - 1
841  IF( p.NE.q ) THEN
842  CALL zswap( m, a( 1, p ), 1, a( 1, q ), 1 )
843  IF( rsvec )CALL zswap( mvl, v( 1, p ), 1,
844  $ v( 1, q ), 1 )
845  temp1 = sva( p )
846  sva( p ) = sva( q )
847  sva( q ) = temp1
848  aapq = cwork(p)
849  cwork(p) = cwork(q)
850  cwork(q) = aapq
851  END IF
852 *
853  IF( ir1.EQ.0 ) THEN
854 *
855 * Column norms are periodically updated by explicit
856 * norm computation.
857 *[!] Caveat:
858 * Unfortunately, some BLAS implementations compute DZNRM2(M,A(1,p),1)
859 * as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to
860 * overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to
861 * underflow for ||A(:,p)||_2 < SQRT(underflow_threshold).
862 * Hence, DZNRM2 cannot be trusted, not even in the case when
863 * the true norm is far from the under(over)flow boundaries.
864 * If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF
865 * below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )".
866 *
867  IF( ( sva( p ).LT.rootbig ) .AND.
868  $ ( sva( p ).GT.rootsfmin ) ) THEN
869  sva( p ) = dznrm2( m, a( 1, p ), 1 )
870  ELSE
871  temp1 = zero
872  aapp = one
873  CALL zlassq( m, a( 1, p ), 1, temp1, aapp )
874  sva( p ) = temp1*sqrt( aapp )
875  END IF
876  aapp = sva( p )
877  ELSE
878  aapp = sva( p )
879  END IF
880 *
881  IF( aapp.GT.zero ) THEN
882 *
883  pskipped = 0
884 *
885  DO 2002 q = p + 1, min( igl+kbl-1, n )
886 *
887  aaqq = sva( q )
888 *
889  IF( aaqq.GT.zero ) THEN
890 *
891  aapp0 = aapp
892  IF( aaqq.GE.one ) THEN
893  rotok = ( small*aapp ).LE.aaqq
894  IF( aapp.LT.( big / aaqq ) ) THEN
895  aapq = ( zdotc( m, a( 1, p ), 1,
896  $ a( 1, q ), 1 ) / aaqq ) / aapp
897  ELSE
898  CALL zcopy( m, a( 1, p ), 1,
899  $ cwork(n+1), 1 )
900  CALL zlascl( 'G', 0, 0, aapp, one,
901  $ m, 1, cwork(n+1), lda, ierr )
902  aapq = zdotc( m, cwork(n+1), 1,
903  $ a( 1, q ), 1 ) / aaqq
904  END IF
905  ELSE
906  rotok = aapp.LE.( aaqq / small )
907  IF( aapp.GT.( small / aaqq ) ) THEN
908  aapq = ( zdotc( m, a( 1, p ), 1,
909  $ a( 1, q ), 1 ) / aapp ) / aaqq
910  ELSE
911  CALL zcopy( m, a( 1, q ), 1,
912  $ cwork(n+1), 1 )
913  CALL zlascl( 'G', 0, 0, aaqq,
914  $ one, m, 1,
915  $ cwork(n+1), lda, ierr )
916  aapq = zdotc( m, a(1, p ), 1,
917  $ cwork(n+1), 1 ) / aapp
918  END IF
919  END IF
920 *
921 
922 * AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q)
923  aapq1 = -abs(aapq)
924  mxaapq = max( mxaapq, -aapq1 )
925 *
926 * TO rotate or NOT to rotate, THAT is the question ...
927 *
928  IF( abs( aapq1 ).GT.tol ) THEN
929  ompq = aapq / abs(aapq)
930 *
931 * .. rotate
932 *[RTD] ROTATED = ROTATED + ONE
933 *
934  IF( ir1.EQ.0 ) THEN
935  notrot = 0
936  pskipped = 0
937  iswrot = iswrot + 1
938  END IF
939 *
940  IF( rotok ) THEN
941 *
942  aqoap = aaqq / aapp
943  apoaq = aapp / aaqq
944  theta = -half*abs( aqoap-apoaq )/aapq1
945 *
946  IF( abs( theta ).GT.bigtheta ) THEN
947 *
948  t = half / theta
949  cs = one
950 
951  CALL zrot( m, a(1,p), 1, a(1,q), 1,
952  $ cs, conjg(ompq)*t )
953  IF ( rsvec ) THEN
954  CALL zrot( mvl, v(1,p), 1,
955  $ v(1,q), 1, cs, conjg(ompq)*t )
956  END IF
957 
958  sva( q ) = aaqq*sqrt( max( zero,
959  $ one+t*apoaq*aapq1 ) )
960  aapp = aapp*sqrt( max( zero,
961  $ one-t*aqoap*aapq1 ) )
962  mxsinj = max( mxsinj, abs( t ) )
963 *
964  ELSE
965 *
966 * .. choose correct signum for THETA and rotate
967 *
968  thsign = -sign( one, aapq1 )
969  t = one / ( theta+thsign*
970  $ sqrt( one+theta*theta ) )
971  cs = sqrt( one / ( one+t*t ) )
972  sn = t*cs
973 *
974  mxsinj = max( mxsinj, abs( sn ) )
975  sva( q ) = aaqq*sqrt( max( zero,
976  $ one+t*apoaq*aapq1 ) )
977  aapp = aapp*sqrt( max( zero,
978  $ one-t*aqoap*aapq1 ) )
979 *
980  CALL zrot( m, a(1,p), 1, a(1,q), 1,
981  $ cs, conjg(ompq)*sn )
982  IF ( rsvec ) THEN
983  CALL zrot( mvl, v(1,p), 1,
984  $ v(1,q), 1, cs, conjg(ompq)*sn )
985  END IF
986  END IF
987  cwork(p) = -cwork(q) * ompq
988 *
989  ELSE
990 * .. have to use modified Gram-Schmidt like transformation
991  CALL zcopy( m, a( 1, p ), 1,
992  $ cwork(n+1), 1 )
993  CALL zlascl( 'G', 0, 0, aapp, one, m,
994  $ 1, cwork(n+1), lda,
995  $ ierr )
996  CALL zlascl( 'G', 0, 0, aaqq, one, m,
997  $ 1, a( 1, q ), lda, ierr )
998  CALL zaxpy( m, -aapq, cwork(n+1), 1,
999  $ a( 1, q ), 1 )
1000  CALL zlascl( 'G', 0, 0, one, aaqq, m,
1001  $ 1, a( 1, q ), lda, ierr )
1002  sva( q ) = aaqq*sqrt( max( zero,
1003  $ one-aapq1*aapq1 ) )
1004  mxsinj = max( mxsinj, sfmin )
1005  END IF
1006 * END IF ROTOK THEN ... ELSE
1007 *
1008 * In the case of cancellation in updating SVA(q), SVA(p)
1009 * recompute SVA(q), SVA(p).
1010 *
1011  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1012  $ THEN
1013  IF( ( aaqq.LT.rootbig ) .AND.
1014  $ ( aaqq.GT.rootsfmin ) ) THEN
1015  sva( q ) = dznrm2( m, a( 1, q ), 1 )
1016  ELSE
1017  t = zero
1018  aaqq = one
1019  CALL zlassq( m, a( 1, q ), 1, t,
1020  $ aaqq )
1021  sva( q ) = t*sqrt( aaqq )
1022  END IF
1023  END IF
1024  IF( ( aapp / aapp0 ).LE.rooteps ) THEN
1025  IF( ( aapp.LT.rootbig ) .AND.
1026  $ ( aapp.GT.rootsfmin ) ) THEN
1027  aapp = dznrm2( m, a( 1, p ), 1 )
1028  ELSE
1029  t = zero
1030  aapp = one
1031  CALL zlassq( m, a( 1, p ), 1, t,
1032  $ aapp )
1033  aapp = t*sqrt( aapp )
1034  END IF
1035  sva( p ) = aapp
1036  END IF
1037 *
1038  ELSE
1039 * A(:,p) and A(:,q) already numerically orthogonal
1040  IF( ir1.EQ.0 )notrot = notrot + 1
1041 *[RTD] SKIPPED = SKIPPED + 1
1042  pskipped = pskipped + 1
1043  END IF
1044  ELSE
1045 * A(:,q) is zero column
1046  IF( ir1.EQ.0 )notrot = notrot + 1
1047  pskipped = pskipped + 1
1048  END IF
1049 *
1050  IF( ( i.LE.swband ) .AND.
1051  $ ( pskipped.GT.rowskip ) ) THEN
1052  IF( ir1.EQ.0 )aapp = -aapp
1053  notrot = 0
1054  GO TO 2103
1055  END IF
1056 *
1057  2002 CONTINUE
1058 * END q-LOOP
1059 *
1060  2103 CONTINUE
1061 * bailed out of q-loop
1062 *
1063  sva( p ) = aapp
1064 *
1065  ELSE
1066  sva( p ) = aapp
1067  IF( ( ir1.EQ.0 ) .AND. ( aapp.EQ.zero ) )
1068  $ notrot = notrot + min( igl+kbl-1, n ) - p
1069  END IF
1070 *
1071  2001 CONTINUE
1072 * end of the p-loop
1073 * end of doing the block ( ibr, ibr )
1074  1002 CONTINUE
1075 * end of ir1-loop
1076 *
1077 * ... go to the off diagonal blocks
1078 *
1079  igl = ( ibr-1 )*kbl + 1
1080 *
1081  DO 2010 jbc = ibr + 1, nbl
1082 *
1083  jgl = ( jbc-1 )*kbl + 1
1084 *
1085 * doing the block at ( ibr, jbc )
1086 *
1087  ijblsk = 0
1088  DO 2100 p = igl, min( igl+kbl-1, n )
1089 *
1090  aapp = sva( p )
1091  IF( aapp.GT.zero ) THEN
1092 *
1093  pskipped = 0
1094 *
1095  DO 2200 q = jgl, min( jgl+kbl-1, n )
1096 *
1097  aaqq = sva( q )
1098  IF( aaqq.GT.zero ) THEN
1099  aapp0 = aapp
1100 *
1101 * .. M x 2 Jacobi SVD ..
1102 *
1103 * Safe Gram matrix computation
1104 *
1105  IF( aaqq.GE.one ) THEN
1106  IF( aapp.GE.aaqq ) THEN
1107  rotok = ( small*aapp ).LE.aaqq
1108  ELSE
1109  rotok = ( small*aaqq ).LE.aapp
1110  END IF
1111  IF( aapp.LT.( big / aaqq ) ) THEN
1112  aapq = ( zdotc( m, a( 1, p ), 1,
1113  $ a( 1, q ), 1 ) / aaqq ) / aapp
1114  ELSE
1115  CALL zcopy( m, a( 1, p ), 1,
1116  $ cwork(n+1), 1 )
1117  CALL zlascl( 'G', 0, 0, aapp,
1118  $ one, m, 1,
1119  $ cwork(n+1), lda, ierr )
1120  aapq = zdotc( m, cwork(n+1), 1,
1121  $ a( 1, q ), 1 ) / aaqq
1122  END IF
1123  ELSE
1124  IF( aapp.GE.aaqq ) THEN
1125  rotok = aapp.LE.( aaqq / small )
1126  ELSE
1127  rotok = aaqq.LE.( aapp / small )
1128  END IF
1129  IF( aapp.GT.( small / aaqq ) ) THEN
1130  aapq = ( zdotc( m, a( 1, p ), 1,
1131  $ a( 1, q ), 1 ) / max(aaqq,aapp) )
1132  $ / min(aaqq,aapp)
1133  ELSE
1134  CALL zcopy( m, a( 1, q ), 1,
1135  $ cwork(n+1), 1 )
1136  CALL zlascl( 'G', 0, 0, aaqq,
1137  $ one, m, 1,
1138  $ cwork(n+1), lda, ierr )
1139  aapq = zdotc( m, a( 1, p ), 1,
1140  $ cwork(n+1), 1 ) / aapp
1141  END IF
1142  END IF
1143 *
1144 
1145 * AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q)
1146  aapq1 = -abs(aapq)
1147  mxaapq = max( mxaapq, -aapq1 )
1148 *
1149 * TO rotate or NOT to rotate, THAT is the question ...
1150 *
1151  IF( abs( aapq1 ).GT.tol ) THEN
1152  ompq = aapq / abs(aapq)
1153  notrot = 0
1154 *[RTD] ROTATED = ROTATED + 1
1155  pskipped = 0
1156  iswrot = iswrot + 1
1157 *
1158  IF( rotok ) THEN
1159 *
1160  aqoap = aaqq / aapp
1161  apoaq = aapp / aaqq
1162  theta = -half*abs( aqoap-apoaq )/ aapq1
1163  IF( aaqq.GT.aapp0 )theta = -theta
1164 *
1165  IF( abs( theta ).GT.bigtheta ) THEN
1166  t = half / theta
1167  cs = one
1168  CALL zrot( m, a(1,p), 1, a(1,q), 1,
1169  $ cs, conjg(ompq)*t )
1170  IF( rsvec ) THEN
1171  CALL zrot( mvl, v(1,p), 1,
1172  $ v(1,q), 1, cs, conjg(ompq)*t )
1173  END IF
1174  sva( q ) = aaqq*sqrt( max( zero,
1175  $ one+t*apoaq*aapq1 ) )
1176  aapp = aapp*sqrt( max( zero,
1177  $ one-t*aqoap*aapq1 ) )
1178  mxsinj = max( mxsinj, abs( t ) )
1179  ELSE
1180 *
1181 * .. choose correct signum for THETA and rotate
1182 *
1183  thsign = -sign( one, aapq1 )
1184  IF( aaqq.GT.aapp0 )thsign = -thsign
1185  t = one / ( theta+thsign*
1186  $ sqrt( one+theta*theta ) )
1187  cs = sqrt( one / ( one+t*t ) )
1188  sn = t*cs
1189  mxsinj = max( mxsinj, abs( sn ) )
1190  sva( q ) = aaqq*sqrt( max( zero,
1191  $ one+t*apoaq*aapq1 ) )
1192  aapp = aapp*sqrt( max( zero,
1193  $ one-t*aqoap*aapq1 ) )
1194 *
1195  CALL zrot( m, a(1,p), 1, a(1,q), 1,
1196  $ cs, conjg(ompq)*sn )
1197  IF( rsvec ) THEN
1198  CALL zrot( mvl, v(1,p), 1,
1199  $ v(1,q), 1, cs, conjg(ompq)*sn )
1200  END IF
1201  END IF
1202  cwork(p) = -cwork(q) * ompq
1203 *
1204  ELSE
1205 * .. have to use modified Gram-Schmidt like transformation
1206  IF( aapp.GT.aaqq ) THEN
1207  CALL zcopy( m, a( 1, p ), 1,
1208  $ cwork(n+1), 1 )
1209  CALL zlascl( 'G', 0, 0, aapp, one,
1210  $ m, 1, cwork(n+1),lda,
1211  $ ierr )
1212  CALL zlascl( 'G', 0, 0, aaqq, one,
1213  $ m, 1, a( 1, q ), lda,
1214  $ ierr )
1215  CALL zaxpy( m, -aapq, cwork(n+1),
1216  $ 1, a( 1, q ), 1 )
1217  CALL zlascl( 'G', 0, 0, one, aaqq,
1218  $ m, 1, a( 1, q ), lda,
1219  $ ierr )
1220  sva( q ) = aaqq*sqrt( max( zero,
1221  $ one-aapq1*aapq1 ) )
1222  mxsinj = max( mxsinj, sfmin )
1223  ELSE
1224  CALL zcopy( m, a( 1, q ), 1,
1225  $ cwork(n+1), 1 )
1226  CALL zlascl( 'G', 0, 0, aaqq, one,
1227  $ m, 1, cwork(n+1),lda,
1228  $ ierr )
1229  CALL zlascl( 'G', 0, 0, aapp, one,
1230  $ m, 1, a( 1, p ), lda,
1231  $ ierr )
1232  CALL zaxpy( m, -conjg(aapq),
1233  $ cwork(n+1), 1, a( 1, p ), 1 )
1234  CALL zlascl( 'G', 0, 0, one, aapp,
1235  $ m, 1, a( 1, p ), lda,
1236  $ ierr )
1237  sva( p ) = aapp*sqrt( max( zero,
1238  $ one-aapq1*aapq1 ) )
1239  mxsinj = max( mxsinj, sfmin )
1240  END IF
1241  END IF
1242 * END IF ROTOK THEN ... ELSE
1243 *
1244 * In the case of cancellation in updating SVA(q), SVA(p)
1245 * .. recompute SVA(q), SVA(p)
1246  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1247  $ THEN
1248  IF( ( aaqq.LT.rootbig ) .AND.
1249  $ ( aaqq.GT.rootsfmin ) ) THEN
1250  sva( q ) = dznrm2( m, a( 1, q ), 1)
1251  ELSE
1252  t = zero
1253  aaqq = one
1254  CALL zlassq( m, a( 1, q ), 1, t,
1255  $ aaqq )
1256  sva( q ) = t*sqrt( aaqq )
1257  END IF
1258  END IF
1259  IF( ( aapp / aapp0 )**2.LE.rooteps ) THEN
1260  IF( ( aapp.LT.rootbig ) .AND.
1261  $ ( aapp.GT.rootsfmin ) ) THEN
1262  aapp = dznrm2( m, a( 1, p ), 1 )
1263  ELSE
1264  t = zero
1265  aapp = one
1266  CALL zlassq( m, a( 1, p ), 1, t,
1267  $ aapp )
1268  aapp = t*sqrt( aapp )
1269  END IF
1270  sva( p ) = aapp
1271  END IF
1272 * end of OK rotation
1273  ELSE
1274  notrot = notrot + 1
1275 *[RTD] SKIPPED = SKIPPED + 1
1276  pskipped = pskipped + 1
1277  ijblsk = ijblsk + 1
1278  END IF
1279  ELSE
1280  notrot = notrot + 1
1281  pskipped = pskipped + 1
1282  ijblsk = ijblsk + 1
1283  END IF
1284 *
1285  IF( ( i.LE.swband ) .AND. ( ijblsk.GE.blskip ) )
1286  $ THEN
1287  sva( p ) = aapp
1288  notrot = 0
1289  GO TO 2011
1290  END IF
1291  IF( ( i.LE.swband ) .AND.
1292  $ ( pskipped.GT.rowskip ) ) THEN
1293  aapp = -aapp
1294  notrot = 0
1295  GO TO 2203
1296  END IF
1297 *
1298  2200 CONTINUE
1299 * end of the q-loop
1300  2203 CONTINUE
1301 *
1302  sva( p ) = aapp
1303 *
1304  ELSE
1305 *
1306  IF( aapp.EQ.zero )notrot = notrot +
1307  $ min( jgl+kbl-1, n ) - jgl + 1
1308  IF( aapp.LT.zero )notrot = 0
1309 *
1310  END IF
1311 *
1312  2100 CONTINUE
1313 * end of the p-loop
1314  2010 CONTINUE
1315 * end of the jbc-loop
1316  2011 CONTINUE
1317 *2011 bailed out of the jbc-loop
1318  DO 2012 p = igl, min( igl+kbl-1, n )
1319  sva( p ) = abs( sva( p ) )
1320  2012 CONTINUE
1321 ***
1322  2000 CONTINUE
1323 *2000 :: end of the ibr-loop
1324 *
1325 * .. update SVA(N)
1326  IF( ( sva( n ).LT.rootbig ) .AND. ( sva( n ).GT.rootsfmin ) )
1327  $ THEN
1328  sva( n ) = dznrm2( m, a( 1, n ), 1 )
1329  ELSE
1330  t = zero
1331  aapp = one
1332  CALL zlassq( m, a( 1, n ), 1, t, aapp )
1333  sva( n ) = t*sqrt( aapp )
1334  END IF
1335 *
1336 * Additional steering devices
1337 *
1338  IF( ( i.LT.swband ) .AND. ( ( mxaapq.LE.roottol ) .OR.
1339  $ ( iswrot.LE.n ) ) )swband = i
1340 *
1341  IF( ( i.GT.swband+1 ) .AND. ( mxaapq.LT.sqrt( dble( n ) )*
1342  $ tol ) .AND. ( dble( n )*mxaapq*mxsinj.LT.tol ) ) THEN
1343  GO TO 1994
1344  END IF
1345 *
1346  IF( notrot.GE.emptsw )GO TO 1994
1347 *
1348  1993 CONTINUE
1349 * end i=1:NSWEEP loop
1350 *
1351 * #:( Reaching this point means that the procedure has not converged.
1352  info = nsweep - 1
1353  GO TO 1995
1354 *
1355  1994 CONTINUE
1356 * #:) Reaching this point means numerical convergence after the i-th
1357 * sweep.
1358 *
1359  info = 0
1360 * #:) INFO = 0 confirms successful iterations.
1361  1995 CONTINUE
1362 *
1363 * Sort the singular values and find how many are above
1364 * the underflow threshold.
1365 *
1366  n2 = 0
1367  n4 = 0
1368  DO 5991 p = 1, n - 1
1369  q = idamax( n-p+1, sva( p ), 1 ) + p - 1
1370  IF( p.NE.q ) THEN
1371  temp1 = sva( p )
1372  sva( p ) = sva( q )
1373  sva( q ) = temp1
1374  CALL zswap( m, a( 1, p ), 1, a( 1, q ), 1 )
1375  IF( rsvec )CALL zswap( mvl, v( 1, p ), 1, v( 1, q ), 1 )
1376  END IF
1377  IF( sva( p ).NE.zero ) THEN
1378  n4 = n4 + 1
1379  IF( sva( p )*skl.GT.sfmin )n2 = n2 + 1
1380  END IF
1381  5991 CONTINUE
1382  IF( sva( n ).NE.zero ) THEN
1383  n4 = n4 + 1
1384  IF( sva( n )*skl.GT.sfmin )n2 = n2 + 1
1385  END IF
1386 *
1387 * Normalize the left singular vectors.
1388 *
1389  IF( lsvec .OR. uctol ) THEN
1390  DO 1998 p = 1, n4
1391 * CALL ZDSCAL( M, ONE / SVA( p ), A( 1, p ), 1 )
1392  CALL zlascl( 'G',0,0, sva(p), one, m, 1, a(1,p), m, ierr )
1393  1998 CONTINUE
1394  END IF
1395 *
1396 * Scale the product of Jacobi rotations.
1397 *
1398  IF( rsvec ) THEN
1399  DO 2399 p = 1, n
1400  temp1 = one / dznrm2( mvl, v( 1, p ), 1 )
1401  CALL zdscal( mvl, temp1, v( 1, p ), 1 )
1402  2399 CONTINUE
1403  END IF
1404 *
1405 * Undo scaling, if necessary (and possible).
1406  IF( ( ( skl.GT.one ) .AND. ( sva( 1 ).LT.( big / skl ) ) )
1407  $ .OR. ( ( skl.LT.one ) .AND. ( sva( max( n2, 1 ) ) .GT.
1408  $ ( sfmin / skl ) ) ) ) THEN
1409  DO 2400 p = 1, n
1410  sva( p ) = skl*sva( p )
1411  2400 CONTINUE
1412  skl = one
1413  END IF
1414 *
1415  rwork( 1 ) = skl
1416 * The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE
1417 * then some of the singular values may overflow or underflow and
1418 * the spectrum is given in this factored representation.
1419 *
1420  rwork( 2 ) = dble( n4 )
1421 * N4 is the number of computed nonzero singular values of A.
1422 *
1423  rwork( 3 ) = dble( n2 )
1424 * N2 is the number of singular values of A greater than SFMIN.
1425 * If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers
1426 * that may carry some information.
1427 *
1428  rwork( 4 ) = dble( i )
1429 * i is the index of the last sweep before declaring convergence.
1430 *
1431  rwork( 5 ) = mxaapq
1432 * MXAAPQ is the largest absolute value of scaled pivots in the
1433 * last sweep
1434 *
1435  rwork( 6 ) = mxsinj
1436 * MXSINJ is the largest absolute value of the sines of Jacobi angles
1437 * in the last sweep
1438 *
1439  RETURN
1440 * ..
1441 * .. END OF ZGESVJ
1442 * ..
Here is the call graph for this function:
Here is the caller graph for this function:
dlascl
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:145
zaxpy
subroutine zaxpy(N, ZA, ZX, INCX, ZY, INCY)
ZAXPY
Definition: zaxpy.f:90
idamax
integer function idamax(N, DX, INCX)
IDAMAX
Definition: idamax.f:73
zdotc
complex *16 function zdotc(N, ZX, INCX, ZY, INCY)
ZDOTC
Definition: zdotc.f:85
dznrm2
double precision function dznrm2(N, X, INCX)
DZNRM2
Definition: dznrm2.f:77
zlassq
subroutine zlassq(N, X, INCX, SCALE, SUMSQ)
ZLASSQ updates a sum of squares represented in scaled form.
Definition: zlassq.f:108
zdscal
subroutine zdscal(N, DA, ZX, INCX)
ZDSCAL
Definition: zdscal.f:80
zcopy
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:83
zlaset
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: zlaset.f:108
zlascl
subroutine zlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: zlascl.f:145
zgsvj0
subroutine zgsvj0(JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO)
ZGSVJ0 pre-processor for the routine zgesvj.
Definition: zgsvj0.f:220
zgsvj1
subroutine zgsvj1(JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO)
ZGSVJ1 pre-processor for the routine zgesvj, applies Jacobi rotations targeting only particular pivot...
Definition: zgsvj1.f:238
zrot
subroutine zrot(N, CX, INCX, CY, INCY, C, S)
ZROT applies a plane rotation with real cosine and complex sine to a pair of complex vectors.
Definition: zrot.f:105
xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
lsame
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
zswap
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:83
dlamch
double precision function dlamch(CMACH)
DLAMCH
Definition: dlamch.f:70