*  Source listing: dd/d21/dlasd3_8f_source.html

*> \brief \b DLASD3 finds all square roots of the roots of the secular equation, as defined by the values in D and Z, and then updates the singular vectors by matrix multiplication. Used by dbdsdc.

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download DLASD3 + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd3.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd3.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd3.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,

*                          LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,

*                          INFO )

*

*       .. Scalar Arguments ..

*       INTEGER            INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,

*      $                   SQRE

*       ..

*       .. Array Arguments ..

*       INTEGER            CTOT( * ), IDXC( * )

*       DOUBLE PRECISION   D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),

*      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),

*      $                   Z( * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> DLASD3 finds all the square roots of the roots of the secular

*> equation, as defined by the values in D and Z.  It makes the

*> appropriate calls to DLASD4 and then updates the singular

*> vectors by matrix multiplication.

*>

*> This code makes very mild assumptions about floating point

*> arithmetic. It will work on machines with a guard digit in

*> add/subtract, or on those binary machines without guard digits

*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.

*> It could conceivably fail on hexadecimal or decimal machines

*> without guard digits, but we know of none.

*>

*> DLASD3 is called from DLASD1.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] NL

*> \verbatim

*>          NL is INTEGER

*>         The row dimension of the upper block.  NL >= 1.

*> \endverbatim

*>

*> \param[in] NR

*> \verbatim

*>          NR is INTEGER

*>         The row dimension of the lower block.  NR >= 1.

*> \endverbatim

*>

*> \param[in] SQRE

*> \verbatim

*>          SQRE is INTEGER

*>         = 0: the lower block is an NR-by-NR square matrix.

*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.

*>

*>         The bidiagonal matrix has N = NL + NR + 1 rows and

*>         M = N + SQRE >= N columns.

*> \endverbatim

*>

*> \param[in] K

*> \verbatim

*>          K is INTEGER

*>         The size of the secular equation, 1 <= K <= N.

*> \endverbatim

*>

*> \param[out] D

*> \verbatim

*>          D is DOUBLE PRECISION array, dimension(K)

*>         On exit the square roots of the roots of the secular equation,

*>         in ascending order.

*> \endverbatim

*>

*> \param[out] Q

*> \verbatim

*>          Q is DOUBLE PRECISION array, dimension (LDQ,K)

*> \endverbatim

*>

*> \param[in] LDQ

*> \verbatim

*>          LDQ is INTEGER

*>         The leading dimension of the array Q.  LDQ >= K.

*> \endverbatim

*>

*> \param[in,out] DSIGMA

*> \verbatim

*>          DSIGMA is DOUBLE PRECISION array, dimension(K)

*>         The first K elements of this array contain the old roots

*>         of the deflated updating problem.  These are the poles

*>         of the secular equation.

*> \endverbatim

*>

*> \param[out] U

*> \verbatim

*>          U is DOUBLE PRECISION array, dimension (LDU, N)

*>         The last N - K columns of this matrix contain the deflated

*>         left singular vectors.

*> \endverbatim

*>

*> \param[in] LDU

*> \verbatim

*>          LDU is INTEGER

*>         The leading dimension of the array U.  LDU >= N.

*> \endverbatim

*>

*> \param[in] U2

*> \verbatim

*>          U2 is DOUBLE PRECISION array, dimension (LDU2, N)

*>         The first K columns of this matrix contain the non-deflated

*>         left singular vectors for the split problem.

*> \endverbatim

*>

*> \param[in] LDU2

*> \verbatim

*>          LDU2 is INTEGER

*>         The leading dimension of the array U2.  LDU2 >= N.

*> \endverbatim

*>

*> \param[out] VT

*> \verbatim

*>          VT is DOUBLE PRECISION array, dimension (LDVT, M)

*>         The last M - K columns of VT**T contain the deflated

*>         right singular vectors.

*> \endverbatim

*>

*> \param[in] LDVT

*> \verbatim

*>          LDVT is INTEGER

*>         The leading dimension of the array VT.  LDVT >= M.

*> \endverbatim

*>

*> \param[in,out] VT2

*> \verbatim

*>          VT2 is DOUBLE PRECISION array, dimension (LDVT2, M)

*>         The first K columns of VT2**T contain the non-deflated

*>         right singular vectors for the split problem.

*> \endverbatim

*>

*> \param[in] LDVT2

*> \verbatim

*>          LDVT2 is INTEGER

*>         The leading dimension of the array VT2.  LDVT2 >= M.

*> \endverbatim

*>

*> \param[in] IDXC

*> \verbatim

*>          IDXC is INTEGER array, dimension ( N )

*>         The permutation used to arrange the columns of U (and rows of

*>         VT) into three groups:  the first group contains non-zero

*>         entries only at and above (or before) NL +1; the second

*>         contains non-zero entries only at and below (or after) NL+2;

*>         and the third is dense. The first column of U and the row of

*>         VT are treated separately, however.

*>

*>         The rows of the singular vectors found by DLASD4

*>         must be likewise permuted before the matrix multiplies can

*>         take place.

*> \endverbatim

*>

*> \param[in] CTOT

*> \verbatim

*>          CTOT is INTEGER array, dimension ( 4 )

*>         A count of the total number of the various types of columns

*>         in U (or rows in VT), as described in IDXC. The fourth column

*>         type is any column which has been deflated.

*> \endverbatim

*>

*> \param[in,out] Z

*> \verbatim

*>          Z is DOUBLE PRECISION array, dimension (K)

*>         The first K elements of this array contain the components

*>         of the deflation-adjusted updating row vector.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>         = 0:  successful exit.

*>         < 0:  if INFO = -i, the i-th argument had an illegal value.

*>         > 0:  if INFO = 1, a singular value did not converge

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \date June 2017

*

*> \ingroup OTHERauxiliary

*

*> \par Contributors:

*  ==================

*>

*>     Ming Gu and Huan Ren, Computer Science Division, University of

*>     California at Berkeley, USA

*>

*  =====================================================================

      SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
     $                   LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,
     $                   INFO )
*
*  -- LAPACK auxiliary routine (version 3.7.1) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*     June 2017
*
*     Summary of what this routine does:
*       1. validates the arguments (INFO = -i names the first illegal
*          argument; XERBLA is called and the routine returns),
*       2. handles K = 1 directly,
*       3. calls DLASD4 once per root of the K-by-K secular equation
*          defined by DSIGMA and Z, storing root J in D( J ),
*       4. recomputes Z from the DLASD4 output,
*       5. assembles K-by-K factors in Q and forms U and VT as
*          DGEMM products with U2 and VT2.
*     On a DLASD4 failure the routine returns immediately with the
*     INFO value DLASD4 produced.
*
*     .. Scalar Arguments ..
      INTEGER            INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
     $                   SQRE
*     ..
*     .. Array Arguments ..
      INTEGER            CTOT( * ), IDXC( * )
      DOUBLE PRECISION   D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),
     $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
     $                   Z( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO, NEGONE
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0,
     $                   NEGONE = -1.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1
      DOUBLE PRECISION   RHO, TEMP
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   DLAMC3, DNRM2
      EXTERNAL           DLAMC3, DNRM2
*     ..
*     .. External Subroutines ..
      EXTERNAL           DCOPY, DGEMM, DLACPY, DLASCL, DLASD4, XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, SIGN, SQRT
*     ..
*     .. Executable Statements ..
*
*     Derived sizes: N rows and M = N + SQRE columns; NLP1 and NLP2
*     are the first row indices after the upper block.
*
      N = NL + NR + 1
      M = N + SQRE
      NLP1 = NL + 1
      NLP2 = NL + 2
*
*     Test the input parameters.  One IF chain is used so that INFO
*     always names the FIRST illegal argument; a later check can no
*     longer overwrite an error already found for NL, NR or SQRE.
*
      INFO = 0
      IF( NL.LT.1 ) THEN
         INFO = -1
      ELSE IF( NR.LT.1 ) THEN
         INFO = -2
      ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
         INFO = -3
      ELSE IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN
         INFO = -4
      ELSE IF( LDQ.LT.K ) THEN
         INFO = -7
      ELSE IF( LDU.LT.N ) THEN
         INFO = -10
      ELSE IF( LDU2.LT.N ) THEN
         INFO = -12
      ELSE IF( LDVT.LT.M ) THEN
         INFO = -14
      ELSE IF( LDVT2.LT.M ) THEN
         INFO = -16
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLASD3', -INFO )
         RETURN
      END IF
*
*     Quick return if possible.  With K = 1 the single value is
*     |Z(1)|; row 1 of VT2 is copied to row 1 of VT and column 1 of
*     U2 is copied to column 1 of U, negated unless Z(1) > 0.
*
      IF( K.EQ.1 ) THEN
         D( 1 ) = ABS( Z( 1 ) )
         CALL DCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT )
         IF( Z( 1 ).GT.ZERO ) THEN
            CALL DCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 )
         ELSE
            DO 10 I = 1, N
               U( I, 1 ) = -U2( I, 1 )
   10       CONTINUE
         END IF
         RETURN
      END IF
*
*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
*     be computed with high relative accuracy (barring over/underflow).
*     This is a problem on machines without a guard digit in
*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
*     which on any of these machines zeros out the bottommost
*     bit of DSIGMA(I) if it is 1; this makes the subsequent
*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
*     occurs. On binary machines with a guard digit (almost all
*     machines) it does not change DSIGMA(I) at all. On hexadecimal
*     and decimal machines with a guard digit, it slightly
*     changes the bottommost bits of DSIGMA(I). It does not account
*     for hexadecimal or decimal machines without guard digits
*     (we know of none). We use a subroutine call to compute
*     2*DSIGMA(I) to prevent optimizing compilers from eliminating
*     this code.
*
      DO 20 I = 1, K
         DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
   20 CONTINUE
*
*     Keep a copy of Z in the first column of Q; its signs are
*     needed after Z has been overwritten below.
*
      CALL DCOPY( K, Z, 1, Q, 1 )
*
*     Normalize Z to unit 2-norm and keep the squared norm in RHO,
*     which is the RHO argument handed to DLASD4.
*
      RHO = DNRM2( K, Z, 1 )
      CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
      RHO = RHO*RHO
*
*     Find the new singular values.  Root J is returned in D( J );
*     column J of U and column J of VT receive the two K-element
*     vectors DLASD4 writes, and both are consumed below.
*     NOTE(review): presumably these hold DSIGMA( I ) - D( J ) and
*     DSIGMA( I ) + D( J ) -- confirm against DLASD4.
*
      DO 30 J = 1, K
         CALL DLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ),
     $                VT( 1, J ), INFO )
*
*        If the zero finder fails, report the convergence failure.
*
         IF( INFO.NE.0 ) THEN
            RETURN
         END IF
   30 CONTINUE
*
*     Compute updated Z.  Z( I ) is rebuilt as a running product of
*     U( I, J )*VT( I, J ) over all J, each factor except the last
*     divided by ( DSIGMA( I )**2 - DSIGMA( JJ )**2 ) for JJ .NE. I,
*     written in factored form ( a - b )*( a + b ).  The square root
*     of its magnitude is then given the sign of the saved original
*     Z( I ) held in Q( I, 1 ).
*
      DO 60 I = 1, K
         Z( I ) = U( I, K )*VT( I, K )
         DO 40 J = 1, I - 1
            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
     $               ( DSIGMA( I )-DSIGMA( J ) ) /
     $               ( DSIGMA( I )+DSIGMA( J ) ) )
   40    CONTINUE
         DO 50 J = I, K - 1
            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
     $               ( DSIGMA( I )-DSIGMA( J+1 ) ) /
     $               ( DSIGMA( I )+DSIGMA( J+1 ) ) )
   50    CONTINUE
         Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) )
   60 CONTINUE
*
*     Compute left singular vectors of the modified diagonal matrix,
*     and store related information for the right singular vectors.
*     For each I, column I of VT becomes Z( J ) / U( J, I ) /
*     VT( J, I ) and column I of U becomes ( -1, DSIGMA( 2:K ) *
*     VT( 2:K, I ) ).  That column of U is scaled to unit 2-norm and
*     stored in column I of Q with rows 2..K permuted through IDXC.
*
      DO 90 I = 1, K
         VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I )
         U( 1, I ) = NEGONE
         DO 70 J = 2, K
            VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I )
            U( J, I ) = DSIGMA( J )*VT( J, I )
   70    CONTINUE
         TEMP = DNRM2( K, U( 1, I ), 1 )
         Q( 1, I ) = U( 1, I ) / TEMP
         DO 80 J = 2, K
            JC = IDXC( J )
            Q( J, I ) = U( JC, I ) / TEMP
   80    CONTINUE
   90 CONTINUE
*
*     Update the left singular vector matrix.
*
      IF( K.EQ.2 ) THEN
*
*        K = 2: a single full product U = U2 * Q is formed.
*
         CALL DGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U,
     $               LDU )
      ELSE
*
*        Rows 1..NL of U: contributions from the CTOT( 1 ) columns
*        of U2 starting at column 2 and from the CTOT( 3 ) columns
*        starting at column 2 + CTOT( 1 ) + CTOT( 2 ).  If both
*        counts are zero, the leading NL-by-K part of U2 is copied.
*
         IF( CTOT( 1 ).GT.0 ) THEN
            CALL DGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE,
     $                  U2( 1, 2 ), LDU2, Q( 2, 1 ), LDQ, ZERO,
     $                  U( 1, 1 ), LDU )
            IF( CTOT( 3 ).GT.0 ) THEN
               KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
               CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE,
     $                     U2( 1, KTEMP ), LDU2, Q( KTEMP, 1 ), LDQ,
     $                     ONE, U( 1, 1 ), LDU )
            END IF
         ELSE IF( CTOT( 3 ).GT.0 ) THEN
            KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
            CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE,
     $                  U2( 1, KTEMP ), LDU2, Q( KTEMP, 1 ), LDQ,
     $                  ZERO, U( 1, 1 ), LDU )
         ELSE
            CALL DLACPY( 'F', NL, K, U2, LDU2, U, LDU )
         END IF
*
*        Row NL+1 of U is row 1 of Q.
*
         CALL DCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU )
*
*        Rows NL+2..N of U: the CTOT( 2 ) + CTOT( 3 ) columns of U2
*        that start at column 2 + CTOT( 1 ).
*
         KTEMP = 2 + CTOT( 1 )
         CTEMP = CTOT( 2 ) + CTOT( 3 )
         CALL DGEMM( 'N', 'N', NR, K, CTEMP, ONE,
     $               U2( NLP2, KTEMP ), LDU2, Q( KTEMP, 1 ), LDQ,
     $               ZERO, U( NLP2, 1 ), LDU )
      END IF
*
*     Generate the right singular vectors: column I of VT is scaled
*     to unit 2-norm and stored as ROW I of Q, with entries 2..K
*     permuted through IDXC.
*
      DO 120 I = 1, K
         TEMP = DNRM2( K, VT( 1, I ), 1 )
         Q( I, 1 ) = VT( 1, I ) / TEMP
         DO 110 J = 2, K
            JC = IDXC( J )
            Q( I, J ) = VT( JC, I ) / TEMP
  110    CONTINUE
  120 CONTINUE
*
*     Update the right singular vector matrix.
*
      IF( K.EQ.2 ) THEN
*
*        K = 2: a single full product VT = Q * VT2 is formed.
*
         CALL DGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO,
     $               VT, LDVT )
         RETURN
      END IF
*
*     Columns 1..NL+1 of VT: rows 1..1+CTOT( 1 ) of VT2, plus the
*     CTOT( 3 ) rows starting at row 2 + CTOT( 1 ) + CTOT( 2 ).
*
      KTEMP = 1 + CTOT( 1 )
      CALL DGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ,
     $            VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT )
      KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
*
*     The guard skips the call when row KTEMP would lie beyond the
*     leading dimension of VT2.
*
      IF( KTEMP.LE.LDVT2 ) THEN
         CALL DGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE,
     $               Q( 1, KTEMP ), LDQ, VT2( KTEMP, 1 ), LDVT2,
     $               ONE, VT( 1, 1 ), LDVT )
      END IF
*
*     Columns NL+2..M of VT.  Column 1 of Q and row 1 of VT2 are
*     first copied into position KTEMP = CTOT( 1 ) + 1 so that they
*     sit directly in front of the following CTOT( 2 ) + CTOT( 3 )
*     columns/rows and one product covers all of them.  This
*     overwrites Q( :, KTEMP ) and VT2( KTEMP, NL+2:M ).
*
      KTEMP = CTOT( 1 ) + 1
      NRP1 = NR + SQRE
      IF( KTEMP.GT.1 ) THEN
         DO 130 I = 1, K
            Q( I, KTEMP ) = Q( I, 1 )
  130    CONTINUE
         DO 140 I = NLP2, M
            VT2( KTEMP, I ) = VT2( 1, I )
  140    CONTINUE
      END IF
      CTEMP = 1 + CTOT( 2 ) + CTOT( 3 )
      CALL DGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ,
     $            VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ),
     $            LDVT )
*
      RETURN
*
*     End of DLASD3
*
      END