d8/df7/dgghd3_8f_source.html

*> \brief \b DGGHD3

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download DGGHD3 + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgghd3.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgghd3.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgghd3.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,

*                          LDQ, Z, LDZ, WORK, LWORK, INFO )

*

*       .. Scalar Arguments ..

*       CHARACTER          COMPQ, COMPZ

*       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK

*       ..

*       .. Array Arguments ..

*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), Q( LDQ, * ),

*      $                   Z( LDZ, * ), WORK( * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> DGGHD3 reduces a pair of real matrices (A,B) to generalized upper

*> Hessenberg form using orthogonal transformations, where A is a

*> general matrix and B is upper triangular.  The form of the

*> generalized eigenvalue problem is

*>    A*x = lambda*B*x,

*> and B is typically made upper triangular by computing its QR

*> factorization and moving the orthogonal matrix Q to the left side

*> of the equation.

*>

*> This subroutine simultaneously reduces A to a Hessenberg matrix H:

*>    Q**T*A*Z = H

*> and transforms B to another upper triangular matrix T:

*>    Q**T*B*Z = T

*> in order to reduce the problem to its standard form

*>    H*y = lambda*T*y

*> where y = Z**T*x.

*>

*> The orthogonal matrices Q and Z are determined as products of Givens

*> rotations.  They may either be formed explicitly, or they may be

*> postmultiplied into input matrices Q1 and Z1, so that

*>

*>      Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T

*>

*>      Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T

*>

*> If Q1 is the orthogonal matrix from the QR factorization of B in the

*> original equation A*x = lambda*B*x, then DGGHD3 reduces the original

*> problem to generalized Hessenberg form.

*>

*> This is a blocked variant of DGGHRD, using matrix-matrix

*> multiplications for parts of the computation to enhance performance.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] COMPQ

*> \verbatim

*>          COMPQ is CHARACTER*1

*>          = 'N': do not compute Q;

*>          = 'I': Q is initialized to the unit matrix, and the

*>                 orthogonal matrix Q is returned;

*>          = 'V': Q must contain an orthogonal matrix Q1 on entry,

*>                 and the product Q1*Q is returned.

*> \endverbatim

*>

*> \param[in] COMPZ

*> \verbatim

*>          COMPZ is CHARACTER*1

*>          = 'N': do not compute Z;

*>          = 'I': Z is initialized to the unit matrix, and the

*>                 orthogonal matrix Z is returned;

*>          = 'V': Z must contain an orthogonal matrix Z1 on entry,

*>                 and the product Z1*Z is returned.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The order of the matrices A and B.  N >= 0.

*> \endverbatim

*>

*> \param[in] ILO

*> \verbatim

*>          ILO is INTEGER

*> \endverbatim

*>

*> \param[in] IHI

*> \verbatim

*>          IHI is INTEGER

*>

*>          ILO and IHI mark the rows and columns of A which are to be

*>          reduced.  It is assumed that A is already upper triangular

*>          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are

*>          normally set by a previous call to DGGBAL; otherwise they

*>          should be set to 1 and N respectively.

*>          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.

*> \endverbatim

*>

*> \param[in,out] A

*> \verbatim

*>          A is DOUBLE PRECISION array, dimension (LDA, N)

*>          On entry, the N-by-N general matrix to be reduced.

*>          On exit, the upper triangle and the first subdiagonal of A

*>          are overwritten with the upper Hessenberg matrix H, and the

*>          rest is set to zero.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max(1,N).

*> \endverbatim

*>

*> \param[in,out] B

*> \verbatim

*>          B is DOUBLE PRECISION array, dimension (LDB, N)

*>          On entry, the N-by-N upper triangular matrix B.

*>          On exit, the upper triangular matrix T = Q**T B Z.  The

*>          elements below the diagonal are set to zero.

*> \endverbatim

*>

*> \param[in] LDB

*> \verbatim

*>          LDB is INTEGER

*>          The leading dimension of the array B.  LDB >= max(1,N).

*> \endverbatim

*>

*> \param[in,out] Q

*> \verbatim

*>          Q is DOUBLE PRECISION array, dimension (LDQ, N)

*>          On entry, if COMPQ = 'V', the orthogonal matrix Q1,

*>          typically from the QR factorization of B.

*>          On exit, if COMPQ='I', the orthogonal matrix Q, and if

*>          COMPQ = 'V', the product Q1*Q.

*>          Not referenced if COMPQ='N'.

*> \endverbatim

*>

*> \param[in] LDQ

*> \verbatim

*>          LDQ is INTEGER

*>          The leading dimension of the array Q.

*>          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.

*> \endverbatim

*>

*> \param[in,out] Z

*> \verbatim

*>          Z is DOUBLE PRECISION array, dimension (LDZ, N)

*>          On entry, if COMPZ = 'V', the orthogonal matrix Z1.

*>          On exit, if COMPZ='I', the orthogonal matrix Z, and if

*>          COMPZ = 'V', the product Z1*Z.

*>          Not referenced if COMPZ='N'.

*> \endverbatim

*>

*> \param[in] LDZ

*> \verbatim

*>          LDZ is INTEGER

*>          The leading dimension of the array Z.

*>          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is DOUBLE PRECISION array, dimension (LWORK)

*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

*> \endverbatim

*>

*> \param[in]  LWORK

*> \verbatim

*>          LWORK is INTEGER

*>          The length of the array WORK.  LWORK >= 1.

*>          For optimum performance LWORK >= 6*N*NB, where NB is the

*>          optimal blocksize.

*>

*>          If LWORK = -1, then a workspace query is assumed; the routine

*>          only calculates the optimal size of the WORK array, returns

*>          this value as the first entry of the WORK array, and no error

*>          message related to LWORK is issued by XERBLA.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit.

*>          < 0:  if INFO = -i, the i-th argument had an illegal value.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \date January 2015

*

*> \ingroup doubleOTHERcomputational

*

*> \par Further Details:

*  =====================

*>

*> \verbatim

*>

*>  This routine reduces A to Hessenberg form and maintains B in

*>  triangular form using a blocked variant of Moler and Stewart's

*>  original algorithm,

*>  as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti

*>  (BIT 2008).

*> \endverbatim

*>

*  =====================================================================

      SUBROUTINE dgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
     $                   LDQ, Z, LDZ, WORK, LWORK, INFO )
*
*  -- LAPACK computational routine (version 3.8.0) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*     January 2015
*
*     Summary (the full argument reference is in the header comments
*     that precede this routine):
*     reduces the pair (A,B) to generalized upper Hessenberg form.
*     Columns ILO:IHI-2 are processed in panels of NB columns; the
*     Givens rotations of a panel are accumulated in WORK and then
*     applied with DGEMM (or DORM22 when ILAENV(16,...) returns 2).
*     Whatever columns remain are handed to the unblocked DGGHRD.
*
*     INFO < 0 is returned (after calling XERBLA) for an illegal
*     argument.  LWORK = -1 is a workspace query: only WORK(1) is set.
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER          COMPQ, COMPZ
      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
     $                   z( ldz, * ), work( * )
*     ..
*
* =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      parameter( zero = 0.0d+0, one = 1.0d+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
      CHARACTER*1        COMPQ2, COMPZ2
      INTEGER            COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
     $                   kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
     $                   nh, nnb, nx, ppw, ppwo, pw, top, topq
      DOUBLE PRECISION   C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      EXTERNAL           ilaenv, lsame
*     ..
*     .. External Subroutines ..
      EXTERNAL           dgghrd, dlartg, dlaset, dorm22, drot, dgemm,
     $                   dgemv, dtrmv, dlacpy, xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          dble, max, min
*     ..
*     .. Executable Statements ..
*
*     Decode and test the input parameters.
*     WORK(1) is set to the optimal size before the argument checks so
*     that a workspace query (LWORK = -1) can return immediately.
*
      info = 0
      nb = ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 )
      lwkopt = max( 6*n*nb, 1 )
      work( 1 ) = dble( lwkopt )
      initq = lsame( compq, 'I' )
      wantq = initq .OR. lsame( compq, 'V' )
      initz = lsame( compz, 'I' )
      wantz = initz .OR. lsame( compz, 'V' )
      lquery = ( lwork.EQ.-1 )
*
      IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
         info = -1
      ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
         info = -2
      ELSE IF( n.LT.0 ) THEN
         info = -3
      ELSE IF( ilo.LT.1 ) THEN
         info = -4
      ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
         info = -5
      ELSE IF( lda.LT.max( 1, n ) ) THEN
         info = -7
      ELSE IF( ldb.LT.max( 1, n ) ) THEN
         info = -9
      ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
         info = -11
      ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
         info = -13
      ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
         info = -15
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'DGGHD3', -info )
         RETURN
      ELSE IF( lquery ) THEN
         RETURN
      END IF
*
*     Initialize Q and Z if desired.
*
      IF( initq )
     $   CALL dlaset( 'All', n, n, zero, one, q, ldq )
      IF( initz )
     $   CALL dlaset( 'All', n, n, zero, one, z, ldz )
*
*     Zero out lower triangle of B.
*
      IF( n.GT.1 )
     $   CALL dlaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
*
*     Quick return if possible
*
      nh = ihi - ilo + 1
      IF( nh.LE.1 ) THEN
         work( 1 ) = one
         RETURN
      END IF
*
*     Determine the blocksize.
*
      nbmin = ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 )
      IF( nb.GT.1 .AND. nb.LT.nh ) THEN
*
*        Determine when to use unblocked instead of blocked code.
*
         nx = max( nb, ilaenv( 3, 'DGGHD3', ' ', n, ilo, ihi, -1 ) )
         IF( nx.LT.nh ) THEN
*
*           Determine if workspace is large enough for blocked code.
*
            IF( lwork.LT.lwkopt ) THEN
*
*              Not enough workspace to use optimal NB:  determine the
*              minimum value of NB, and reduce NB or force use of
*              unblocked code.
*
               nbmin = max( 2, ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi,
     $                 -1 ) )
               IF( lwork.GE.6*n*nbmin ) THEN
                  nb = lwork / ( 6*n )
               ELSE
                  nb = 1
               END IF
            END IF
         END IF
      END IF
*
      IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
*
*        Use unblocked code below
*
         jcol = ilo
*
      ELSE
*
*        Use blocked code
*
         kacc22 = ilaenv( 16, 'DGGHD3', ' ', n, ilo, ihi, -1 )
         blk22 = kacc22.EQ.2
         DO jcol = ilo, ihi-2, nb
            nnb = min( nb, ihi-jcol-1 )
*
*           Initialize small orthogonal factors that will hold the
*           accumulated Givens rotations in workspace.
*           N2NB   denotes the number of 2*NNB-by-2*NNB factors
*           NBLST  denotes the (possibly smaller) order of the last
*                  factor.
*
            n2nb = ( ihi-jcol-1 ) / nnb - 1
            nblst = ihi - jcol - n2nb*nnb
            CALL dlaset( 'All', nblst, nblst, zero, one, work, nblst )
            pw = nblst * nblst + 1
            DO i = 1, n2nb
               CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
     $                      work( pw ), 2*nnb )
               pw = pw + 4*nnb*nnb
            END DO
*
*           Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
*
            DO j = jcol, jcol+nnb-1
*
*              Reduce Jth column of A. Store cosines and sines in Jth
*              column of A and B, respectively.
*
               DO i = ihi, j+2, -1
                  temp = a( i-1, j )
                  CALL dlartg( temp, a( i, j ), c, s, a( i-1, j ) )
                  a( i, j ) = c
                  b( i, j ) = s
               END DO
*
*              Accumulate Givens rotations into workspace array.
*
               ppw  = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
               len  = 2 + j - jcol
               jrow = j + n2nb*nnb + 2
               DO i = ihi, jrow, -1
                  c = a( i, j )
                  s = b( i, j )
                  DO jj = ppw, ppw+len-1
                     temp = work( jj + nblst )
                     work( jj + nblst ) = c*temp - s*work( jj )
                     work( jj ) = s*temp + c*work( jj )
                  END DO
                  len = len + 1
                  ppw = ppw - nblst - 1
               END DO
*
               ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
               j0 = jrow - nnb
               DO jrow = j0, j+2, -nnb
                  ppw = ppwo
                  len  = 2 + j - jcol
                  DO i = jrow+nnb-1, jrow, -1
                     c = a( i, j )
                     s = b( i, j )
                     DO jj = ppw, ppw+len-1
                        temp = work( jj + 2*nnb )
                        work( jj + 2*nnb ) = c*temp - s*work( jj )
                        work( jj ) = s*temp + c*work( jj )
                     END DO
                     len = len + 1
                     ppw = ppw - 2*nnb - 1
                  END DO
                  ppwo = ppwo + 4*nnb*nnb
               END DO
*
*              TOP denotes the number of top rows in A and B that will
*              not be updated during the next steps.
*
               IF( jcol.LE.2 ) THEN
                  top = 0
               ELSE
                  top = jcol
               END IF
*
*              Propagate transformations through B and replace stored
*              left sines/cosines by right sines/cosines.
*
               DO jj = n, j+1, -1
*
*                 Update JJth column of B.
*
                  DO i = min( jj+1, ihi ), j+2, -1
                     c = a( i, j )
                     s = b( i, j )
                     temp = b( i, jj )
                     b( i, jj ) = c*temp - s*b( i-1, jj )
                     b( i-1, jj ) = s*temp + c*b( i-1, jj )
                  END DO
*
*                 Annihilate B( JJ+1, JJ ).
*
                  IF( jj.LT.ihi ) THEN
                     temp = b( jj+1, jj+1 )
                     CALL dlartg( temp, b( jj+1, jj ), c, s,
     $                            b( jj+1, jj+1 ) )
                     b( jj+1, jj ) = zero
                     CALL drot( jj-top, b( top+1, jj+1 ), 1,
     $                          b( top+1, jj ), 1, c, s )
                     a( jj+1, j ) = c
                     b( jj+1, j ) = -s
                  END IF
               END DO
*
*              Update A by transformations from right.
*              Explicit loop unrolling provides better performance
*              compared to DLASR.
*               CALL DLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
*     $                     IHI-J, A( J+2, J ), B( J+2, J ),
*     $                     A( TOP+1, J+1 ), LDA )
*
*              The rotations are applied three at a time; the JJ
*              rotations left over (JJ = MOD(IHI-J-1,3)) are applied
*              one by one with DROT afterwards.
*
               jj = mod( ihi-j-1, 3 )
               DO i = ihi-j-3, jj+1, -3
                  c = a( j+1+i, j )
                  s = -b( j+1+i, j )
                  c1 = a( j+2+i, j )
                  s1 = -b( j+2+i, j )
                  c2 = a( j+3+i, j )
                  s2 = -b( j+3+i, j )
*
                  DO k = top+1, ihi
                     temp = a( k, j+i  )
                     temp1 = a( k, j+i+1 )
                     temp2 = a( k, j+i+2 )
                     temp3 = a( k, j+i+3 )
                     a( k, j+i+3 ) = c2*temp3 + s2*temp2
                     temp2 = -s2*temp3 + c2*temp2
                     a( k, j+i+2 ) = c1*temp2 + s1*temp1
                     temp1 = -s1*temp2 + c1*temp1
                     a( k, j+i+1 ) = c*temp1 + s*temp
                     a( k, j+i ) = -s*temp1 + c*temp
                  END DO
               END DO
*
               IF( jj.GT.0 ) THEN
                  DO i = jj, 1, -1
                     CALL drot( ihi-top, a( top+1, j+i+1 ), 1,
     $                          a( top+1, j+i ), 1, a( j+1+i, j ),
     $                          -b( j+1+i, j ) )
                  END DO
               END IF
*
*              Update (J+1)th column of A by transformations from left.
*
               IF ( j .LT. jcol + nnb - 1 ) THEN
                  len  = 1 + j - jcol
*
*                 Multiply with the trailing accumulated orthogonal
*                 matrix, which takes the form
*
*                        [  U11  U12  ]
*                    U = [            ],
*                        [  U21  U22  ]
*
*                 where U21 is a LEN-by-LEN matrix and U12 is lower
*                 triangular.
*
                  jrow = ihi - nblst + 1
                  CALL dgemv( 'Transpose', nblst, len, one, work,
     $                        nblst, a( jrow, j+1 ), 1, zero,
     $                        work( pw ), 1 )
                  ppw = pw + len
                  DO i = jrow, jrow+nblst-len-1
                     work( ppw ) = a( i, j+1 )
                     ppw = ppw + 1
                  END DO
                  CALL dtrmv( 'Lower', 'Transpose', 'Non-unit',
     $                        nblst-len, work( len*nblst + 1 ), nblst,
     $                        work( pw+len ), 1 )
                  CALL dgemv( 'Transpose', len, nblst-len, one,
     $                        work( (len+1)*nblst - len + 1 ), nblst,
     $                        a( jrow+nblst-len, j+1 ), 1, one,
     $                        work( pw+len ), 1 )
                  ppw = pw
                  DO i = jrow, jrow+nblst-1
                     a( i, j+1 ) = work( ppw )
                     ppw = ppw + 1
                  END DO
*
*                 Multiply with the other accumulated orthogonal
*                 matrices, which take the form
*
*                        [  U11  U12   0  ]
*                        [                ]
*                    U = [  U21  U22   0  ],
*                        [                ]
*                        [   0    0    I  ]
*
*                 where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
*                 matrix, U21 is a LEN-by-LEN upper triangular matrix
*                 and U12 is an NNB-by-NNB lower triangular matrix.
*
                  ppwo = 1 + nblst*nblst
                  j0 = jrow - nnb
                  DO jrow = j0, jcol+1, -nnb
                     ppw = pw + len
                     DO i = jrow, jrow+nnb-1
                        work( ppw ) = a( i, j+1 )
                        ppw = ppw + 1
                     END DO
                     ppw = pw
                     DO i = jrow+nnb, jrow+nnb+len-1
                        work( ppw ) = a( i, j+1 )
                        ppw = ppw + 1
                     END DO
                     CALL dtrmv( 'Upper', 'Transpose', 'Non-unit', len,
     $                           work( ppwo + nnb ), 2*nnb, work( pw ),
     $                           1 )
                     CALL dtrmv( 'Lower', 'Transpose', 'Non-unit', nnb,
     $                           work( ppwo + 2*len*nnb ),
     $                           2*nnb, work( pw + len ), 1 )
                     CALL dgemv( 'Transpose', nnb, len, one,
     $                           work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
     $                           one, work( pw ), 1 )
                     CALL dgemv( 'Transpose', len, nnb, one,
     $                           work( ppwo + 2*len*nnb + nnb ), 2*nnb,
     $                           a( jrow+nnb, j+1 ), 1, one,
     $                           work( pw+len ), 1 )
                     ppw = pw
                     DO i = jrow, jrow+len+nnb-1
                        a( i, j+1 ) = work( ppw )
                        ppw = ppw + 1
                     END DO
                     ppwo = ppwo + 4*nnb*nnb
                  END DO
               END IF
            END DO
*
*           Apply accumulated orthogonal matrices to A.
*
            cola = n - jcol - nnb + 1
            j = ihi - nblst + 1
            CALL dgemm( 'Transpose', 'No Transpose', nblst,
     $                  cola, nblst, one, work, nblst,
     $                  a( j, jcol+nnb ), lda, zero, work( pw ),
     $                  nblst )
            CALL dlacpy( 'All', nblst, cola, work( pw ), nblst,
     $                   a( j, jcol+nnb ), lda )
            ppwo = nblst*nblst + 1
            j0 = j - nnb
            DO j = j0, jcol+1, -nnb
               IF ( blk22 ) THEN
*
*                 Exploit the structure of
*
*                        [  U11  U12  ]
*                    U = [            ]
*                        [  U21  U22  ],
*
*                 where all blocks are NNB-by-NNB, U21 is upper
*                 triangular and U12 is lower triangular.
*
                  CALL dorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
     $                         nnb, work( ppwo ), 2*nnb,
     $                         a( j, jcol+nnb ), lda, work( pw ),
     $                         lwork-pw+1, ierr )
               ELSE
*
*                 Ignore the structure of U.
*
                  CALL dgemm( 'Transpose', 'No Transpose', 2*nnb,
     $                        cola, 2*nnb, one, work( ppwo ), 2*nnb,
     $                        a( j, jcol+nnb ), lda, zero, work( pw ),
     $                        2*nnb )
                  CALL dlacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
     $                         a( j, jcol+nnb ), lda )
               END IF
               ppwo = ppwo + 4*nnb*nnb
            END DO
*
*           Apply accumulated orthogonal matrices to Q.
*
            IF( wantq ) THEN
               j = ihi - nblst + 1
*
*              When Q was initialized to the identity in this call
*              (INITQ), only rows TOPQ:IHI are updated; otherwise all
*              N rows of the user-supplied Q are updated.
*
               IF ( initq ) THEN
                  topq = max( 2, j - jcol + 1 )
                  nh  = ihi - topq + 1
               ELSE
                  topq = 1
                  nh = n
               END IF
               CALL dgemm( 'No Transpose', 'No Transpose', nh,
     $                     nblst, nblst, one, q( topq, j ), ldq,
     $                     work, nblst, zero, work( pw ), nh )
               CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
     $                      q( topq, j ), ldq )
               ppwo = nblst*nblst + 1
               j0 = j - nnb
               DO j = j0, jcol+1, -nnb
                  IF ( initq ) THEN
                     topq = max( 2, j - jcol + 1 )
                     nh  = ihi - topq + 1
                  END IF
                  IF ( blk22 ) THEN
*
*                    Exploit the structure of U.
*
                     CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
     $                            nnb, nnb, work( ppwo ), 2*nnb,
     $                            q( topq, j ), ldq, work( pw ),
     $                            lwork-pw+1, ierr )
                  ELSE
*
*                    Ignore the structure of U.
*
                     CALL dgemm( 'No Transpose', 'No Transpose', nh,
     $                           2*nnb, 2*nnb, one, q( topq, j ), ldq,
     $                           work( ppwo ), 2*nnb, zero, work( pw ),
     $                           nh )
                     CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
     $                            q( topq, j ), ldq )
                  END IF
                  ppwo = ppwo + 4*nnb*nnb
               END DO
            END IF
*
*           Accumulate right Givens rotations if required.
*
            IF ( wantz .OR. top.GT.0 ) THEN
*
*              Initialize small orthogonal factors that will hold the
*              accumulated Givens rotations in workspace.
*
               CALL dlaset( 'All', nblst, nblst, zero, one, work,
     $                      nblst )
               pw = nblst * nblst + 1
               DO i = 1, n2nb
                  CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
     $                         work( pw ), 2*nnb )
                  pw = pw + 4*nnb*nnb
               END DO
*
*              Accumulate Givens rotations into workspace array.
*              The stored cosines/sines in A and B are overwritten
*              with zero as they are consumed.
*
               DO j = jcol, jcol+nnb-1
                  ppw  = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
                  len  = 2 + j - jcol
                  jrow = j + n2nb*nnb + 2
                  DO i = ihi, jrow, -1
                     c = a( i, j )
                     a( i, j ) = zero
                     s = b( i, j )
                     b( i, j ) = zero
                     DO jj = ppw, ppw+len-1
                        temp = work( jj + nblst )
                        work( jj + nblst ) = c*temp - s*work( jj )
                        work( jj ) = s*temp + c*work( jj )
                     END DO
                     len = len + 1
                     ppw = ppw - nblst - 1
                  END DO
*
                  ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
                  j0 = jrow - nnb
                  DO jrow = j0, j+2, -nnb
                     ppw = ppwo
                     len  = 2 + j - jcol
                     DO i = jrow+nnb-1, jrow, -1
                        c = a( i, j )
                        a( i, j ) = zero
                        s = b( i, j )
                        b( i, j ) = zero
                        DO jj = ppw, ppw+len-1
                           temp = work( jj + 2*nnb )
                           work( jj + 2*nnb ) = c*temp - s*work( jj )
                           work( jj ) = s*temp + c*work( jj )
                        END DO
                        len = len + 1
                        ppw = ppw - 2*nnb - 1
                     END DO
                     ppwo = ppwo + 4*nnb*nnb
                  END DO
               END DO
            ELSE
*
*              The rotations are not needed any more: just clear the
*              cosines/sines stored below the subdiagonal of A and B.
*
               CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
     $                      a( jcol + 2, jcol ), lda )
               CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
     $                      b( jcol + 2, jcol ), ldb )
            END IF
*
*           Apply accumulated orthogonal matrices to A and B.
*
            IF ( top.GT.0 ) THEN
               j = ihi - nblst + 1
               CALL dgemm( 'No Transpose', 'No Transpose', top,
     $                     nblst, nblst, one, a( 1, j ), lda,
     $                     work, nblst, zero, work( pw ), top )
               CALL dlacpy( 'All', top, nblst, work( pw ), top,
     $                      a( 1, j ), lda )
               ppwo = nblst*nblst + 1
               j0 = j - nnb
               DO j = j0, jcol+1, -nnb
                  IF ( blk22 ) THEN
*
*                    Exploit the structure of U.
*
                     CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
     $                            nnb, nnb, work( ppwo ), 2*nnb,
     $                            a( 1, j ), lda, work( pw ),
     $                            lwork-pw+1, ierr )
                  ELSE
*
*                    Ignore the structure of U.
*
                     CALL dgemm( 'No Transpose', 'No Transpose', top,
     $                           2*nnb, 2*nnb, one, a( 1, j ), lda,
     $                           work( ppwo ), 2*nnb, zero,
     $                           work( pw ), top )
                     CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
     $                            a( 1, j ), lda )
                  END IF
                  ppwo = ppwo + 4*nnb*nnb
               END DO
*
               j = ihi - nblst + 1
               CALL dgemm( 'No Transpose', 'No Transpose', top,
     $                     nblst, nblst, one, b( 1, j ), ldb,
     $                     work, nblst, zero, work( pw ), top )
               CALL dlacpy( 'All', top, nblst, work( pw ), top,
     $                      b( 1, j ), ldb )
               ppwo = nblst*nblst + 1
               j0 = j - nnb
               DO j = j0, jcol+1, -nnb
                  IF ( blk22 ) THEN
*
*                    Exploit the structure of U.
*
                     CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
     $                            nnb, nnb, work( ppwo ), 2*nnb,
     $                            b( 1, j ), ldb, work( pw ),
     $                            lwork-pw+1, ierr )
                  ELSE
*
*                    Ignore the structure of U.
*
                     CALL dgemm( 'No Transpose', 'No Transpose', top,
     $                           2*nnb, 2*nnb, one, b( 1, j ), ldb,
     $                           work( ppwo ), 2*nnb, zero,
     $                           work( pw ), top )
                     CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
     $                            b( 1, j ), ldb )
                  END IF
                  ppwo = ppwo + 4*nnb*nnb
               END DO
            END IF
*
*           Apply accumulated orthogonal matrices to Z.
*
            IF( wantz ) THEN
               j = ihi - nblst + 1
*
*              Restricting the update to rows TOPQ:IHI is only valid
*              when Z itself was initialized to the identity in this
*              call (INITZ).  The test must not depend on how Q was
*              set up: with COMPZ = 'V' the user-supplied Z1 is a
*              general matrix and all N rows have to be updated.
*
               IF ( initz ) THEN
                  topq = max( 2, j - jcol + 1 )
                  nh  = ihi - topq + 1
               ELSE
                  topq = 1
                  nh = n
               END IF
               CALL dgemm( 'No Transpose', 'No Transpose', nh,
     $                     nblst, nblst, one, z( topq, j ), ldz,
     $                     work, nblst, zero, work( pw ), nh )
               CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
     $                      z( topq, j ), ldz )
               ppwo = nblst*nblst + 1
               j0 = j - nnb
               DO j = j0, jcol+1, -nnb
                  IF ( initz ) THEN
                     topq = max( 2, j - jcol + 1 )
                     nh  = ihi - topq + 1
                  END IF
                  IF ( blk22 ) THEN
*
*                    Exploit the structure of U.
*
                     CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
     $                            nnb, nnb, work( ppwo ), 2*nnb,
     $                            z( topq, j ), ldz, work( pw ),
     $                            lwork-pw+1, ierr )
                  ELSE
*
*                    Ignore the structure of U.
*
                     CALL dgemm( 'No Transpose', 'No Transpose', nh,
     $                           2*nnb, 2*nnb, one, z( topq, j ), ldz,
     $                           work( ppwo ), 2*nnb, zero, work( pw ),
     $                           nh )
                     CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
     $                            z( topq, j ), ldz )
                  END IF
                  ppwo = ppwo + 4*nnb*nnb
               END DO
            END IF
         END DO
      END IF
*
*     Use unblocked code to reduce the rest of the matrix
*     Avoid re-initialization of modified Q and Z.
*     (If the blocked loop ran, JCOL differs from ILO and Q/Z already
*     hold accumulated transformations, so DGGHRD must be called with
*     'V' rather than 'I'.)
*
      compq2 = compq
      compz2 = compz
      IF ( jcol.NE.ilo ) THEN
         IF ( wantq )
     $      compq2 = 'V'
         IF ( wantz )
     $      compz2 = 'V'
      END IF
*
      IF ( jcol.LT.ihi )
     $   CALL dgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb, q,
     $                ldq, z, ldz, ierr )
      work( 1 ) = dble( lwkopt )
*
      RETURN
*
*     End of DGGHD3
*
      END