LAPACK  3.9.0
LAPACK: Linear Algebra PACKage
dlaqr5.f
Go to the documentation of this file.
1 *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DLAQR5 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22 * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23 * LDU, NV, WV, LDWV, NH, WH, LDWH )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28 * LOGICAL WANTT, WANTZ
29 * ..
30 * .. Array Arguments ..
31 * DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32 * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33 * $ Z( LDZ, * )
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> DLAQR5, called by DLAQR0, performs a
43 *> single small-bulge multi-shift QR sweep.
44 *> \endverbatim
45 *
46 * Arguments:
47 * ==========
48 *
49 *> \param[in] WANTT
50 *> \verbatim
51 *> WANTT is LOGICAL
52 *> WANTT = .true. if the quasi-triangular Schur factor
53 *> is being computed. WANTT is set to .false. otherwise.
54 *> \endverbatim
55 *>
56 *> \param[in] WANTZ
57 *> \verbatim
58 *> WANTZ is LOGICAL
59 *> WANTZ = .true. if the orthogonal Schur factor is being
60 *> computed. WANTZ is set to .false. otherwise.
61 *> \endverbatim
62 *>
63 *> \param[in] KACC22
64 *> \verbatim
65 *> KACC22 is INTEGER with value 0, 1, or 2.
66 *> Specifies the computation mode of far-from-diagonal
67 *> orthogonal updates.
68 *> = 0: DLAQR5 does not accumulate reflections and does not
69 *> use matrix-matrix multiply to update far-from-diagonal
70 *> matrix entries.
71 *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
72 *> multiply to update the far-from-diagonal matrix entries.
73 *> = 2: DLAQR5 accumulates reflections, uses matrix-matrix
74 *> multiply to update the far-from-diagonal matrix entries,
75 *> and takes advantage of 2-by-2 block structure during
76 *> matrix multiplies.
77 *> \endverbatim
78 *>
79 *> \param[in] N
80 *> \verbatim
81 *> N is INTEGER
82 *> N is the order of the Hessenberg matrix H upon which this
83 *> subroutine operates.
84 *> \endverbatim
85 *>
86 *> \param[in] KTOP
87 *> \verbatim
88 *> KTOP is INTEGER
89 *> \endverbatim
90 *>
91 *> \param[in] KBOT
92 *> \verbatim
93 *> KBOT is INTEGER
94 *> These are the first and last rows and columns of an
95 *> isolated diagonal block upon which the QR sweep is to be
96 *> applied. It is assumed without a check that
97 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
98 *> and
99 *> either KBOT = N or H(KBOT+1,KBOT) = 0.
100 *> \endverbatim
101 *>
102 *> \param[in] NSHFTS
103 *> \verbatim
104 *> NSHFTS is INTEGER
105 *> NSHFTS gives the number of simultaneous shifts. NSHFTS
106 *> must be positive and even.
107 *> \endverbatim
108 *>
109 *> \param[in,out] SR
110 *> \verbatim
111 *> SR is DOUBLE PRECISION array, dimension (NSHFTS)
112 *> \endverbatim
113 *>
114 *> \param[in,out] SI
115 *> \verbatim
116 *> SI is DOUBLE PRECISION array, dimension (NSHFTS)
117 *> SR contains the real parts and SI contains the imaginary
118 *> parts of the NSHFTS shifts of origin that define the
119 *> multi-shift QR sweep. On output SR and SI may be
120 *> reordered.
121 *> \endverbatim
122 *>
123 *> \param[in,out] H
124 *> \verbatim
125 *> H is DOUBLE PRECISION array, dimension (LDH,N)
126 *> On input H contains a Hessenberg matrix. On output a
127 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
128 *> to the isolated diagonal block in rows and columns KTOP
129 *> through KBOT.
130 *> \endverbatim
131 *>
132 *> \param[in] LDH
133 *> \verbatim
134 *> LDH is INTEGER
135 *> LDH is the leading dimension of H just as declared in the
136 *> calling procedure. LDH >= MAX(1,N).
137 *> \endverbatim
138 *>
139 *> \param[in] ILOZ
140 *> \verbatim
141 *> ILOZ is INTEGER
142 *> \endverbatim
143 *>
144 *> \param[in] IHIZ
145 *> \verbatim
146 *> IHIZ is INTEGER
147 *> Specify the rows of Z to which transformations must be
148 *> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
149 *> \endverbatim
150 *>
151 *> \param[in,out] Z
152 *> \verbatim
153 *> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
154 *> If WANTZ = .TRUE., then the QR Sweep orthogonal
155 *> similarity transformation is accumulated into
156 *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
157 *> If WANTZ = .FALSE., then Z is unreferenced.
158 *> \endverbatim
159 *>
160 *> \param[in] LDZ
161 *> \verbatim
162 *> LDZ is INTEGER
163 *> LDA is the leading dimension of Z just as declared in
164 *> the calling procedure. LDZ >= N.
165 *> \endverbatim
166 *>
167 *> \param[out] V
168 *> \verbatim
169 *> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
170 *> \endverbatim
171 *>
172 *> \param[in] LDV
173 *> \verbatim
174 *> LDV is INTEGER
175 *> LDV is the leading dimension of V as declared in the
176 *> calling procedure. LDV >= 3.
177 *> \endverbatim
178 *>
179 *> \param[out] U
180 *> \verbatim
181 *> U is DOUBLE PRECISION array, dimension (LDU,3*NSHFTS-3)
182 *> \endverbatim
183 *>
184 *> \param[in] LDU
185 *> \verbatim
186 *> LDU is INTEGER
187 *> LDU is the leading dimension of U just as declared in the
188 *> in the calling subroutine. LDU >= 3*NSHFTS-3.
189 *> \endverbatim
190 *>
191 *> \param[in] NV
192 *> \verbatim
193 *> NV is INTEGER
194 *> NV is the number of rows in WV agailable for workspace.
195 *> NV >= 1.
196 *> \endverbatim
197 *>
198 *> \param[out] WV
199 *> \verbatim
200 *> WV is DOUBLE PRECISION array, dimension (LDWV,3*NSHFTS-3)
201 *> \endverbatim
202 *>
203 *> \param[in] LDWV
204 *> \verbatim
205 *> LDWV is INTEGER
206 *> LDWV is the leading dimension of WV as declared in the
207 *> in the calling subroutine. LDWV >= NV.
208 *> \endverbatim
209 *
210 *> \param[in] NH
211 *> \verbatim
212 *> NH is INTEGER
213 *> NH is the number of columns in array WH available for
214 *> workspace. NH >= 1.
215 *> \endverbatim
216 *>
217 *> \param[out] WH
218 *> \verbatim
219 *> WH is DOUBLE PRECISION array, dimension (LDWH,NH)
220 *> \endverbatim
221 *>
222 *> \param[in] LDWH
223 *> \verbatim
224 *> LDWH is INTEGER
225 *> Leading dimension of WH just as declared in the
226 *> calling procedure. LDWH >= 3*NSHFTS-3.
227 *> \endverbatim
228 *>
229 * Authors:
230 * ========
231 *
232 *> \author Univ. of Tennessee
233 *> \author Univ. of California Berkeley
234 *> \author Univ. of Colorado Denver
235 *> \author NAG Ltd.
236 *
237 *> \date June 2016
238 *
239 *> \ingroup doubleOTHERauxiliary
240 *
241 *> \par Contributors:
242 * ==================
243 *>
244 *> Karen Braman and Ralph Byers, Department of Mathematics,
245 *> University of Kansas, USA
246 *
247 *> \par References:
248 * ================
249 *>
250 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
251 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
252 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
253 *> 929--947, 2002.
254 *>
255 * =====================================================================
256  SUBROUTINE dlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
257  $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
258  $ LDU, NV, WV, LDWV, NH, WH, LDWH )
259 *
260 * -- LAPACK auxiliary routine (version 3.7.1) --
261 * -- LAPACK is a software package provided by Univ. of Tennessee, --
262 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
263 * June 2016
264 *
265 * .. Scalar Arguments ..
266  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
267  $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
268  LOGICAL WANTT, WANTZ
269 * ..
270 * .. Array Arguments ..
271  DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
272  $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
273  $ z( ldz, * )
274 * ..
275 *
276 * ================================================================
277 * .. Parameters ..
278  DOUBLE PRECISION ZERO, ONE
279  PARAMETER ( ZERO = 0.0d0, one = 1.0d0 )
280 * ..
281 * .. Local Scalars ..
282  DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
283  $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
284  $ ulp
285  INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
286  $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
287  $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
288  $ ns, nu
289  LOGICAL ACCUM, BLK22, BMP22
290 * ..
291 * .. External Functions ..
292  DOUBLE PRECISION DLAMCH
293  EXTERNAL DLAMCH
294 * ..
295 * .. Intrinsic Functions ..
296 *
297  INTRINSIC abs, dble, max, min, mod
298 * ..
299 * .. Local Arrays ..
300  DOUBLE PRECISION VT( 3 )
301 * ..
302 * .. External Subroutines ..
303  EXTERNAL dgemm, dlabad, dlacpy, dlaqr1, dlarfg, dlaset,
304  $ dtrmm
305 * ..
306 * .. Executable Statements ..
307 *
308 * ==== If there are no shifts, then there is nothing to do. ====
309 *
310  IF( nshfts.LT.2 )
311  $ RETURN
312 *
313 * ==== If the active block is empty or 1-by-1, then there
314 * . is nothing to do. ====
315 *
316  IF( ktop.GE.kbot )
317  $ RETURN
318 *
319 * ==== Shuffle shifts into pairs of real shifts and pairs
320 * . of complex conjugate shifts assuming complex
321 * . conjugate shifts are already adjacent to one
322 * . another. ====
323 *
324  DO 10 i = 1, nshfts - 2, 2
325  IF( si( i ).NE.-si( i+1 ) ) THEN
326 *
327  swap = sr( i )
328  sr( i ) = sr( i+1 )
329  sr( i+1 ) = sr( i+2 )
330  sr( i+2 ) = swap
331 *
332  swap = si( i )
333  si( i ) = si( i+1 )
334  si( i+1 ) = si( i+2 )
335  si( i+2 ) = swap
336  END IF
337  10 CONTINUE
338 *
339 * ==== NSHFTS is supposed to be even, but if it is odd,
340 * . then simply reduce it by one. The shuffle above
341 * . ensures that the dropped shift is real and that
342 * . the remaining shifts are paired. ====
343 *
344  ns = nshfts - mod( nshfts, 2 )
345 *
346 * ==== Machine constants for deflation ====
347 *
348  safmin = dlamch( 'SAFE MINIMUM' )
349  safmax = one / safmin
350  CALL dlabad( safmin, safmax )
351  ulp = dlamch( 'PRECISION' )
352  smlnum = safmin*( dble( n ) / ulp )
353 *
354 * ==== Use accumulated reflections to update far-from-diagonal
355 * . entries ? ====
356 *
357  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
358 *
359 * ==== If so, exploit the 2-by-2 block structure? ====
360 *
361  blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
362 *
363 * ==== clear trash ====
364 *
365  IF( ktop+2.LE.kbot )
366  $ h( ktop+2, ktop ) = zero
367 *
368 * ==== NBMPS = number of 2-shift bulges in the chain ====
369 *
370  nbmps = ns / 2
371 *
372 * ==== KDU = width of slab ====
373 *
374  kdu = 6*nbmps - 3
375 *
376 * ==== Create and chase chains of NBMPS bulges ====
377 *
378  DO 220 incol = 3*( 1-nbmps ) + ktop - 1, kbot - 2, 3*nbmps - 2
379  ndcol = incol + kdu
380  IF( accum )
381  $ CALL dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
382 *
383 * ==== Near-the-diagonal bulge chase. The following loop
384 * . performs the near-the-diagonal part of a small bulge
385 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
386 * . chunk extends from column INCOL to column NDCOL
387 * . (including both column INCOL and column NDCOL). The
388 * . following loop chases a 3*NBMPS column long chain of
389 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
390 * . may be less than KTOP and and NDCOL may be greater than
391 * . KBOT indicating phantom columns from which to chase
392 * . bulges before they are actually introduced or to which
393 * . to chase bulges beyond column KBOT.) ====
394 *
395  DO 150 krcol = incol, min( incol+3*nbmps-3, kbot-2 )
396 *
397 * ==== Bulges number MTOP to MBOT are active double implicit
398 * . shift bulges. There may or may not also be small
399 * . 2-by-2 bulge, if there is room. The inactive bulges
400 * . (if any) must wait until the active bulges have moved
401 * . down the diagonal to make room. The phantom matrix
402 * . paradigm described above helps keep track. ====
403 *
404  mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
405  mbot = min( nbmps, ( kbot-krcol ) / 3 )
406  m22 = mbot + 1
407  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
408  $ ( kbot-2 )
409 *
410 * ==== Generate reflections to chase the chain right
411 * . one column. (The minimum value of K is KTOP-1.) ====
412 *
413  DO 20 m = mtop, mbot
414  k = krcol + 3*( m-1 )
415  IF( k.EQ.ktop-1 ) THEN
416  CALL dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
417  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
418  $ v( 1, m ) )
419  alpha = v( 1, m )
420  CALL dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
421  ELSE
422  beta = h( k+1, k )
423  v( 2, m ) = h( k+2, k )
424  v( 3, m ) = h( k+3, k )
425  CALL dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
426 *
427 * ==== A Bulge may collapse because of vigilant
428 * . deflation or destructive underflow. In the
429 * . underflow case, try the two-small-subdiagonals
430 * . trick to try to reinflate the bulge. ====
431 *
432  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
433  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
434 *
435 * ==== Typical case: not collapsed (yet). ====
436 *
437  h( k+1, k ) = beta
438  h( k+2, k ) = zero
439  h( k+3, k ) = zero
440  ELSE
441 *
442 * ==== Atypical case: collapsed. Attempt to
443 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
444 * . If the fill resulting from the new
445 * . reflector is too large, then abandon it.
446 * . Otherwise, use the new one. ====
447 *
448  CALL dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
449  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
450  $ vt )
451  alpha = vt( 1 )
452  CALL dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
453  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
454  $ h( k+2, k ) )
455 *
456  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
457  $ abs( refsum*vt( 3 ) ).GT.ulp*
458  $ ( abs( h( k, k ) )+abs( h( k+1,
459  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
460 *
461 * ==== Starting a new bulge here would
462 * . create non-negligible fill. Use
463 * . the old one with trepidation. ====
464 *
465  h( k+1, k ) = beta
466  h( k+2, k ) = zero
467  h( k+3, k ) = zero
468  ELSE
469 *
470 * ==== Stating a new bulge here would
471 * . create only negligible fill.
472 * . Replace the old reflector with
473 * . the new one. ====
474 *
475  h( k+1, k ) = h( k+1, k ) - refsum
476  h( k+2, k ) = zero
477  h( k+3, k ) = zero
478  v( 1, m ) = vt( 1 )
479  v( 2, m ) = vt( 2 )
480  v( 3, m ) = vt( 3 )
481  END IF
482  END IF
483  END IF
484  20 CONTINUE
485 *
486 * ==== Generate a 2-by-2 reflection, if needed. ====
487 *
488  k = krcol + 3*( m22-1 )
489  IF( bmp22 ) THEN
490  IF( k.EQ.ktop-1 ) THEN
491  CALL dlaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
492  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
493  $ v( 1, m22 ) )
494  beta = v( 1, m22 )
495  CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
496  ELSE
497  beta = h( k+1, k )
498  v( 2, m22 ) = h( k+2, k )
499  CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
500  h( k+1, k ) = beta
501  h( k+2, k ) = zero
502  END IF
503  END IF
504 *
505 * ==== Multiply H by reflections from the left ====
506 *
507  IF( accum ) THEN
508  jbot = min( ndcol, kbot )
509  ELSE IF( wantt ) THEN
510  jbot = n
511  ELSE
512  jbot = kbot
513  END IF
514  DO 40 j = max( ktop, krcol ), jbot
515  mend = min( mbot, ( j-krcol+2 ) / 3 )
516  DO 30 m = mtop, mend
517  k = krcol + 3*( m-1 )
518  refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
519  $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
520  h( k+1, j ) = h( k+1, j ) - refsum
521  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
522  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
523  30 CONTINUE
524  40 CONTINUE
525  IF( bmp22 ) THEN
526  k = krcol + 3*( m22-1 )
527  DO 50 j = max( k+1, ktop ), jbot
528  refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
529  $ h( k+2, j ) )
530  h( k+1, j ) = h( k+1, j ) - refsum
531  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
532  50 CONTINUE
533  END IF
534 *
535 * ==== Multiply H by reflections from the right.
536 * . Delay filling in the last row until the
537 * . vigilant deflation check is complete. ====
538 *
539  IF( accum ) THEN
540  jtop = max( ktop, incol )
541  ELSE IF( wantt ) THEN
542  jtop = 1
543  ELSE
544  jtop = ktop
545  END IF
546  DO 90 m = mtop, mbot
547  IF( v( 1, m ).NE.zero ) THEN
548  k = krcol + 3*( m-1 )
549  DO 60 j = jtop, min( kbot, k+3 )
550  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
551  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
552  h( j, k+1 ) = h( j, k+1 ) - refsum
553  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
554  h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
555  60 CONTINUE
556 *
557  IF( accum ) THEN
558 *
559 * ==== Accumulate U. (If necessary, update Z later
560 * . with with an efficient matrix-matrix
561 * . multiply.) ====
562 *
563  kms = k - incol
564  DO 70 j = max( 1, ktop-incol ), kdu
565  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
566  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
567  u( j, kms+1 ) = u( j, kms+1 ) - refsum
568  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
569  u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
570  70 CONTINUE
571  ELSE IF( wantz ) THEN
572 *
573 * ==== U is not accumulated, so update Z
574 * . now by multiplying by reflections
575 * . from the right. ====
576 *
577  DO 80 j = iloz, ihiz
578  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
579  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
580  z( j, k+1 ) = z( j, k+1 ) - refsum
581  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
582  z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
583  80 CONTINUE
584  END IF
585  END IF
586  90 CONTINUE
587 *
588 * ==== Special case: 2-by-2 reflection (if needed) ====
589 *
590  k = krcol + 3*( m22-1 )
591  IF( bmp22 ) THEN
592  IF ( v( 1, m22 ).NE.zero ) THEN
593  DO 100 j = jtop, min( kbot, k+3 )
594  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
595  $ h( j, k+2 ) )
596  h( j, k+1 ) = h( j, k+1 ) - refsum
597  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
598  100 CONTINUE
599 *
600  IF( accum ) THEN
601  kms = k - incol
602  DO 110 j = max( 1, ktop-incol ), kdu
603  refsum = v( 1, m22 )*( u( j, kms+1 )+
604  $ v( 2, m22 )*u( j, kms+2 ) )
605  u( j, kms+1 ) = u( j, kms+1 ) - refsum
606  u( j, kms+2 ) = u( j, kms+2 ) -
607  $ refsum*v( 2, m22 )
608  110 CONTINUE
609  ELSE IF( wantz ) THEN
610  DO 120 j = iloz, ihiz
611  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
612  $ z( j, k+2 ) )
613  z( j, k+1 ) = z( j, k+1 ) - refsum
614  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
615  120 CONTINUE
616  END IF
617  END IF
618  END IF
619 *
620 * ==== Vigilant deflation check ====
621 *
622  mstart = mtop
623  IF( krcol+3*( mstart-1 ).LT.ktop )
624  $ mstart = mstart + 1
625  mend = mbot
626  IF( bmp22 )
627  $ mend = mend + 1
628  IF( krcol.EQ.kbot-2 )
629  $ mend = mend + 1
630  DO 130 m = mstart, mend
631  k = min( kbot-1, krcol+3*( m-1 ) )
632 *
633 * ==== The following convergence test requires that
634 * . the tradition small-compared-to-nearby-diagonals
635 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
636 * . criteria both be satisfied. The latter improves
637 * . accuracy in some examples. Falling back on an
638 * . alternate convergence criterion when TST1 or TST2
639 * . is zero (as done here) is traditional but probably
640 * . unnecessary. ====
641 *
642  IF( h( k+1, k ).NE.zero ) THEN
643  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
644  IF( tst1.EQ.zero ) THEN
645  IF( k.GE.ktop+1 )
646  $ tst1 = tst1 + abs( h( k, k-1 ) )
647  IF( k.GE.ktop+2 )
648  $ tst1 = tst1 + abs( h( k, k-2 ) )
649  IF( k.GE.ktop+3 )
650  $ tst1 = tst1 + abs( h( k, k-3 ) )
651  IF( k.LE.kbot-2 )
652  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
653  IF( k.LE.kbot-3 )
654  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
655  IF( k.LE.kbot-4 )
656  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
657  END IF
658  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
659  $ THEN
660  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
661  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
662  h11 = max( abs( h( k+1, k+1 ) ),
663  $ abs( h( k, k )-h( k+1, k+1 ) ) )
664  h22 = min( abs( h( k+1, k+1 ) ),
665  $ abs( h( k, k )-h( k+1, k+1 ) ) )
666  scl = h11 + h12
667  tst2 = h22*( h11 / scl )
668 *
669  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
670  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
671  END IF
672  END IF
673  130 CONTINUE
674 *
675 * ==== Fill in the last row of each bulge. ====
676 *
677  mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
678  DO 140 m = mtop, mend
679  k = krcol + 3*( m-1 )
680  refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
681  h( k+4, k+1 ) = -refsum
682  h( k+4, k+2 ) = -refsum*v( 2, m )
683  h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )
684  140 CONTINUE
685 *
686 * ==== End of near-the-diagonal bulge chase. ====
687 *
688  150 CONTINUE
689 *
690 * ==== Use U (if accumulated) to update far-from-diagonal
691 * . entries in H. If required, use U to update Z as
692 * . well. ====
693 *
694  IF( accum ) THEN
695  IF( wantt ) THEN
696  jtop = 1
697  jbot = n
698  ELSE
699  jtop = ktop
700  jbot = kbot
701  END IF
702  IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
703  $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) ) THEN
704 *
705 * ==== Updates not exploiting the 2-by-2 block
706 * . structure of U. K1 and NU keep track of
707 * . the location and size of U in the special
708 * . cases of introducing bulges and chasing
709 * . bulges off the bottom. In these special
710 * . cases and in case the number of shifts
711 * . is NS = 2, there is no 2-by-2 block
712 * . structure to exploit. ====
713 *
714  k1 = max( 1, ktop-incol )
715  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
716 *
717 * ==== Horizontal Multiply ====
718 *
719  DO 160 jcol = min( ndcol, kbot ) + 1, jbot, nh
720  jlen = min( nh, jbot-jcol+1 )
721  CALL dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
722  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
723  $ ldwh )
724  CALL dlacpy( 'ALL', nu, jlen, wh, ldwh,
725  $ h( incol+k1, jcol ), ldh )
726  160 CONTINUE
727 *
728 * ==== Vertical multiply ====
729 *
730  DO 170 jrow = jtop, max( ktop, incol ) - 1, nv
731  jlen = min( nv, max( ktop, incol )-jrow )
732  CALL dgemm( 'N', 'N', jlen, nu, nu, one,
733  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
734  $ ldu, zero, wv, ldwv )
735  CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
736  $ h( jrow, incol+k1 ), ldh )
737  170 CONTINUE
738 *
739 * ==== Z multiply (also vertical) ====
740 *
741  IF( wantz ) THEN
742  DO 180 jrow = iloz, ihiz, nv
743  jlen = min( nv, ihiz-jrow+1 )
744  CALL dgemm( 'N', 'N', jlen, nu, nu, one,
745  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
746  $ ldu, zero, wv, ldwv )
747  CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
748  $ z( jrow, incol+k1 ), ldz )
749  180 CONTINUE
750  END IF
751  ELSE
752 *
753 * ==== Updates exploiting U's 2-by-2 block structure.
754 * . (I2, I4, J2, J4 are the last rows and columns
755 * . of the blocks.) ====
756 *
757  i2 = ( kdu+1 ) / 2
758  i4 = kdu
759  j2 = i4 - i2
760  j4 = kdu
761 *
762 * ==== KZS and KNZ deal with the band of zeros
763 * . along the diagonal of one of the triangular
764 * . blocks. ====
765 *
766  kzs = ( j4-j2 ) - ( ns+1 )
767  knz = ns + 1
768 *
769 * ==== Horizontal multiply ====
770 *
771  DO 190 jcol = min( ndcol, kbot ) + 1, jbot, nh
772  jlen = min( nh, jbot-jcol+1 )
773 *
774 * ==== Copy bottom of H to top+KZS of scratch ====
775 * (The first KZS rows get multiplied by zero.) ====
776 *
777  CALL dlacpy( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
778  $ ldh, wh( kzs+1, 1 ), ldwh )
779 *
780 * ==== Multiply by U21**T ====
781 *
782  CALL dlaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
783  CALL dtrmm( 'L', 'U', 'C', 'N', knz, jlen, one,
784  $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
785  $ ldwh )
786 *
787 * ==== Multiply top of H by U11**T ====
788 *
789  CALL dgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
790  $ h( incol+1, jcol ), ldh, one, wh, ldwh )
791 *
792 * ==== Copy top of H to bottom of WH ====
793 *
794  CALL dlacpy( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
795  $ wh( i2+1, 1 ), ldwh )
796 *
797 * ==== Multiply by U21**T ====
798 *
799  CALL dtrmm( 'L', 'L', 'C', 'N', j2, jlen, one,
800  $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
801 *
802 * ==== Multiply by U22 ====
803 *
804  CALL dgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
805  $ u( j2+1, i2+1 ), ldu,
806  $ h( incol+1+j2, jcol ), ldh, one,
807  $ wh( i2+1, 1 ), ldwh )
808 *
809 * ==== Copy it back ====
810 *
811  CALL dlacpy( 'ALL', kdu, jlen, wh, ldwh,
812  $ h( incol+1, jcol ), ldh )
813  190 CONTINUE
814 *
815 * ==== Vertical multiply ====
816 *
817  DO 200 jrow = jtop, max( incol, ktop ) - 1, nv
818  jlen = min( nv, max( incol, ktop )-jrow )
819 *
820 * ==== Copy right of H to scratch (the first KZS
821 * . columns get multiplied by zero) ====
822 *
823  CALL dlacpy( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
824  $ ldh, wv( 1, 1+kzs ), ldwv )
825 *
826 * ==== Multiply by U21 ====
827 *
828  CALL dlaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
829  CALL dtrmm( 'R', 'U', 'N', 'N', jlen, knz, one,
830  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
831  $ ldwv )
832 *
833 * ==== Multiply by U11 ====
834 *
835  CALL dgemm( 'N', 'N', jlen, i2, j2, one,
836  $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
837  $ ldwv )
838 *
839 * ==== Copy left of H to right of scratch ====
840 *
841  CALL dlacpy( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
842  $ wv( 1, 1+i2 ), ldwv )
843 *
844 * ==== Multiply by U21 ====
845 *
846  CALL dtrmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
847  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
848 *
849 * ==== Multiply by U22 ====
850 *
851  CALL dgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
852  $ h( jrow, incol+1+j2 ), ldh,
853  $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
854  $ ldwv )
855 *
856 * ==== Copy it back ====
857 *
858  CALL dlacpy( 'ALL', jlen, kdu, wv, ldwv,
859  $ h( jrow, incol+1 ), ldh )
860  200 CONTINUE
861 *
862 * ==== Multiply Z (also vertical) ====
863 *
864  IF( wantz ) THEN
865  DO 210 jrow = iloz, ihiz, nv
866  jlen = min( nv, ihiz-jrow+1 )
867 *
868 * ==== Copy right of Z to left of scratch (first
869 * . KZS columns get multiplied by zero) ====
870 *
871  CALL dlacpy( 'ALL', jlen, knz,
872  $ z( jrow, incol+1+j2 ), ldz,
873  $ wv( 1, 1+kzs ), ldwv )
874 *
875 * ==== Multiply by U12 ====
876 *
877  CALL dlaset( 'ALL', jlen, kzs, zero, zero, wv,
878  $ ldwv )
879  CALL dtrmm( 'R', 'U', 'N', 'N', jlen, knz, one,
880  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
881  $ ldwv )
882 *
883 * ==== Multiply by U11 ====
884 *
885  CALL dgemm( 'N', 'N', jlen, i2, j2, one,
886  $ z( jrow, incol+1 ), ldz, u, ldu, one,
887  $ wv, ldwv )
888 *
889 * ==== Copy left of Z to right of scratch ====
890 *
891  CALL dlacpy( 'ALL', jlen, j2, z( jrow, incol+1 ),
892  $ ldz, wv( 1, 1+i2 ), ldwv )
893 *
894 * ==== Multiply by U21 ====
895 *
896  CALL dtrmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
897  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
898  $ ldwv )
899 *
900 * ==== Multiply by U22 ====
901 *
902  CALL dgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
903  $ z( jrow, incol+1+j2 ), ldz,
904  $ u( j2+1, i2+1 ), ldu, one,
905  $ wv( 1, 1+i2 ), ldwv )
906 *
907 * ==== Copy the result back to Z ====
908 *
909  CALL dlacpy( 'ALL', jlen, kdu, wv, ldwv,
910  $ z( jrow, incol+1 ), ldz )
911  210 CONTINUE
912  END IF
913  END IF
914  END IF
915  220 CONTINUE
916 *
917 * ==== End of DLAQR5 ====
918 *
919  END
dlarfg
subroutine dlarfg(N, ALPHA, X, INCX, TAU)
DLARFG generates an elementary reflector (Householder matrix).
Definition: dlarfg.f:108
dtrmm
subroutine dtrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
DTRMM
Definition: dtrmm.f:179
dlaqr5
subroutine dlaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
DLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: dlaqr5.f:259
dlaqr1
subroutine dlaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: dlaqr1.f:123
dgemm
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
dlabad
subroutine dlabad(SMALL, LARGE)
DLABAD
Definition: dlabad.f:76
dlaset
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: dlaset.f:112
dlacpy
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105