LAPACK  3.9.0
LAPACK: Linear Algebra PACKage

◆ slaqr5()

subroutine slaqr5 ( logical  WANTT,
logical  WANTZ,
integer  KACC22,
integer  N,
integer  KTOP,
integer  KBOT,
integer  NSHFTS,
real, dimension( * )  SR,
real, dimension( * )  SI,
real, dimension( ldh, * )  H,
integer  LDH,
integer  ILOZ,
integer  IHIZ,
real, dimension( ldz, * )  Z,
integer  LDZ,
real, dimension( ldv, * )  V,
integer  LDV,
real, dimension( ldu, * )  U,
integer  LDU,
integer  NV,
real, dimension( ldwv, * )  WV,
integer  LDWV,
integer  NH,
real, dimension( ldwh, * )  WH,
integer  LDWH 
)

SLAQR5 performs a single small-bulge multi-shift QR sweep.

Download SLAQR5 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
    SLAQR5, called by SLAQR0, performs a
    single small-bulge multi-shift QR sweep.
Parameters
[in]WANTT
          WANTT is LOGICAL
             WANTT = .true. if the quasi-triangular Schur factor
             is being computed.  WANTT is set to .false. otherwise.
[in]WANTZ
          WANTZ is LOGICAL
             WANTZ = .true. if the orthogonal Schur factor is being
             computed.  WANTZ is set to .false. otherwise.
[in]KACC22
          KACC22 is INTEGER with value 0, 1, or 2.
             Specifies the computation mode of far-from-diagonal
             orthogonal updates.
        = 0: SLAQR5 does not accumulate reflections and does not
             use matrix-matrix multiply to update far-from-diagonal
             matrix entries.
        = 1: SLAQR5 accumulates reflections and uses matrix-matrix
             multiply to update the far-from-diagonal matrix entries.
        = 2: SLAQR5 accumulates reflections, uses matrix-matrix
             multiply to update the far-from-diagonal matrix entries,
             and takes advantage of 2-by-2 block structure during
             matrix multiplies.
[in]N
          N is INTEGER
             N is the order of the Hessenberg matrix H upon which this
             subroutine operates.
[in]KTOP
          KTOP is INTEGER
[in]KBOT
          KBOT is INTEGER
             These are the first and last rows and columns of an
             isolated diagonal block upon which the QR sweep is to be
             applied. It is assumed without a check that
                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
             and
                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
[in]NSHFTS
          NSHFTS is INTEGER
             NSHFTS gives the number of simultaneous shifts.  NSHFTS
             must be positive and even.
[in,out]SR
          SR is REAL array, dimension (NSHFTS)
[in,out]SI
          SI is REAL array, dimension (NSHFTS)
             SR contains the real parts and SI contains the imaginary
             parts of the NSHFTS shifts of origin that define the
             multi-shift QR sweep.  On output SR and SI may be
             reordered.
[in,out]H
          H is REAL array, dimension (LDH,N)
             On input H contains a Hessenberg matrix.  On output a
             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
             to the isolated diagonal block in rows and columns KTOP
             through KBOT.
[in]LDH
          LDH is INTEGER
             LDH is the leading dimension of H just as declared in the
             calling procedure.  LDH >= MAX(1,N).
[in]ILOZ
          ILOZ is INTEGER
[in]IHIZ
          IHIZ is INTEGER
             Specify the rows of Z to which transformations must be
             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
[in,out]Z
          Z is REAL array, dimension (LDZ,IHIZ)
             If WANTZ = .TRUE., then the QR Sweep orthogonal
             similarity transformation is accumulated into
             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
             If WANTZ = .FALSE., then Z is unreferenced.
[in]LDZ
          LDZ is INTEGER
             LDA is the leading dimension of Z just as declared in
             the calling procedure. LDZ >= N.
[out]V
          V is REAL array, dimension (LDV,NSHFTS/2)
[in]LDV
          LDV is INTEGER
             LDV is the leading dimension of V as declared in the
             calling procedure.  LDV >= 3.
[out]U
          U is REAL array, dimension (LDU,3*NSHFTS-3)
[in]LDU
          LDU is INTEGER
             LDU is the leading dimension of U just as declared in the
             in the calling subroutine.  LDU >= 3*NSHFTS-3.
[in]NV
          NV is INTEGER
             NV is the number of rows in WV agailable for workspace.
             NV >= 1.
[out]WV
          WV is REAL array, dimension (LDWV,3*NSHFTS-3)
[in]LDWV
          LDWV is INTEGER
             LDWV is the leading dimension of WV as declared in the
             in the calling subroutine.  LDWV >= NV.
[in]NH
          NH is INTEGER
             NH is the number of columns in array WH available for
             workspace. NH >= 1.
[out]WH
          WH is REAL array, dimension (LDWH,NH)
[in]LDWH
          LDWH is INTEGER
             Leading dimension of WH just as declared in the
             calling procedure.  LDWH >= 3*NSHFTS-3.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
June 2016
Contributors:
Karen Braman and Ralph Byers, Department of Mathematics, University of Kansas, USA
References:
K. Braman, R. Byers and R. Mathias, The Multi-Shift QR Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 Performance, SIAM Journal of Matrix Analysis, volume 23, pages 929–947, 2002.

Definition at line 259 of file slaqr5.f.

259 *
260 * -- LAPACK auxiliary routine (version 3.7.1) --
261 * -- LAPACK is a software package provided by Univ. of Tennessee, --
262 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
263 * June 2016
264 *
265 * .. Scalar Arguments ..
266  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
267  $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
268  LOGICAL WANTT, WANTZ
269 * ..
270 * .. Array Arguments ..
271  REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
272  $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
273  $ Z( LDZ, * )
274 * ..
275 *
276 * ================================================================
277 * .. Parameters ..
278  REAL ZERO, ONE
279  parameter( zero = 0.0e0, one = 1.0e0 )
280 * ..
281 * .. Local Scalars ..
282  REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
283  $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
284  $ ULP
285  INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
286  $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
287  $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
288  $ NS, NU
289  LOGICAL ACCUM, BLK22, BMP22
290 * ..
291 * .. External Functions ..
292  REAL SLAMCH
293  EXTERNAL slamch
294 * ..
295 * .. Intrinsic Functions ..
296 *
297  INTRINSIC abs, max, min, mod, real
298 * ..
299 * .. Local Arrays ..
300  REAL VT( 3 )
301 * ..
302 * .. External Subroutines ..
303  EXTERNAL sgemm, slabad, slacpy, slaqr1, slarfg, slaset,
304  $ strmm
305 * ..
306 * .. Executable Statements ..
307 *
308 * ==== If there are no shifts, then there is nothing to do. ====
309 *
310  IF( nshfts.LT.2 )
311  $ RETURN
312 *
313 * ==== If the active block is empty or 1-by-1, then there
314 * . is nothing to do. ====
315 *
316  IF( ktop.GE.kbot )
317  $ RETURN
318 *
319 * ==== Shuffle shifts into pairs of real shifts and pairs
320 * . of complex conjugate shifts assuming complex
321 * . conjugate shifts are already adjacent to one
322 * . another. ====
323 *
324  DO 10 i = 1, nshfts - 2, 2
325  IF( si( i ).NE.-si( i+1 ) ) THEN
326 *
327  swap = sr( i )
328  sr( i ) = sr( i+1 )
329  sr( i+1 ) = sr( i+2 )
330  sr( i+2 ) = swap
331 *
332  swap = si( i )
333  si( i ) = si( i+1 )
334  si( i+1 ) = si( i+2 )
335  si( i+2 ) = swap
336  END IF
337  10 CONTINUE
338 *
339 * ==== NSHFTS is supposed to be even, but if it is odd,
340 * . then simply reduce it by one. The shuffle above
341 * . ensures that the dropped shift is real and that
342 * . the remaining shifts are paired. ====
343 *
344  ns = nshfts - mod( nshfts, 2 )
345 *
346 * ==== Machine constants for deflation ====
347 *
348  safmin = slamch( 'SAFE MINIMUM' )
349  safmax = one / safmin
350  CALL slabad( safmin, safmax )
351  ulp = slamch( 'PRECISION' )
352  smlnum = safmin*( real( n ) / ulp )
353 *
354 * ==== Use accumulated reflections to update far-from-diagonal
355 * . entries ? ====
356 *
357  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
358 *
359 * ==== If so, exploit the 2-by-2 block structure? ====
360 *
361  blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
362 *
363 * ==== clear trash ====
364 *
365  IF( ktop+2.LE.kbot )
366  $ h( ktop+2, ktop ) = zero
367 *
368 * ==== NBMPS = number of 2-shift bulges in the chain ====
369 *
370  nbmps = ns / 2
371 *
372 * ==== KDU = width of slab ====
373 *
374  kdu = 6*nbmps - 3
375 *
376 * ==== Create and chase chains of NBMPS bulges ====
377 *
378  DO 220 incol = 3*( 1-nbmps ) + ktop - 1, kbot - 2, 3*nbmps - 2
379  ndcol = incol + kdu
380  IF( accum )
381  $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
382 *
383 * ==== Near-the-diagonal bulge chase. The following loop
384 * . performs the near-the-diagonal part of a small bulge
385 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
386 * . chunk extends from column INCOL to column NDCOL
387 * . (including both column INCOL and column NDCOL). The
388 * . following loop chases a 3*NBMPS column long chain of
389 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
390 * . may be less than KTOP and and NDCOL may be greater than
391 * . KBOT indicating phantom columns from which to chase
392 * . bulges before they are actually introduced or to which
393 * . to chase bulges beyond column KBOT.) ====
394 *
395  DO 150 krcol = incol, min( incol+3*nbmps-3, kbot-2 )
396 *
397 * ==== Bulges number MTOP to MBOT are active double implicit
398 * . shift bulges. There may or may not also be small
399 * . 2-by-2 bulge, if there is room. The inactive bulges
400 * . (if any) must wait until the active bulges have moved
401 * . down the diagonal to make room. The phantom matrix
402 * . paradigm described above helps keep track. ====
403 *
404  mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
405  mbot = min( nbmps, ( kbot-krcol ) / 3 )
406  m22 = mbot + 1
407  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
408  $ ( kbot-2 )
409 *
410 * ==== Generate reflections to chase the chain right
411 * . one column. (The minimum value of K is KTOP-1.) ====
412 *
413  DO 20 m = mtop, mbot
414  k = krcol + 3*( m-1 )
415  IF( k.EQ.ktop-1 ) THEN
416  CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
417  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
418  $ v( 1, m ) )
419  alpha = v( 1, m )
420  CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
421  ELSE
422  beta = h( k+1, k )
423  v( 2, m ) = h( k+2, k )
424  v( 3, m ) = h( k+3, k )
425  CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
426 *
427 * ==== A Bulge may collapse because of vigilant
428 * . deflation or destructive underflow. In the
429 * . underflow case, try the two-small-subdiagonals
430 * . trick to try to reinflate the bulge. ====
431 *
432  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
433  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
434 *
435 * ==== Typical case: not collapsed (yet). ====
436 *
437  h( k+1, k ) = beta
438  h( k+2, k ) = zero
439  h( k+3, k ) = zero
440  ELSE
441 *
442 * ==== Atypical case: collapsed. Attempt to
443 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
444 * . If the fill resulting from the new
445 * . reflector is too large, then abandon it.
446 * . Otherwise, use the new one. ====
447 *
448  CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
449  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
450  $ vt )
451  alpha = vt( 1 )
452  CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
453  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
454  $ h( k+2, k ) )
455 *
456  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
457  $ abs( refsum*vt( 3 ) ).GT.ulp*
458  $ ( abs( h( k, k ) )+abs( h( k+1,
459  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
460 *
461 * ==== Starting a new bulge here would
462 * . create non-negligible fill. Use
463 * . the old one with trepidation. ====
464 *
465  h( k+1, k ) = beta
466  h( k+2, k ) = zero
467  h( k+3, k ) = zero
468  ELSE
469 *
470 * ==== Stating a new bulge here would
471 * . create only negligible fill.
472 * . Replace the old reflector with
473 * . the new one. ====
474 *
475  h( k+1, k ) = h( k+1, k ) - refsum
476  h( k+2, k ) = zero
477  h( k+3, k ) = zero
478  v( 1, m ) = vt( 1 )
479  v( 2, m ) = vt( 2 )
480  v( 3, m ) = vt( 3 )
481  END IF
482  END IF
483  END IF
484  20 CONTINUE
485 *
486 * ==== Generate a 2-by-2 reflection, if needed. ====
487 *
488  k = krcol + 3*( m22-1 )
489  IF( bmp22 ) THEN
490  IF( k.EQ.ktop-1 ) THEN
491  CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
492  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
493  $ v( 1, m22 ) )
494  beta = v( 1, m22 )
495  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
496  ELSE
497  beta = h( k+1, k )
498  v( 2, m22 ) = h( k+2, k )
499  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
500  h( k+1, k ) = beta
501  h( k+2, k ) = zero
502  END IF
503  END IF
504 *
505 * ==== Multiply H by reflections from the left ====
506 *
507  IF( accum ) THEN
508  jbot = min( ndcol, kbot )
509  ELSE IF( wantt ) THEN
510  jbot = n
511  ELSE
512  jbot = kbot
513  END IF
514  DO 40 j = max( ktop, krcol ), jbot
515  mend = min( mbot, ( j-krcol+2 ) / 3 )
516  DO 30 m = mtop, mend
517  k = krcol + 3*( m-1 )
518  refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
519  $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
520  h( k+1, j ) = h( k+1, j ) - refsum
521  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
522  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
523  30 CONTINUE
524  40 CONTINUE
525  IF( bmp22 ) THEN
526  k = krcol + 3*( m22-1 )
527  DO 50 j = max( k+1, ktop ), jbot
528  refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
529  $ h( k+2, j ) )
530  h( k+1, j ) = h( k+1, j ) - refsum
531  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
532  50 CONTINUE
533  END IF
534 *
535 * ==== Multiply H by reflections from the right.
536 * . Delay filling in the last row until the
537 * . vigilant deflation check is complete. ====
538 *
539  IF( accum ) THEN
540  jtop = max( ktop, incol )
541  ELSE IF( wantt ) THEN
542  jtop = 1
543  ELSE
544  jtop = ktop
545  END IF
546  DO 90 m = mtop, mbot
547  IF( v( 1, m ).NE.zero ) THEN
548  k = krcol + 3*( m-1 )
549  DO 60 j = jtop, min( kbot, k+3 )
550  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
551  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
552  h( j, k+1 ) = h( j, k+1 ) - refsum
553  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
554  h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
555  60 CONTINUE
556 *
557  IF( accum ) THEN
558 *
559 * ==== Accumulate U. (If necessary, update Z later
560 * . with with an efficient matrix-matrix
561 * . multiply.) ====
562 *
563  kms = k - incol
564  DO 70 j = max( 1, ktop-incol ), kdu
565  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
566  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
567  u( j, kms+1 ) = u( j, kms+1 ) - refsum
568  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
569  u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
570  70 CONTINUE
571  ELSE IF( wantz ) THEN
572 *
573 * ==== U is not accumulated, so update Z
574 * . now by multiplying by reflections
575 * . from the right. ====
576 *
577  DO 80 j = iloz, ihiz
578  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
579  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
580  z( j, k+1 ) = z( j, k+1 ) - refsum
581  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
582  z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
583  80 CONTINUE
584  END IF
585  END IF
586  90 CONTINUE
587 *
588 * ==== Special case: 2-by-2 reflection (if needed) ====
589 *
590  k = krcol + 3*( m22-1 )
591  IF( bmp22 ) THEN
592  IF ( v( 1, m22 ).NE.zero ) THEN
593  DO 100 j = jtop, min( kbot, k+3 )
594  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
595  $ h( j, k+2 ) )
596  h( j, k+1 ) = h( j, k+1 ) - refsum
597  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
598  100 CONTINUE
599 *
600  IF( accum ) THEN
601  kms = k - incol
602  DO 110 j = max( 1, ktop-incol ), kdu
603  refsum = v( 1, m22 )*( u( j, kms+1 )+
604  $ v( 2, m22 )*u( j, kms+2 ) )
605  u( j, kms+1 ) = u( j, kms+1 ) - refsum
606  u( j, kms+2 ) = u( j, kms+2 ) - refsum*
607  $ v( 2, m22 )
608  110 CONTINUE
609  ELSE IF( wantz ) THEN
610  DO 120 j = iloz, ihiz
611  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
612  $ z( j, k+2 ) )
613  z( j, k+1 ) = z( j, k+1 ) - refsum
614  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
615  120 CONTINUE
616  END IF
617  END IF
618  END IF
619 *
620 * ==== Vigilant deflation check ====
621 *
622  mstart = mtop
623  IF( krcol+3*( mstart-1 ).LT.ktop )
624  $ mstart = mstart + 1
625  mend = mbot
626  IF( bmp22 )
627  $ mend = mend + 1
628  IF( krcol.EQ.kbot-2 )
629  $ mend = mend + 1
630  DO 130 m = mstart, mend
631  k = min( kbot-1, krcol+3*( m-1 ) )
632 *
633 * ==== The following convergence test requires that
634 * . the tradition small-compared-to-nearby-diagonals
635 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
636 * . criteria both be satisfied. The latter improves
637 * . accuracy in some examples. Falling back on an
638 * . alternate convergence criterion when TST1 or TST2
639 * . is zero (as done here) is traditional but probably
640 * . unnecessary. ====
641 *
642  IF( h( k+1, k ).NE.zero ) THEN
643  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
644  IF( tst1.EQ.zero ) THEN
645  IF( k.GE.ktop+1 )
646  $ tst1 = tst1 + abs( h( k, k-1 ) )
647  IF( k.GE.ktop+2 )
648  $ tst1 = tst1 + abs( h( k, k-2 ) )
649  IF( k.GE.ktop+3 )
650  $ tst1 = tst1 + abs( h( k, k-3 ) )
651  IF( k.LE.kbot-2 )
652  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
653  IF( k.LE.kbot-3 )
654  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
655  IF( k.LE.kbot-4 )
656  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
657  END IF
658  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
659  $ THEN
660  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
661  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
662  h11 = max( abs( h( k+1, k+1 ) ),
663  $ abs( h( k, k )-h( k+1, k+1 ) ) )
664  h22 = min( abs( h( k+1, k+1 ) ),
665  $ abs( h( k, k )-h( k+1, k+1 ) ) )
666  scl = h11 + h12
667  tst2 = h22*( h11 / scl )
668 *
669  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
670  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
671  END IF
672  END IF
673  130 CONTINUE
674 *
675 * ==== Fill in the last row of each bulge. ====
676 *
677  mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
678  DO 140 m = mtop, mend
679  k = krcol + 3*( m-1 )
680  refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
681  h( k+4, k+1 ) = -refsum
682  h( k+4, k+2 ) = -refsum*v( 2, m )
683  h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )
684  140 CONTINUE
685 *
686 * ==== End of near-the-diagonal bulge chase. ====
687 *
688  150 CONTINUE
689 *
690 * ==== Use U (if accumulated) to update far-from-diagonal
691 * . entries in H. If required, use U to update Z as
692 * . well. ====
693 *
694  IF( accum ) THEN
695  IF( wantt ) THEN
696  jtop = 1
697  jbot = n
698  ELSE
699  jtop = ktop
700  jbot = kbot
701  END IF
702  IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
703  $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) ) THEN
704 *
705 * ==== Updates not exploiting the 2-by-2 block
706 * . structure of U. K1 and NU keep track of
707 * . the location and size of U in the special
708 * . cases of introducing bulges and chasing
709 * . bulges off the bottom. In these special
710 * . cases and in case the number of shifts
711 * . is NS = 2, there is no 2-by-2 block
712 * . structure to exploit. ====
713 *
714  k1 = max( 1, ktop-incol )
715  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
716 *
717 * ==== Horizontal Multiply ====
718 *
719  DO 160 jcol = min( ndcol, kbot ) + 1, jbot, nh
720  jlen = min( nh, jbot-jcol+1 )
721  CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
722  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
723  $ ldwh )
724  CALL slacpy( 'ALL', nu, jlen, wh, ldwh,
725  $ h( incol+k1, jcol ), ldh )
726  160 CONTINUE
727 *
728 * ==== Vertical multiply ====
729 *
730  DO 170 jrow = jtop, max( ktop, incol ) - 1, nv
731  jlen = min( nv, max( ktop, incol )-jrow )
732  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
733  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
734  $ ldu, zero, wv, ldwv )
735  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
736  $ h( jrow, incol+k1 ), ldh )
737  170 CONTINUE
738 *
739 * ==== Z multiply (also vertical) ====
740 *
741  IF( wantz ) THEN
742  DO 180 jrow = iloz, ihiz, nv
743  jlen = min( nv, ihiz-jrow+1 )
744  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
745  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
746  $ ldu, zero, wv, ldwv )
747  CALL slacpy( 'ALL', jlen, nu, wv, ldwv,
748  $ z( jrow, incol+k1 ), ldz )
749  180 CONTINUE
750  END IF
751  ELSE
752 *
753 * ==== Updates exploiting U's 2-by-2 block structure.
754 * . (I2, I4, J2, J4 are the last rows and columns
755 * . of the blocks.) ====
756 *
757  i2 = ( kdu+1 ) / 2
758  i4 = kdu
759  j2 = i4 - i2
760  j4 = kdu
761 *
762 * ==== KZS and KNZ deal with the band of zeros
763 * . along the diagonal of one of the triangular
764 * . blocks. ====
765 *
766  kzs = ( j4-j2 ) - ( ns+1 )
767  knz = ns + 1
768 *
769 * ==== Horizontal multiply ====
770 *
771  DO 190 jcol = min( ndcol, kbot ) + 1, jbot, nh
772  jlen = min( nh, jbot-jcol+1 )
773 *
774 * ==== Copy bottom of H to top+KZS of scratch ====
775 * (The first KZS rows get multiplied by zero.) ====
776 *
777  CALL slacpy( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
778  $ ldh, wh( kzs+1, 1 ), ldwh )
779 *
780 * ==== Multiply by U21**T ====
781 *
782  CALL slaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
783  CALL strmm( 'L', 'U', 'C', 'N', knz, jlen, one,
784  $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
785  $ ldwh )
786 *
787 * ==== Multiply top of H by U11**T ====
788 *
789  CALL sgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
790  $ h( incol+1, jcol ), ldh, one, wh, ldwh )
791 *
792 * ==== Copy top of H to bottom of WH ====
793 *
794  CALL slacpy( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
795  $ wh( i2+1, 1 ), ldwh )
796 *
797 * ==== Multiply by U21**T ====
798 *
799  CALL strmm( 'L', 'L', 'C', 'N', j2, jlen, one,
800  $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
801 *
802 * ==== Multiply by U22 ====
803 *
804  CALL sgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
805  $ u( j2+1, i2+1 ), ldu,
806  $ h( incol+1+j2, jcol ), ldh, one,
807  $ wh( i2+1, 1 ), ldwh )
808 *
809 * ==== Copy it back ====
810 *
811  CALL slacpy( 'ALL', kdu, jlen, wh, ldwh,
812  $ h( incol+1, jcol ), ldh )
813  190 CONTINUE
814 *
815 * ==== Vertical multiply ====
816 *
817  DO 200 jrow = jtop, max( incol, ktop ) - 1, nv
818  jlen = min( nv, max( incol, ktop )-jrow )
819 *
820 * ==== Copy right of H to scratch (the first KZS
821 * . columns get multiplied by zero) ====
822 *
823  CALL slacpy( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
824  $ ldh, wv( 1, 1+kzs ), ldwv )
825 *
826 * ==== Multiply by U21 ====
827 *
828  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
829  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
830  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
831  $ ldwv )
832 *
833 * ==== Multiply by U11 ====
834 *
835  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
836  $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
837  $ ldwv )
838 *
839 * ==== Copy left of H to right of scratch ====
840 *
841  CALL slacpy( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
842  $ wv( 1, 1+i2 ), ldwv )
843 *
844 * ==== Multiply by U21 ====
845 *
846  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
847  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
848 *
849 * ==== Multiply by U22 ====
850 *
851  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
852  $ h( jrow, incol+1+j2 ), ldh,
853  $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
854  $ ldwv )
855 *
856 * ==== Copy it back ====
857 *
858  CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,
859  $ h( jrow, incol+1 ), ldh )
860  200 CONTINUE
861 *
862 * ==== Multiply Z (also vertical) ====
863 *
864  IF( wantz ) THEN
865  DO 210 jrow = iloz, ihiz, nv
866  jlen = min( nv, ihiz-jrow+1 )
867 *
868 * ==== Copy right of Z to left of scratch (first
869 * . KZS columns get multiplied by zero) ====
870 *
871  CALL slacpy( 'ALL', jlen, knz,
872  $ z( jrow, incol+1+j2 ), ldz,
873  $ wv( 1, 1+kzs ), ldwv )
874 *
875 * ==== Multiply by U12 ====
876 *
877  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv,
878  $ ldwv )
879  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
880  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
881  $ ldwv )
882 *
883 * ==== Multiply by U11 ====
884 *
885  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
886  $ z( jrow, incol+1 ), ldz, u, ldu, one,
887  $ wv, ldwv )
888 *
889 * ==== Copy left of Z to right of scratch ====
890 *
891  CALL slacpy( 'ALL', jlen, j2, z( jrow, incol+1 ),
892  $ ldz, wv( 1, 1+i2 ), ldwv )
893 *
894 * ==== Multiply by U21 ====
895 *
896  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
897  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
898  $ ldwv )
899 *
900 * ==== Multiply by U22 ====
901 *
902  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
903  $ z( jrow, incol+1+j2 ), ldz,
904  $ u( j2+1, i2+1 ), ldu, one,
905  $ wv( 1, 1+i2 ), ldwv )
906 *
907 * ==== Copy the result back to Z ====
908 *
909  CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,
910  $ z( jrow, incol+1 ), ldz )
911  210 CONTINUE
912  END IF
913  END IF
914  END IF
915  220 CONTINUE
916 *
917 * ==== End of SLAQR5 ====
918 *
Here is the call graph for this function:
Here is the caller graph for this function:
strmm
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
Definition: strmm.f:179
slabad
subroutine slabad(SMALL, LARGE)
SLABAD
Definition: slabad.f:76
slaqr1
subroutine slaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: slaqr1.f:123
sgemm
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
slacpy
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:105
slarfg
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:108
slaset
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:112
slamch
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:70