LAPACK  3.9.0
LAPACK: Linear Algebra PACKage
sgsvj0.f
Go to the documentation of this file.
1 *> \brief \b SGSVJ0 pre-processor for the routine sgesvj.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGSVJ0 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgsvj0.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgsvj0.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgsvj0.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
22 * SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP
26 * REAL EPS, SFMIN, TOL
27 * CHARACTER*1 JOBV
28 * ..
29 * .. Array Arguments ..
30 * REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ),
31 * $ WORK( LWORK )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> SGSVJ0 is called from SGESVJ as a pre-processor and that is its main
41 *> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but
42 *> it does not check convergence (stopping criterion). Few tuning
43 *> parameters (marked by [TP]) are available for the implementer.
44 *> \endverbatim
45 *
46 * Arguments:
47 * ==========
48 *
49 *> \param[in] JOBV
50 *> \verbatim
51 *> JOBV is CHARACTER*1
52 *> Specifies whether the output from this procedure is used
53 *> to compute the matrix V:
54 *> = 'V': the product of the Jacobi rotations is accumulated
55 *> by postmulyiplying the N-by-N array V.
56 *> (See the description of V.)
57 *> = 'A': the product of the Jacobi rotations is accumulated
58 *> by postmulyiplying the MV-by-N array V.
59 *> (See the descriptions of MV and V.)
60 *> = 'N': the Jacobi rotations are not accumulated.
61 *> \endverbatim
62 *>
63 *> \param[in] M
64 *> \verbatim
65 *> M is INTEGER
66 *> The number of rows of the input matrix A. M >= 0.
67 *> \endverbatim
68 *>
69 *> \param[in] N
70 *> \verbatim
71 *> N is INTEGER
72 *> The number of columns of the input matrix A.
73 *> M >= N >= 0.
74 *> \endverbatim
75 *>
76 *> \param[in,out] A
77 *> \verbatim
78 *> A is REAL array, dimension (LDA,N)
79 *> On entry, M-by-N matrix A, such that A*diag(D) represents
80 *> the input matrix.
81 *> On exit,
82 *> A_onexit * D_onexit represents the input matrix A*diag(D)
83 *> post-multiplied by a sequence of Jacobi rotations, where the
84 *> rotation threshold and the total number of sweeps are given in
85 *> TOL and NSWEEP, respectively.
86 *> (See the descriptions of D, TOL and NSWEEP.)
87 *> \endverbatim
88 *>
89 *> \param[in] LDA
90 *> \verbatim
91 *> LDA is INTEGER
92 *> The leading dimension of the array A. LDA >= max(1,M).
93 *> \endverbatim
94 *>
95 *> \param[in,out] D
96 *> \verbatim
97 *> D is REAL array, dimension (N)
98 *> The array D accumulates the scaling factors from the fast scaled
99 *> Jacobi rotations.
100 *> On entry, A*diag(D) represents the input matrix.
101 *> On exit, A_onexit*diag(D_onexit) represents the input matrix
102 *> post-multiplied by a sequence of Jacobi rotations, where the
103 *> rotation threshold and the total number of sweeps are given in
104 *> TOL and NSWEEP, respectively.
105 *> (See the descriptions of A, TOL and NSWEEP.)
106 *> \endverbatim
107 *>
108 *> \param[in,out] SVA
109 *> \verbatim
110 *> SVA is REAL array, dimension (N)
111 *> On entry, SVA contains the Euclidean norms of the columns of
112 *> the matrix A*diag(D).
113 *> On exit, SVA contains the Euclidean norms of the columns of
114 *> the matrix onexit*diag(D_onexit).
115 *> \endverbatim
116 *>
117 *> \param[in] MV
118 *> \verbatim
119 *> MV is INTEGER
120 *> If JOBV = 'A', then MV rows of V are post-multipled by a
121 *> sequence of Jacobi rotations.
122 *> If JOBV = 'N', then MV is not referenced.
123 *> \endverbatim
124 *>
125 *> \param[in,out] V
126 *> \verbatim
127 *> V is REAL array, dimension (LDV,N)
128 *> If JOBV = 'V' then N rows of V are post-multipled by a
129 *> sequence of Jacobi rotations.
130 *> If JOBV = 'A' then MV rows of V are post-multipled by a
131 *> sequence of Jacobi rotations.
132 *> If JOBV = 'N', then V is not referenced.
133 *> \endverbatim
134 *>
135 *> \param[in] LDV
136 *> \verbatim
137 *> LDV is INTEGER
138 *> The leading dimension of the array V, LDV >= 1.
139 *> If JOBV = 'V', LDV >= N.
140 *> If JOBV = 'A', LDV >= MV.
141 *> \endverbatim
142 *>
143 *> \param[in] EPS
144 *> \verbatim
145 *> EPS is REAL
146 *> EPS = SLAMCH('Epsilon')
147 *> \endverbatim
148 *>
149 *> \param[in] SFMIN
150 *> \verbatim
151 *> SFMIN is REAL
152 *> SFMIN = SLAMCH('Safe Minimum')
153 *> \endverbatim
154 *>
155 *> \param[in] TOL
156 *> \verbatim
157 *> TOL is REAL
158 *> TOL is the threshold for Jacobi rotations. For a pair
159 *> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is
160 *> applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL.
161 *> \endverbatim
162 *>
163 *> \param[in] NSWEEP
164 *> \verbatim
165 *> NSWEEP is INTEGER
166 *> NSWEEP is the number of sweeps of Jacobi rotations to be
167 *> performed.
168 *> \endverbatim
169 *>
170 *> \param[out] WORK
171 *> \verbatim
172 *> WORK is REAL array, dimension (LWORK)
173 *> \endverbatim
174 *>
175 *> \param[in] LWORK
176 *> \verbatim
177 *> LWORK is INTEGER
178 *> LWORK is the dimension of WORK. LWORK >= M.
179 *> \endverbatim
180 *>
181 *> \param[out] INFO
182 *> \verbatim
183 *> INFO is INTEGER
184 *> = 0: successful exit.
185 *> < 0: if INFO = -i, then the i-th argument had an illegal value
186 *> \endverbatim
187 *
188 * Authors:
189 * ========
190 *
191 *> \author Univ. of Tennessee
192 *> \author Univ. of California Berkeley
193 *> \author Univ. of Colorado Denver
194 *> \author NAG Ltd.
195 *
196 *> \date November 2017
197 *
198 *> \ingroup realOTHERcomputational
199 *
200 *> \par Further Details:
201 * =====================
202 *>
203 *> SGSVJ0 is used just to enable SGESVJ to call a simplified version of
204 *> itself to work on a submatrix of the original matrix.
205 *>
206 *> \par Contributors:
207 * ==================
208 *>
209 *> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
210 *>
211 *> \par Bugs, Examples and Comments:
212 * =================================
213 *>
214 *> Please report all bugs and send interesting test examples and comments to
215 *> drmac@math.hr. Thank you.
216 *
217 * =====================================================================
218  SUBROUTINE sgsvj0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
219  $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
220 *
221 * -- LAPACK computational routine (version 3.8.0) --
222 * -- LAPACK is a software package provided by Univ. of Tennessee, --
223 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
224 * November 2017
225 *
226 * .. Scalar Arguments ..
227  INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP
228  REAL EPS, SFMIN, TOL
229  CHARACTER*1 JOBV
230 * ..
231 * .. Array Arguments ..
232  REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ),
233  $ work( lwork )
234 * ..
235 *
236 * =====================================================================
237 *
238 * .. Local Parameters ..
239  REAL ZERO, HALF, ONE
240  parameter( zero = 0.0e0, half = 0.5e0, one = 1.0e0)
241 * ..
242 * .. Local Scalars ..
243  REAL AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG,
244  $ bigtheta, cs, mxaapq, mxsinj, rootbig, rooteps,
245  $ rootsfmin, roottol, small, sn, t, temp1, theta,
246  $ thsign
247  INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1,
248  $ iswrot, jbc, jgl, kbl, lkahead, mvl, nbl,
249  $ notrot, p, pskipped, q, rowskip, swband
250  LOGICAL APPLV, ROTOK, RSVEC
251 * ..
252 * .. Local Arrays ..
253  REAL FASTR( 5 )
254 * ..
255 * .. Intrinsic Functions ..
256  INTRINSIC abs, max, float, min, sign, sqrt
257 * ..
258 * .. External Functions ..
259  REAL SDOT, SNRM2
260  INTEGER ISAMAX
261  LOGICAL LSAME
262  EXTERNAL isamax, lsame, sdot, snrm2
263 * ..
264 * .. External Subroutines ..
265  EXTERNAL saxpy, scopy, slascl, slassq, srotm, sswap,
266  $ xerbla
267 * ..
268 * .. Executable Statements ..
269 *
270 * Test the input parameters.
271 *
272  applv = lsame( jobv, 'A' )
273  rsvec = lsame( jobv, 'V' )
274  IF( .NOT.( rsvec .OR. applv .OR. lsame( jobv, 'N' ) ) ) THEN
275  info = -1
276  ELSE IF( m.LT.0 ) THEN
277  info = -2
278  ELSE IF( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
279  info = -3
280  ELSE IF( lda.LT.m ) THEN
281  info = -5
282  ELSE IF( ( rsvec.OR.applv ) .AND. ( mv.LT.0 ) ) THEN
283  info = -8
284  ELSE IF( ( rsvec.AND.( ldv.LT.n ) ).OR.
285  $ ( applv.AND.( ldv.LT.mv ) ) ) THEN
286  info = -10
287  ELSE IF( tol.LE.eps ) THEN
288  info = -13
289  ELSE IF( nsweep.LT.0 ) THEN
290  info = -14
291  ELSE IF( lwork.LT.m ) THEN
292  info = -16
293  ELSE
294  info = 0
295  END IF
296 *
297 * #:(
298  IF( info.NE.0 ) THEN
299  CALL xerbla( 'SGSVJ0', -info )
300  RETURN
301  END IF
302 *
303  IF( rsvec ) THEN
304  mvl = n
305  ELSE IF( applv ) THEN
306  mvl = mv
307  END IF
308  rsvec = rsvec .OR. applv
309 
310  rooteps = sqrt( eps )
311  rootsfmin = sqrt( sfmin )
312  small = sfmin / eps
313  big = one / sfmin
314  rootbig = one / rootsfmin
315  bigtheta = one / rooteps
316  roottol = sqrt( tol )
317 *
318 * .. Row-cyclic Jacobi SVD algorithm with column pivoting ..
319 *
320  emptsw = ( n*( n-1 ) ) / 2
321  notrot = 0
322  fastr( 1 ) = zero
323 *
324 * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
325 *
326 
327  swband = 0
328 *[TP] SWBAND is a tuning parameter. It is meaningful and effective
329 * if SGESVJ is used as a computational routine in the preconditioned
330 * Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure
331 * ......
332 
333  kbl = min( 8, n )
334 *[TP] KBL is a tuning parameter that defines the tile size in the
335 * tiling of the p-q loops of pivot pairs. In general, an optimal
336 * value of KBL depends on the matrix dimensions and on the
337 * parameters of the computer's memory.
338 *
339  nbl = n / kbl
340  IF( ( nbl*kbl ).NE.n )nbl = nbl + 1
341 
342  blskip = ( kbl**2 ) + 1
343 *[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
344 
345  rowskip = min( 5, kbl )
346 *[TP] ROWSKIP is a tuning parameter.
347 
348  lkahead = 1
349 *[TP] LKAHEAD is a tuning parameter.
350  swband = 0
351  pskipped = 0
352 *
353  DO 1993 i = 1, nsweep
354 * .. go go go ...
355 *
356  mxaapq = zero
357  mxsinj = zero
358  iswrot = 0
359 *
360  notrot = 0
361  pskipped = 0
362 *
363  DO 2000 ibr = 1, nbl
364 
365  igl = ( ibr-1 )*kbl + 1
366 *
367  DO 1002 ir1 = 0, min( lkahead, nbl-ibr )
368 *
369  igl = igl + ir1*kbl
370 *
371  DO 2001 p = igl, min( igl+kbl-1, n-1 )
372 
373 * .. de Rijk's pivoting
374  q = isamax( n-p+1, sva( p ), 1 ) + p - 1
375  IF( p.NE.q ) THEN
376  CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
377  IF( rsvec )CALL sswap( mvl, v( 1, p ), 1,
378  $ v( 1, q ), 1 )
379  temp1 = sva( p )
380  sva( p ) = sva( q )
381  sva( q ) = temp1
382  temp1 = d( p )
383  d( p ) = d( q )
384  d( q ) = temp1
385  END IF
386 *
387  IF( ir1.EQ.0 ) THEN
388 *
389 * Column norms are periodically updated by explicit
390 * norm computation.
391 * Caveat:
392 * Some BLAS implementations compute SNRM2(M,A(1,p),1)
393 * as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may result in
394 * overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and
395 * undeflow for ||A(:,p)||_2 < SQRT(underflow_threshold).
396 * Hence, SNRM2 cannot be trusted, not even in the case when
397 * the true norm is far from the under(over)flow boundaries.
398 * If properly implemented SNRM2 is available, the IF-THEN-ELSE
399 * below should read "AAPP = SNRM2( M, A(1,p), 1 ) * D(p)".
400 *
401  IF( ( sva( p ).LT.rootbig ) .AND.
402  $ ( sva( p ).GT.rootsfmin ) ) THEN
403  sva( p ) = snrm2( m, a( 1, p ), 1 )*d( p )
404  ELSE
405  temp1 = zero
406  aapp = one
407  CALL slassq( m, a( 1, p ), 1, temp1, aapp )
408  sva( p ) = temp1*sqrt( aapp )*d( p )
409  END IF
410  aapp = sva( p )
411  ELSE
412  aapp = sva( p )
413  END IF
414 
415 *
416  IF( aapp.GT.zero ) THEN
417 *
418  pskipped = 0
419 *
420  DO 2002 q = p + 1, min( igl+kbl-1, n )
421 *
422  aaqq = sva( q )
423 
424  IF( aaqq.GT.zero ) THEN
425 *
426  aapp0 = aapp
427  IF( aaqq.GE.one ) THEN
428  rotok = ( small*aapp ).LE.aaqq
429  IF( aapp.LT.( big / aaqq ) ) THEN
430  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
431  $ q ), 1 )*d( p )*d( q ) / aaqq )
432  $ / aapp
433  ELSE
434  CALL scopy( m, a( 1, p ), 1, work, 1 )
435  CALL slascl( 'G', 0, 0, aapp, d( p ),
436  $ m, 1, work, lda, ierr )
437  aapq = sdot( m, work, 1, a( 1, q ),
438  $ 1 )*d( q ) / aaqq
439  END IF
440  ELSE
441  rotok = aapp.LE.( aaqq / small )
442  IF( aapp.GT.( small / aaqq ) ) THEN
443  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
444  $ q ), 1 )*d( p )*d( q ) / aaqq )
445  $ / aapp
446  ELSE
447  CALL scopy( m, a( 1, q ), 1, work, 1 )
448  CALL slascl( 'G', 0, 0, aaqq, d( q ),
449  $ m, 1, work, lda, ierr )
450  aapq = sdot( m, work, 1, a( 1, p ),
451  $ 1 )*d( p ) / aapp
452  END IF
453  END IF
454 *
455  mxaapq = max( mxaapq, abs( aapq ) )
456 *
457 * TO rotate or NOT to rotate, THAT is the question ...
458 *
459  IF( abs( aapq ).GT.tol ) THEN
460 *
461 * .. rotate
462 * ROTATED = ROTATED + ONE
463 *
464  IF( ir1.EQ.0 ) THEN
465  notrot = 0
466  pskipped = 0
467  iswrot = iswrot + 1
468  END IF
469 *
470  IF( rotok ) THEN
471 *
472  aqoap = aaqq / aapp
473  apoaq = aapp / aaqq
474  theta = -half*abs( aqoap-apoaq ) / aapq
475 *
476  IF( abs( theta ).GT.bigtheta ) THEN
477 *
478  t = half / theta
479  fastr( 3 ) = t*d( p ) / d( q )
480  fastr( 4 ) = -t*d( q ) / d( p )
481  CALL srotm( m, a( 1, p ), 1,
482  $ a( 1, q ), 1, fastr )
483  IF( rsvec )CALL srotm( mvl,
484  $ v( 1, p ), 1,
485  $ v( 1, q ), 1,
486  $ fastr )
487  sva( q ) = aaqq*sqrt( max( zero,
488  $ one+t*apoaq*aapq ) )
489  aapp = aapp*sqrt( max( zero,
490  $ one-t*aqoap*aapq ) )
491  mxsinj = max( mxsinj, abs( t ) )
492 *
493  ELSE
494 *
495 * .. choose correct signum for THETA and rotate
496 *
497  thsign = -sign( one, aapq )
498  t = one / ( theta+thsign*
499  $ sqrt( one+theta*theta ) )
500  cs = sqrt( one / ( one+t*t ) )
501  sn = t*cs
502 *
503  mxsinj = max( mxsinj, abs( sn ) )
504  sva( q ) = aaqq*sqrt( max( zero,
505  $ one+t*apoaq*aapq ) )
506  aapp = aapp*sqrt( max( zero,
507  $ one-t*aqoap*aapq ) )
508 *
509  apoaq = d( p ) / d( q )
510  aqoap = d( q ) / d( p )
511  IF( d( p ).GE.one ) THEN
512  IF( d( q ).GE.one ) THEN
513  fastr( 3 ) = t*apoaq
514  fastr( 4 ) = -t*aqoap
515  d( p ) = d( p )*cs
516  d( q ) = d( q )*cs
517  CALL srotm( m, a( 1, p ), 1,
518  $ a( 1, q ), 1,
519  $ fastr )
520  IF( rsvec )CALL srotm( mvl,
521  $ v( 1, p ), 1, v( 1, q ),
522  $ 1, fastr )
523  ELSE
524  CALL saxpy( m, -t*aqoap,
525  $ a( 1, q ), 1,
526  $ a( 1, p ), 1 )
527  CALL saxpy( m, cs*sn*apoaq,
528  $ a( 1, p ), 1,
529  $ a( 1, q ), 1 )
530  d( p ) = d( p )*cs
531  d( q ) = d( q ) / cs
532  IF( rsvec ) THEN
533  CALL saxpy( mvl, -t*aqoap,
534  $ v( 1, q ), 1,
535  $ v( 1, p ), 1 )
536  CALL saxpy( mvl,
537  $ cs*sn*apoaq,
538  $ v( 1, p ), 1,
539  $ v( 1, q ), 1 )
540  END IF
541  END IF
542  ELSE
543  IF( d( q ).GE.one ) THEN
544  CALL saxpy( m, t*apoaq,
545  $ a( 1, p ), 1,
546  $ a( 1, q ), 1 )
547  CALL saxpy( m, -cs*sn*aqoap,
548  $ a( 1, q ), 1,
549  $ a( 1, p ), 1 )
550  d( p ) = d( p ) / cs
551  d( q ) = d( q )*cs
552  IF( rsvec ) THEN
553  CALL saxpy( mvl, t*apoaq,
554  $ v( 1, p ), 1,
555  $ v( 1, q ), 1 )
556  CALL saxpy( mvl,
557  $ -cs*sn*aqoap,
558  $ v( 1, q ), 1,
559  $ v( 1, p ), 1 )
560  END IF
561  ELSE
562  IF( d( p ).GE.d( q ) ) THEN
563  CALL saxpy( m, -t*aqoap,
564  $ a( 1, q ), 1,
565  $ a( 1, p ), 1 )
566  CALL saxpy( m, cs*sn*apoaq,
567  $ a( 1, p ), 1,
568  $ a( 1, q ), 1 )
569  d( p ) = d( p )*cs
570  d( q ) = d( q ) / cs
571  IF( rsvec ) THEN
572  CALL saxpy( mvl,
573  $ -t*aqoap,
574  $ v( 1, q ), 1,
575  $ v( 1, p ), 1 )
576  CALL saxpy( mvl,
577  $ cs*sn*apoaq,
578  $ v( 1, p ), 1,
579  $ v( 1, q ), 1 )
580  END IF
581  ELSE
582  CALL saxpy( m, t*apoaq,
583  $ a( 1, p ), 1,
584  $ a( 1, q ), 1 )
585  CALL saxpy( m,
586  $ -cs*sn*aqoap,
587  $ a( 1, q ), 1,
588  $ a( 1, p ), 1 )
589  d( p ) = d( p ) / cs
590  d( q ) = d( q )*cs
591  IF( rsvec ) THEN
592  CALL saxpy( mvl,
593  $ t*apoaq, v( 1, p ),
594  $ 1, v( 1, q ), 1 )
595  CALL saxpy( mvl,
596  $ -cs*sn*aqoap,
597  $ v( 1, q ), 1,
598  $ v( 1, p ), 1 )
599  END IF
600  END IF
601  END IF
602  END IF
603  END IF
604 *
605  ELSE
606 * .. have to use modified Gram-Schmidt like transformation
607  CALL scopy( m, a( 1, p ), 1, work, 1 )
608  CALL slascl( 'G', 0, 0, aapp, one, m,
609  $ 1, work, lda, ierr )
610  CALL slascl( 'G', 0, 0, aaqq, one, m,
611  $ 1, a( 1, q ), lda, ierr )
612  temp1 = -aapq*d( p ) / d( q )
613  CALL saxpy( m, temp1, work, 1,
614  $ a( 1, q ), 1 )
615  CALL slascl( 'G', 0, 0, one, aaqq, m,
616  $ 1, a( 1, q ), lda, ierr )
617  sva( q ) = aaqq*sqrt( max( zero,
618  $ one-aapq*aapq ) )
619  mxsinj = max( mxsinj, sfmin )
620  END IF
621 * END IF ROTOK THEN ... ELSE
622 *
623 * In the case of cancellation in updating SVA(q), SVA(p)
624 * recompute SVA(q), SVA(p).
625  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
626  $ THEN
627  IF( ( aaqq.LT.rootbig ) .AND.
628  $ ( aaqq.GT.rootsfmin ) ) THEN
629  sva( q ) = snrm2( m, a( 1, q ), 1 )*
630  $ d( q )
631  ELSE
632  t = zero
633  aaqq = one
634  CALL slassq( m, a( 1, q ), 1, t,
635  $ aaqq )
636  sva( q ) = t*sqrt( aaqq )*d( q )
637  END IF
638  END IF
639  IF( ( aapp / aapp0 ).LE.rooteps ) THEN
640  IF( ( aapp.LT.rootbig ) .AND.
641  $ ( aapp.GT.rootsfmin ) ) THEN
642  aapp = snrm2( m, a( 1, p ), 1 )*
643  $ d( p )
644  ELSE
645  t = zero
646  aapp = one
647  CALL slassq( m, a( 1, p ), 1, t,
648  $ aapp )
649  aapp = t*sqrt( aapp )*d( p )
650  END IF
651  sva( p ) = aapp
652  END IF
653 *
654  ELSE
655 * A(:,p) and A(:,q) already numerically orthogonal
656  IF( ir1.EQ.0 )notrot = notrot + 1
657  pskipped = pskipped + 1
658  END IF
659  ELSE
660 * A(:,q) is zero column
661  IF( ir1.EQ.0 )notrot = notrot + 1
662  pskipped = pskipped + 1
663  END IF
664 *
665  IF( ( i.LE.swband ) .AND.
666  $ ( pskipped.GT.rowskip ) ) THEN
667  IF( ir1.EQ.0 )aapp = -aapp
668  notrot = 0
669  GO TO 2103
670  END IF
671 *
672  2002 CONTINUE
673 * END q-LOOP
674 *
675  2103 CONTINUE
676 * bailed out of q-loop
677 
678  sva( p ) = aapp
679 
680  ELSE
681  sva( p ) = aapp
682  IF( ( ir1.EQ.0 ) .AND. ( aapp.EQ.zero ) )
683  $ notrot = notrot + min( igl+kbl-1, n ) - p
684  END IF
685 *
686  2001 CONTINUE
687 * end of the p-loop
688 * end of doing the block ( ibr, ibr )
689  1002 CONTINUE
690 * end of ir1-loop
691 *
692 *........................................................
693 * ... go to the off diagonal blocks
694 *
695  igl = ( ibr-1 )*kbl + 1
696 *
697  DO 2010 jbc = ibr + 1, nbl
698 *
699  jgl = ( jbc-1 )*kbl + 1
700 *
701 * doing the block at ( ibr, jbc )
702 *
703  ijblsk = 0
704  DO 2100 p = igl, min( igl+kbl-1, n )
705 *
706  aapp = sva( p )
707 *
708  IF( aapp.GT.zero ) THEN
709 *
710  pskipped = 0
711 *
712  DO 2200 q = jgl, min( jgl+kbl-1, n )
713 *
714  aaqq = sva( q )
715 *
716  IF( aaqq.GT.zero ) THEN
717  aapp0 = aapp
718 *
719 * .. M x 2 Jacobi SVD ..
720 *
721 * .. Safe Gram matrix computation ..
722 *
723  IF( aaqq.GE.one ) THEN
724  IF( aapp.GE.aaqq ) THEN
725  rotok = ( small*aapp ).LE.aaqq
726  ELSE
727  rotok = ( small*aaqq ).LE.aapp
728  END IF
729  IF( aapp.LT.( big / aaqq ) ) THEN
730  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
731  $ q ), 1 )*d( p )*d( q ) / aaqq )
732  $ / aapp
733  ELSE
734  CALL scopy( m, a( 1, p ), 1, work, 1 )
735  CALL slascl( 'G', 0, 0, aapp, d( p ),
736  $ m, 1, work, lda, ierr )
737  aapq = sdot( m, work, 1, a( 1, q ),
738  $ 1 )*d( q ) / aaqq
739  END IF
740  ELSE
741  IF( aapp.GE.aaqq ) THEN
742  rotok = aapp.LE.( aaqq / small )
743  ELSE
744  rotok = aaqq.LE.( aapp / small )
745  END IF
746  IF( aapp.GT.( small / aaqq ) ) THEN
747  aapq = ( sdot( m, a( 1, p ), 1, a( 1,
748  $ q ), 1 )*d( p )*d( q ) / aaqq )
749  $ / aapp
750  ELSE
751  CALL scopy( m, a( 1, q ), 1, work, 1 )
752  CALL slascl( 'G', 0, 0, aaqq, d( q ),
753  $ m, 1, work, lda, ierr )
754  aapq = sdot( m, work, 1, a( 1, p ),
755  $ 1 )*d( p ) / aapp
756  END IF
757  END IF
758 *
759  mxaapq = max( mxaapq, abs( aapq ) )
760 *
761 * TO rotate or NOT to rotate, THAT is the question ...
762 *
763  IF( abs( aapq ).GT.tol ) THEN
764  notrot = 0
765 * ROTATED = ROTATED + 1
766  pskipped = 0
767  iswrot = iswrot + 1
768 *
769  IF( rotok ) THEN
770 *
771  aqoap = aaqq / aapp
772  apoaq = aapp / aaqq
773  theta = -half*abs( aqoap-apoaq ) / aapq
774  IF( aaqq.GT.aapp0 )theta = -theta
775 *
776  IF( abs( theta ).GT.bigtheta ) THEN
777  t = half / theta
778  fastr( 3 ) = t*d( p ) / d( q )
779  fastr( 4 ) = -t*d( q ) / d( p )
780  CALL srotm( m, a( 1, p ), 1,
781  $ a( 1, q ), 1, fastr )
782  IF( rsvec )CALL srotm( mvl,
783  $ v( 1, p ), 1,
784  $ v( 1, q ), 1,
785  $ fastr )
786  sva( q ) = aaqq*sqrt( max( zero,
787  $ one+t*apoaq*aapq ) )
788  aapp = aapp*sqrt( max( zero,
789  $ one-t*aqoap*aapq ) )
790  mxsinj = max( mxsinj, abs( t ) )
791  ELSE
792 *
793 * .. choose correct signum for THETA and rotate
794 *
795  thsign = -sign( one, aapq )
796  IF( aaqq.GT.aapp0 )thsign = -thsign
797  t = one / ( theta+thsign*
798  $ sqrt( one+theta*theta ) )
799  cs = sqrt( one / ( one+t*t ) )
800  sn = t*cs
801  mxsinj = max( mxsinj, abs( sn ) )
802  sva( q ) = aaqq*sqrt( max( zero,
803  $ one+t*apoaq*aapq ) )
804  aapp = aapp*sqrt( max( zero,
805  $ one-t*aqoap*aapq ) )
806 *
807  apoaq = d( p ) / d( q )
808  aqoap = d( q ) / d( p )
809  IF( d( p ).GE.one ) THEN
810 *
811  IF( d( q ).GE.one ) THEN
812  fastr( 3 ) = t*apoaq
813  fastr( 4 ) = -t*aqoap
814  d( p ) = d( p )*cs
815  d( q ) = d( q )*cs
816  CALL srotm( m, a( 1, p ), 1,
817  $ a( 1, q ), 1,
818  $ fastr )
819  IF( rsvec )CALL srotm( mvl,
820  $ v( 1, p ), 1, v( 1, q ),
821  $ 1, fastr )
822  ELSE
823  CALL saxpy( m, -t*aqoap,
824  $ a( 1, q ), 1,
825  $ a( 1, p ), 1 )
826  CALL saxpy( m, cs*sn*apoaq,
827  $ a( 1, p ), 1,
828  $ a( 1, q ), 1 )
829  IF( rsvec ) THEN
830  CALL saxpy( mvl, -t*aqoap,
831  $ v( 1, q ), 1,
832  $ v( 1, p ), 1 )
833  CALL saxpy( mvl,
834  $ cs*sn*apoaq,
835  $ v( 1, p ), 1,
836  $ v( 1, q ), 1 )
837  END IF
838  d( p ) = d( p )*cs
839  d( q ) = d( q ) / cs
840  END IF
841  ELSE
842  IF( d( q ).GE.one ) THEN
843  CALL saxpy( m, t*apoaq,
844  $ a( 1, p ), 1,
845  $ a( 1, q ), 1 )
846  CALL saxpy( m, -cs*sn*aqoap,
847  $ a( 1, q ), 1,
848  $ a( 1, p ), 1 )
849  IF( rsvec ) THEN
850  CALL saxpy( mvl, t*apoaq,
851  $ v( 1, p ), 1,
852  $ v( 1, q ), 1 )
853  CALL saxpy( mvl,
854  $ -cs*sn*aqoap,
855  $ v( 1, q ), 1,
856  $ v( 1, p ), 1 )
857  END IF
858  d( p ) = d( p ) / cs
859  d( q ) = d( q )*cs
860  ELSE
861  IF( d( p ).GE.d( q ) ) THEN
862  CALL saxpy( m, -t*aqoap,
863  $ a( 1, q ), 1,
864  $ a( 1, p ), 1 )
865  CALL saxpy( m, cs*sn*apoaq,
866  $ a( 1, p ), 1,
867  $ a( 1, q ), 1 )
868  d( p ) = d( p )*cs
869  d( q ) = d( q ) / cs
870  IF( rsvec ) THEN
871  CALL saxpy( mvl,
872  $ -t*aqoap,
873  $ v( 1, q ), 1,
874  $ v( 1, p ), 1 )
875  CALL saxpy( mvl,
876  $ cs*sn*apoaq,
877  $ v( 1, p ), 1,
878  $ v( 1, q ), 1 )
879  END IF
880  ELSE
881  CALL saxpy( m, t*apoaq,
882  $ a( 1, p ), 1,
883  $ a( 1, q ), 1 )
884  CALL saxpy( m,
885  $ -cs*sn*aqoap,
886  $ a( 1, q ), 1,
887  $ a( 1, p ), 1 )
888  d( p ) = d( p ) / cs
889  d( q ) = d( q )*cs
890  IF( rsvec ) THEN
891  CALL saxpy( mvl,
892  $ t*apoaq, v( 1, p ),
893  $ 1, v( 1, q ), 1 )
894  CALL saxpy( mvl,
895  $ -cs*sn*aqoap,
896  $ v( 1, q ), 1,
897  $ v( 1, p ), 1 )
898  END IF
899  END IF
900  END IF
901  END IF
902  END IF
903 *
904  ELSE
905  IF( aapp.GT.aaqq ) THEN
906  CALL scopy( m, a( 1, p ), 1, work,
907  $ 1 )
908  CALL slascl( 'G', 0, 0, aapp, one,
909  $ m, 1, work, lda, ierr )
910  CALL slascl( 'G', 0, 0, aaqq, one,
911  $ m, 1, a( 1, q ), lda,
912  $ ierr )
913  temp1 = -aapq*d( p ) / d( q )
914  CALL saxpy( m, temp1, work, 1,
915  $ a( 1, q ), 1 )
916  CALL slascl( 'G', 0, 0, one, aaqq,
917  $ m, 1, a( 1, q ), lda,
918  $ ierr )
919  sva( q ) = aaqq*sqrt( max( zero,
920  $ one-aapq*aapq ) )
921  mxsinj = max( mxsinj, sfmin )
922  ELSE
923  CALL scopy( m, a( 1, q ), 1, work,
924  $ 1 )
925  CALL slascl( 'G', 0, 0, aaqq, one,
926  $ m, 1, work, lda, ierr )
927  CALL slascl( 'G', 0, 0, aapp, one,
928  $ m, 1, a( 1, p ), lda,
929  $ ierr )
930  temp1 = -aapq*d( q ) / d( p )
931  CALL saxpy( m, temp1, work, 1,
932  $ a( 1, p ), 1 )
933  CALL slascl( 'G', 0, 0, one, aapp,
934  $ m, 1, a( 1, p ), lda,
935  $ ierr )
936  sva( p ) = aapp*sqrt( max( zero,
937  $ one-aapq*aapq ) )
938  mxsinj = max( mxsinj, sfmin )
939  END IF
940  END IF
941 * END IF ROTOK THEN ... ELSE
942 *
943 * In the case of cancellation in updating SVA(q)
944 * .. recompute SVA(q)
945  IF( ( sva( q ) / aaqq )**2.LE.rooteps )
946  $ THEN
947  IF( ( aaqq.LT.rootbig ) .AND.
948  $ ( aaqq.GT.rootsfmin ) ) THEN
949  sva( q ) = snrm2( m, a( 1, q ), 1 )*
950  $ d( q )
951  ELSE
952  t = zero
953  aaqq = one
954  CALL slassq( m, a( 1, q ), 1, t,
955  $ aaqq )
956  sva( q ) = t*sqrt( aaqq )*d( q )
957  END IF
958  END IF
959  IF( ( aapp / aapp0 )**2.LE.rooteps ) THEN
960  IF( ( aapp.LT.rootbig ) .AND.
961  $ ( aapp.GT.rootsfmin ) ) THEN
962  aapp = snrm2( m, a( 1, p ), 1 )*
963  $ d( p )
964  ELSE
965  t = zero
966  aapp = one
967  CALL slassq( m, a( 1, p ), 1, t,
968  $ aapp )
969  aapp = t*sqrt( aapp )*d( p )
970  END IF
971  sva( p ) = aapp
972  END IF
973 * end of OK rotation
974  ELSE
975  notrot = notrot + 1
976  pskipped = pskipped + 1
977  ijblsk = ijblsk + 1
978  END IF
979  ELSE
980  notrot = notrot + 1
981  pskipped = pskipped + 1
982  ijblsk = ijblsk + 1
983  END IF
984 *
985  IF( ( i.LE.swband ) .AND. ( ijblsk.GE.blskip ) )
986  $ THEN
987  sva( p ) = aapp
988  notrot = 0
989  GO TO 2011
990  END IF
991  IF( ( i.LE.swband ) .AND.
992  $ ( pskipped.GT.rowskip ) ) THEN
993  aapp = -aapp
994  notrot = 0
995  GO TO 2203
996  END IF
997 *
998  2200 CONTINUE
999 * end of the q-loop
1000  2203 CONTINUE
1001 *
1002  sva( p ) = aapp
1003 *
1004  ELSE
1005  IF( aapp.EQ.zero )notrot = notrot +
1006  $ min( jgl+kbl-1, n ) - jgl + 1
1007  IF( aapp.LT.zero )notrot = 0
1008  END IF
1009 
1010  2100 CONTINUE
1011 * end of the p-loop
1012  2010 CONTINUE
1013 * end of the jbc-loop
1014  2011 CONTINUE
1015 *2011 bailed out of the jbc-loop
1016  DO 2012 p = igl, min( igl+kbl-1, n )
1017  sva( p ) = abs( sva( p ) )
1018  2012 CONTINUE
1019 *
1020  2000 CONTINUE
1021 *2000 :: end of the ibr-loop
1022 *
1023 * .. update SVA(N)
1024  IF( ( sva( n ).LT.rootbig ) .AND. ( sva( n ).GT.rootsfmin ) )
1025  $ THEN
1026  sva( n ) = snrm2( m, a( 1, n ), 1 )*d( n )
1027  ELSE
1028  t = zero
1029  aapp = one
1030  CALL slassq( m, a( 1, n ), 1, t, aapp )
1031  sva( n ) = t*sqrt( aapp )*d( n )
1032  END IF
1033 *
1034 * Additional steering devices
1035 *
1036  IF( ( i.LT.swband ) .AND. ( ( mxaapq.LE.roottol ) .OR.
1037  $ ( iswrot.LE.n ) ) )swband = i
1038 *
1039  IF( ( i.GT.swband+1 ) .AND. ( mxaapq.LT.float( n )*tol ) .AND.
1040  $ ( float( n )*mxaapq*mxsinj.LT.tol ) ) THEN
1041  GO TO 1994
1042  END IF
1043 *
1044  IF( notrot.GE.emptsw )GO TO 1994
1045 
1046  1993 CONTINUE
1047 * end i=1:NSWEEP loop
1048 * #:) Reaching this point means that the procedure has completed the given
1049 * number of iterations.
1050  info = nsweep - 1
1051  GO TO 1995
1052  1994 CONTINUE
1053 * #:) Reaching this point means that during the i-th sweep all pivots were
1054 * below the given tolerance, causing early exit.
1055 *
1056  info = 0
1057 * #:) INFO = 0 confirms successful iterations.
1058  1995 CONTINUE
1059 *
1060 * Sort the vector D.
1061  DO 5991 p = 1, n - 1
1062  q = isamax( n-p+1, sva( p ), 1 ) + p - 1
1063  IF( p.NE.q ) THEN
1064  temp1 = sva( p )
1065  sva( p ) = sva( q )
1066  sva( q ) = temp1
1067  temp1 = d( p )
1068  d( p ) = d( q )
1069  d( q ) = temp1
1070  CALL sswap( m, a( 1, p ), 1, a( 1, q ), 1 )
1071  IF( rsvec )CALL sswap( mvl, v( 1, p ), 1, v( 1, q ), 1 )
1072  END IF
1073  5991 CONTINUE
1074 *
1075  RETURN
1076 * ..
1077 * .. END OF SGSVJ0
1078 * ..
1079  END
sswap
subroutine sswap(N, SX, INCX, SY, INCY)
SSWAP
Definition: sswap.f:84
srotm
subroutine srotm(N, SX, INCX, SY, INCY, SPARAM)
SROTM
Definition: srotm.f:99
slascl
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:145
slassq
subroutine slassq(N, X, INCX, SCALE, SUMSQ)
SLASSQ updates a sum of squares represented in scaled form.
Definition: slassq.f:105
scopy
subroutine scopy(N, SX, INCX, SY, INCY)
SCOPY
Definition: scopy.f:84
xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
sgsvj0
subroutine sgsvj0(JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO)
SGSVJ0 pre-processor for the routine sgesvj.
Definition: sgsvj0.f:220
saxpy
subroutine saxpy(N, SA, SX, INCX, SY, INCY)
SAXPY
Definition: saxpy.f:91