LAPACK  3.9.0
LAPACK: Linear Algebra PACKage
ssb2st_kernels.f
Go to the documentation of this file.
1 *> \brief \b SSB2ST_KERNELS
2 *
3 * @generated from zhb2st_kernels.f, fortran z -> s, Wed Dec 7 08:22:40 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download SSB2ST_KERNELS + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssb2st_kernels.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssb2st_kernels.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssb2st_kernels.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24 * ST, ED, SWEEP, N, NB, IB,
25 * A, LDA, V, TAU, LDVT, WORK)
26 *
27 * IMPLICIT NONE
28 *
29 * .. Scalar Arguments ..
30 * CHARACTER UPLO
31 * LOGICAL WANTZ
32 * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33 * ..
34 * .. Array Arguments ..
35 * REAL A( LDA, * ), V( * ),
36 * TAU( * ), WORK( * )
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
44 *> subroutine.
45 *> \endverbatim
46 *
47 * Arguments:
48 * ==========
49 *
50 *> \param[in] UPLO
51 *> \verbatim
52 *> UPLO is CHARACTER*1
53 *> \endverbatim
54 *>
55 *> \param[in] WANTZ
56 *> \verbatim
57 *> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58 *> Eigenvalue/Eigenvectors.
59 *> \endverbatim
60 *>
61 *> \param[in] TTYPE
62 *> \verbatim
63 *> TTYPE is INTEGER
64 *> \endverbatim
65 *>
66 *> \param[in] ST
67 *> \verbatim
68 *> ST is INTEGER
69 *> internal parameter for indices.
70 *> \endverbatim
71 *>
72 *> \param[in] ED
73 *> \verbatim
74 *> ED is INTEGER
75 *> internal parameter for indices.
76 *> \endverbatim
77 *>
78 *> \param[in] SWEEP
79 *> \verbatim
80 *> SWEEP is INTEGER
81 *> internal parameter for indices.
82 *> \endverbatim
83 *>
84 *> \param[in] N
85 *> \verbatim
86 *> N is INTEGER. The order of the matrix A.
87 *> \endverbatim
88 *>
89 *> \param[in] NB
90 *> \verbatim
91 *> NB is INTEGER. The size of the band.
92 *> \endverbatim
93 *>
94 *> \param[in] IB
95 *> \verbatim
96 *> IB is INTEGER.
97 *> \endverbatim
98 *>
99 *> \param[in, out] A
100 *> \verbatim
101 *> A is REAL array. A pointer to the matrix A.
102 *> \endverbatim
103 *>
104 *> \param[in] LDA
105 *> \verbatim
106 *> LDA is INTEGER. The leading dimension of the matrix A.
107 *> \endverbatim
108 *>
109 *> \param[out] V
110 *> \verbatim
111 *> V is REAL array, dimension 2*n if eigenvalues only are
112 *> requested or to be queried for vectors.
113 *> \endverbatim
114 *>
115 *> \param[out] TAU
116 *> \verbatim
117 *> TAU is REAL array, dimension (2*n).
118 *> The scalar factors of the Householder reflectors are stored
119 *> in this array.
120 *> \endverbatim
121 *>
122 *> \param[in] LDVT
123 *> \verbatim
124 *> LDVT is INTEGER.
125 *> \endverbatim
126 *>
127 *> \param[out] WORK
128 *> \verbatim
129 *> WORK is REAL array. Workspace of size nb.
130 *> \endverbatim
131 *> @param[in] n
132 *> The order of the matrix A.
133 *>
134 *>
135 *> \par Further Details:
136 * =====================
137 *>
138 *> \verbatim
139 *>
140 *> Implemented by Azzam Haidar.
141 *>
142 *> All details are available on technical report, SC11, SC13 papers.
143 *>
144 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
145 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
146 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
147 *> of 2011 International Conference for High Performance Computing,
148 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
149 *> Article 8 , 11 pages.
150 *> http://doi.acm.org/10.1145/2063384.2063394
151 *>
152 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
153 *> An improved parallel singular value algorithm and its implementation
154 *> for multicore hardware, In Proceedings of 2013 International Conference
155 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
156 *> Denver, Colorado, USA, 2013.
157 *> Article 90, 12 pages.
158 *> http://doi.acm.org/10.1145/2503210.2503292
159 *>
160 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
161 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
162 *> calculations based on fine-grained memory aware tasks.
163 *> International Journal of High Performance Computing Applications.
164 *> Volume 28 Issue 2, Pages 196-209, May 2014.
165 *> http://hpc.sagepub.com/content/28/2/196
166 *>
167 *> \endverbatim
168 *>
169 * =====================================================================
170  SUBROUTINE ssb2st_kernels( UPLO, WANTZ, TTYPE,
171  $ ST, ED, SWEEP, N, NB, IB,
172  $ A, LDA, V, TAU, LDVT, WORK)
173 *
174  IMPLICIT NONE
175 *
176 * -- LAPACK computational routine (version 3.7.1) --
177 * -- LAPACK is a software package provided by Univ. of Tennessee, --
178 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
179 * June 2017
180 *
181 * .. Scalar Arguments ..
182  CHARACTER UPLO
183  LOGICAL WANTZ
184  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
185 * ..
186 * .. Array Arguments ..
187  REAL A( LDA, * ), V( * ),
188  $ TAU( * ), WORK( * )
189 * ..
190 *
191 * =====================================================================
192 *
193 * .. Parameters ..
194  REAL ZERO, ONE
195  PARAMETER ( ZERO = 0.0e+0,
196  $ one = 1.0e+0 )
197 * ..
198 * .. Local Scalars ..
199  LOGICAL UPPER
200  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
201  $ dpos, ofdpos, ajeter
202  REAL CTMP
203 * ..
204 * .. External Subroutines ..
205  EXTERNAL slarfg, slarfx, slarfy
206 * ..
207 * .. Intrinsic Functions ..
208  INTRINSIC mod
209 * .. External Functions ..
210  LOGICAL LSAME
211  EXTERNAL LSAME
212 * ..
213 * ..
214 * .. Executable Statements ..
215 *
216  ajeter = ib + ldvt
217  upper = lsame( uplo, 'U' )
218 
219  IF( upper ) THEN
220  dpos = 2 * nb + 1
221  ofdpos = 2 * nb
222  ELSE
223  dpos = 1
224  ofdpos = 2
225  ENDIF
226 
227 *
228 * Upper case
229 *
230  IF( upper ) THEN
231 *
232  IF( wantz ) THEN
233  vpos = mod( sweep-1, 2 ) * n + st
234  taupos = mod( sweep-1, 2 ) * n + st
235  ELSE
236  vpos = mod( sweep-1, 2 ) * n + st
237  taupos = mod( sweep-1, 2 ) * n + st
238  ENDIF
239 *
240  IF( ttype.EQ.1 ) THEN
241  lm = ed - st + 1
242 *
243  v( vpos ) = one
244  DO 10 i = 1, lm-1
245  v( vpos+i ) = ( a( ofdpos-i, st+i ) )
246  a( ofdpos-i, st+i ) = zero
247  10 CONTINUE
248  ctmp = ( a( ofdpos, st ) )
249  CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
250  $ tau( taupos ) )
251  a( ofdpos, st ) = ctmp
252 *
253  lm = ed - st + 1
254  CALL slarfy( uplo, lm, v( vpos ), 1,
255  $ ( tau( taupos ) ),
256  $ a( dpos, st ), lda-1, work)
257  ENDIF
258 *
259  IF( ttype.EQ.3 ) THEN
260 *
261  lm = ed - st + 1
262  CALL slarfy( uplo, lm, v( vpos ), 1,
263  $ ( tau( taupos ) ),
264  $ a( dpos, st ), lda-1, work)
265  ENDIF
266 *
267  IF( ttype.EQ.2 ) THEN
268  j1 = ed+1
269  j2 = min( ed+nb, n )
270  ln = ed-st+1
271  lm = j2-j1+1
272  IF( lm.GT.0) THEN
273  CALL slarfx( 'Left', ln, lm, v( vpos ),
274  $ ( tau( taupos ) ),
275  $ a( dpos-nb, j1 ), lda-1, work)
276 *
277  IF( wantz ) THEN
278  vpos = mod( sweep-1, 2 ) * n + j1
279  taupos = mod( sweep-1, 2 ) * n + j1
280  ELSE
281  vpos = mod( sweep-1, 2 ) * n + j1
282  taupos = mod( sweep-1, 2 ) * n + j1
283  ENDIF
284 *
285  v( vpos ) = one
286  DO 30 i = 1, lm-1
287  v( vpos+i ) =
288  $ ( a( dpos-nb-i, j1+i ) )
289  a( dpos-nb-i, j1+i ) = zero
290  30 CONTINUE
291  ctmp = ( a( dpos-nb, j1 ) )
292  CALL slarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
293  a( dpos-nb, j1 ) = ctmp
294 *
295  CALL slarfx( 'Right', ln-1, lm, v( vpos ),
296  $ tau( taupos ),
297  $ a( dpos-nb+1, j1 ), lda-1, work)
298  ENDIF
299  ENDIF
300 *
301 * Lower case
302 *
303  ELSE
304 *
305  IF( wantz ) THEN
306  vpos = mod( sweep-1, 2 ) * n + st
307  taupos = mod( sweep-1, 2 ) * n + st
308  ELSE
309  vpos = mod( sweep-1, 2 ) * n + st
310  taupos = mod( sweep-1, 2 ) * n + st
311  ENDIF
312 *
313  IF( ttype.EQ.1 ) THEN
314  lm = ed - st + 1
315 *
316  v( vpos ) = one
317  DO 20 i = 1, lm-1
318  v( vpos+i ) = a( ofdpos+i, st-1 )
319  a( ofdpos+i, st-1 ) = zero
320  20 CONTINUE
321  CALL slarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
322  $ tau( taupos ) )
323 *
324  lm = ed - st + 1
325 *
326  CALL slarfy( uplo, lm, v( vpos ), 1,
327  $ ( tau( taupos ) ),
328  $ a( dpos, st ), lda-1, work)
329 
330  ENDIF
331 *
332  IF( ttype.EQ.3 ) THEN
333  lm = ed - st + 1
334 *
335  CALL slarfy( uplo, lm, v( vpos ), 1,
336  $ ( tau( taupos ) ),
337  $ a( dpos, st ), lda-1, work)
338 
339  ENDIF
340 *
341  IF( ttype.EQ.2 ) THEN
342  j1 = ed+1
343  j2 = min( ed+nb, n )
344  ln = ed-st+1
345  lm = j2-j1+1
346 *
347  IF( lm.GT.0) THEN
348  CALL slarfx( 'Right', lm, ln, v( vpos ),
349  $ tau( taupos ), a( dpos+nb, st ),
350  $ lda-1, work)
351 *
352  IF( wantz ) THEN
353  vpos = mod( sweep-1, 2 ) * n + j1
354  taupos = mod( sweep-1, 2 ) * n + j1
355  ELSE
356  vpos = mod( sweep-1, 2 ) * n + j1
357  taupos = mod( sweep-1, 2 ) * n + j1
358  ENDIF
359 *
360  v( vpos ) = one
361  DO 40 i = 1, lm-1
362  v( vpos+i ) = a( dpos+nb+i, st )
363  a( dpos+nb+i, st ) = zero
364  40 CONTINUE
365  CALL slarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
366  $ tau( taupos ) )
367 *
368  CALL slarfx( 'Left', lm, ln-1, v( vpos ),
369  $ ( tau( taupos ) ),
370  $ a( dpos+nb-1, st+1 ), lda-1, work)
371 
372  ENDIF
373  ENDIF
374  ENDIF
375 *
376  RETURN
377 *
378 * END OF SSB2ST_KERNELS
379 *
380  END
ssb2st_kernels
subroutine ssb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
SSB2ST_KERNELS
Definition: ssb2st_kernels.f:173
slarfg
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:108
slarfx
subroutine slarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: slarfx.f:122
slarfy
subroutine slarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
SLARFY
Definition: slarfy.f:110