LAPACK  3.9.0
LAPACK: Linear Algebra PACKage

◆ ssb2st_kernels()

subroutine ssb2st_kernels ( character  UPLO,
logical  WANTZ,
integer  TTYPE,
integer  ST,
integer  ED,
integer  SWEEP,
integer  N,
integer  NB,
integer  IB,
real, dimension( lda, * )  A,
integer  LDA,
real, dimension( * )  V,
real, dimension( * )  TAU,
integer  LDVT,
real, dimension( * )  WORK 
)

SSB2ST_KERNELS

Download SSB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is REAL array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is REAL array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is REAL array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is REAL array. Workspace of size nb.
[in]nThe order of the matrix A.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 173 of file ssb2st_kernels.f.

173 *
174  IMPLICIT NONE
175 *
176 * -- LAPACK computational routine (version 3.7.1) --
177 * -- LAPACK is a software package provided by Univ. of Tennessee, --
178 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
179 * June 2017
180 *
181 * .. Scalar Arguments ..
182  CHARACTER UPLO
183  LOGICAL WANTZ
184  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
185 * ..
186 * .. Array Arguments ..
187  REAL A( LDA, * ), V( * ),
188  $ TAU( * ), WORK( * )
189 * ..
190 *
191 * =====================================================================
192 *
193 * .. Parameters ..
194  REAL ZERO, ONE
195  parameter( zero = 0.0e+0,
196  $ one = 1.0e+0 )
197 * ..
198 * .. Local Scalars ..
199  LOGICAL UPPER
200  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
201  $ DPOS, OFDPOS, AJETER
202  REAL CTMP
203 * ..
204 * .. External Subroutines ..
205  EXTERNAL slarfg, slarfx, slarfy
206 * ..
207 * .. Intrinsic Functions ..
208  INTRINSIC mod
209 * .. External Functions ..
210  LOGICAL LSAME
211  EXTERNAL lsame
212 * ..
213 * ..
214 * .. Executable Statements ..
215 *
216  ajeter = ib + ldvt
217  upper = lsame( uplo, 'U' )
218 
219  IF( upper ) THEN
220  dpos = 2 * nb + 1
221  ofdpos = 2 * nb
222  ELSE
223  dpos = 1
224  ofdpos = 2
225  ENDIF
226 
227 *
228 * Upper case
229 *
230  IF( upper ) THEN
231 *
232  IF( wantz ) THEN
233  vpos = mod( sweep-1, 2 ) * n + st
234  taupos = mod( sweep-1, 2 ) * n + st
235  ELSE
236  vpos = mod( sweep-1, 2 ) * n + st
237  taupos = mod( sweep-1, 2 ) * n + st
238  ENDIF
239 *
240  IF( ttype.EQ.1 ) THEN
241  lm = ed - st + 1
242 *
243  v( vpos ) = one
244  DO 10 i = 1, lm-1
245  v( vpos+i ) = ( a( ofdpos-i, st+i ) )
246  a( ofdpos-i, st+i ) = zero
247  10 CONTINUE
248  ctmp = ( a( ofdpos, st ) )
249  CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
250  $ tau( taupos ) )
251  a( ofdpos, st ) = ctmp
252 *
253  lm = ed - st + 1
254  CALL slarfy( uplo, lm, v( vpos ), 1,
255  $ ( tau( taupos ) ),
256  $ a( dpos, st ), lda-1, work)
257  ENDIF
258 *
259  IF( ttype.EQ.3 ) THEN
260 *
261  lm = ed - st + 1
262  CALL slarfy( uplo, lm, v( vpos ), 1,
263  $ ( tau( taupos ) ),
264  $ a( dpos, st ), lda-1, work)
265  ENDIF
266 *
267  IF( ttype.EQ.2 ) THEN
268  j1 = ed+1
269  j2 = min( ed+nb, n )
270  ln = ed-st+1
271  lm = j2-j1+1
272  IF( lm.GT.0) THEN
273  CALL slarfx( 'Left', ln, lm, v( vpos ),
274  $ ( tau( taupos ) ),
275  $ a( dpos-nb, j1 ), lda-1, work)
276 *
277  IF( wantz ) THEN
278  vpos = mod( sweep-1, 2 ) * n + j1
279  taupos = mod( sweep-1, 2 ) * n + j1
280  ELSE
281  vpos = mod( sweep-1, 2 ) * n + j1
282  taupos = mod( sweep-1, 2 ) * n + j1
283  ENDIF
284 *
285  v( vpos ) = one
286  DO 30 i = 1, lm-1
287  v( vpos+i ) =
288  $ ( a( dpos-nb-i, j1+i ) )
289  a( dpos-nb-i, j1+i ) = zero
290  30 CONTINUE
291  ctmp = ( a( dpos-nb, j1 ) )
292  CALL slarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
293  a( dpos-nb, j1 ) = ctmp
294 *
295  CALL slarfx( 'Right', ln-1, lm, v( vpos ),
296  $ tau( taupos ),
297  $ a( dpos-nb+1, j1 ), lda-1, work)
298  ENDIF
299  ENDIF
300 *
301 * Lower case
302 *
303  ELSE
304 *
305  IF( wantz ) THEN
306  vpos = mod( sweep-1, 2 ) * n + st
307  taupos = mod( sweep-1, 2 ) * n + st
308  ELSE
309  vpos = mod( sweep-1, 2 ) * n + st
310  taupos = mod( sweep-1, 2 ) * n + st
311  ENDIF
312 *
313  IF( ttype.EQ.1 ) THEN
314  lm = ed - st + 1
315 *
316  v( vpos ) = one
317  DO 20 i = 1, lm-1
318  v( vpos+i ) = a( ofdpos+i, st-1 )
319  a( ofdpos+i, st-1 ) = zero
320  20 CONTINUE
321  CALL slarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
322  $ tau( taupos ) )
323 *
324  lm = ed - st + 1
325 *
326  CALL slarfy( uplo, lm, v( vpos ), 1,
327  $ ( tau( taupos ) ),
328  $ a( dpos, st ), lda-1, work)
329 
330  ENDIF
331 *
332  IF( ttype.EQ.3 ) THEN
333  lm = ed - st + 1
334 *
335  CALL slarfy( uplo, lm, v( vpos ), 1,
336  $ ( tau( taupos ) ),
337  $ a( dpos, st ), lda-1, work)
338 
339  ENDIF
340 *
341  IF( ttype.EQ.2 ) THEN
342  j1 = ed+1
343  j2 = min( ed+nb, n )
344  ln = ed-st+1
345  lm = j2-j1+1
346 *
347  IF( lm.GT.0) THEN
348  CALL slarfx( 'Right', lm, ln, v( vpos ),
349  $ tau( taupos ), a( dpos+nb, st ),
350  $ lda-1, work)
351 *
352  IF( wantz ) THEN
353  vpos = mod( sweep-1, 2 ) * n + j1
354  taupos = mod( sweep-1, 2 ) * n + j1
355  ELSE
356  vpos = mod( sweep-1, 2 ) * n + j1
357  taupos = mod( sweep-1, 2 ) * n + j1
358  ENDIF
359 *
360  v( vpos ) = one
361  DO 40 i = 1, lm-1
362  v( vpos+i ) = a( dpos+nb+i, st )
363  a( dpos+nb+i, st ) = zero
364  40 CONTINUE
365  CALL slarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
366  $ tau( taupos ) )
367 *
368  CALL slarfx( 'Left', lm, ln-1, v( vpos ),
369  $ ( tau( taupos ) ),
370  $ a( dpos+nb-1, st+1 ), lda-1, work)
371 
372  ENDIF
373  ENDIF
374  ENDIF
375 *
376  RETURN
377 *
378 * END OF SSB2ST_KERNELS
379 *
Here is the call graph for this function:
Here is the caller graph for this function:
slarfg
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:108
slarfx
subroutine slarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: slarfx.f:122
lsame
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
slarfy
subroutine slarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
SLARFY
Definition: slarfy.f:110