LAPACK  3.9.0
LAPACK: Linear Algebra PACKage

◆ zhb2st_kernels()

subroutine zhb2st_kernels ( character  UPLO,
logical  WANTZ,
integer  TTYPE,
integer  ST,
integer  ED,
integer  SWEEP,
integer  N,
integer  NB,
integer  IB,
complex*16, dimension( lda, * )  A,
integer  LDA,
complex*16, dimension( * )  V,
complex*16, dimension( * )  TAU,
integer  LDVT,
complex*16, dimension( * )  WORK 
)

ZHB2ST_KERNELS

Download ZHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is COMPLEX*16 array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is COMPLEX*16 array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is COMPLEX*16 array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is COMPLEX*16 array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 170 of file zhb2st_kernels.f.

170 *
171  IMPLICIT NONE
172 *
173 * -- LAPACK computational routine (version 3.7.1) --
174 * -- LAPACK is a software package provided by Univ. of Tennessee, --
175 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176 * June 2017
177 *
178 * .. Scalar Arguments ..
179  CHARACTER UPLO
180  LOGICAL WANTZ
181  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
182 * ..
183 * .. Array Arguments ..
184  COMPLEX*16 A( LDA, * ), V( * ),
185  $ TAU( * ), WORK( * )
186 * ..
187 *
188 * =====================================================================
189 *
190 * .. Parameters ..
191  COMPLEX*16 ZERO, ONE
192  parameter( zero = ( 0.0d+0, 0.0d+0 ),
193  $ one = ( 1.0d+0, 0.0d+0 ) )
194 * ..
195 * .. Local Scalars ..
196  LOGICAL UPPER
197  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
198  $ DPOS, OFDPOS, AJETER
199  COMPLEX*16 CTMP
200 * ..
201 * .. External Subroutines ..
202  EXTERNAL zlarfg, zlarfx, zlarfy
203 * ..
204 * .. Intrinsic Functions ..
205  INTRINSIC dconjg, mod
206 * .. External Functions ..
207  LOGICAL LSAME
208  EXTERNAL lsame
209 * ..
210 * ..
211 * .. Executable Statements ..
212 *
213  ajeter = ib + ldvt
214  upper = lsame( uplo, 'U' )
215 
216  IF( upper ) THEN
217  dpos = 2 * nb + 1
218  ofdpos = 2 * nb
219  ELSE
220  dpos = 1
221  ofdpos = 2
222  ENDIF
223 
224 *
225 * Upper case
226 *
227  IF( upper ) THEN
228 *
229  IF( wantz ) THEN
230  vpos = mod( sweep-1, 2 ) * n + st
231  taupos = mod( sweep-1, 2 ) * n + st
232  ELSE
233  vpos = mod( sweep-1, 2 ) * n + st
234  taupos = mod( sweep-1, 2 ) * n + st
235  ENDIF
236 *
237  IF( ttype.EQ.1 ) THEN
238  lm = ed - st + 1
239 *
240  v( vpos ) = one
241  DO 10 i = 1, lm-1
242  v( vpos+i ) = dconjg( a( ofdpos-i, st+i ) )
243  a( ofdpos-i, st+i ) = zero
244  10 CONTINUE
245  ctmp = dconjg( a( ofdpos, st ) )
246  CALL zlarfg( lm, ctmp, v( vpos+1 ), 1,
247  $ tau( taupos ) )
248  a( ofdpos, st ) = ctmp
249 *
250  lm = ed - st + 1
251  CALL zlarfy( uplo, lm, v( vpos ), 1,
252  $ dconjg( tau( taupos ) ),
253  $ a( dpos, st ), lda-1, work)
254  ENDIF
255 *
256  IF( ttype.EQ.3 ) THEN
257 *
258  lm = ed - st + 1
259  CALL zlarfy( uplo, lm, v( vpos ), 1,
260  $ dconjg( tau( taupos ) ),
261  $ a( dpos, st ), lda-1, work)
262  ENDIF
263 *
264  IF( ttype.EQ.2 ) THEN
265  j1 = ed+1
266  j2 = min( ed+nb, n )
267  ln = ed-st+1
268  lm = j2-j1+1
269  IF( lm.GT.0) THEN
270  CALL zlarfx( 'Left', ln, lm, v( vpos ),
271  $ dconjg( tau( taupos ) ),
272  $ a( dpos-nb, j1 ), lda-1, work)
273 *
274  IF( wantz ) THEN
275  vpos = mod( sweep-1, 2 ) * n + j1
276  taupos = mod( sweep-1, 2 ) * n + j1
277  ELSE
278  vpos = mod( sweep-1, 2 ) * n + j1
279  taupos = mod( sweep-1, 2 ) * n + j1
280  ENDIF
281 *
282  v( vpos ) = one
283  DO 30 i = 1, lm-1
284  v( vpos+i ) =
285  $ dconjg( a( dpos-nb-i, j1+i ) )
286  a( dpos-nb-i, j1+i ) = zero
287  30 CONTINUE
288  ctmp = dconjg( a( dpos-nb, j1 ) )
289  CALL zlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
290  a( dpos-nb, j1 ) = ctmp
291 *
292  CALL zlarfx( 'Right', ln-1, lm, v( vpos ),
293  $ tau( taupos ),
294  $ a( dpos-nb+1, j1 ), lda-1, work)
295  ENDIF
296  ENDIF
297 *
298 * Lower case
299 *
300  ELSE
301 *
302  IF( wantz ) THEN
303  vpos = mod( sweep-1, 2 ) * n + st
304  taupos = mod( sweep-1, 2 ) * n + st
305  ELSE
306  vpos = mod( sweep-1, 2 ) * n + st
307  taupos = mod( sweep-1, 2 ) * n + st
308  ENDIF
309 *
310  IF( ttype.EQ.1 ) THEN
311  lm = ed - st + 1
312 *
313  v( vpos ) = one
314  DO 20 i = 1, lm-1
315  v( vpos+i ) = a( ofdpos+i, st-1 )
316  a( ofdpos+i, st-1 ) = zero
317  20 CONTINUE
318  CALL zlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
319  $ tau( taupos ) )
320 *
321  lm = ed - st + 1
322 *
323  CALL zlarfy( uplo, lm, v( vpos ), 1,
324  $ dconjg( tau( taupos ) ),
325  $ a( dpos, st ), lda-1, work)
326 
327  ENDIF
328 *
329  IF( ttype.EQ.3 ) THEN
330  lm = ed - st + 1
331 *
332  CALL zlarfy( uplo, lm, v( vpos ), 1,
333  $ dconjg( tau( taupos ) ),
334  $ a( dpos, st ), lda-1, work)
335 
336  ENDIF
337 *
338  IF( ttype.EQ.2 ) THEN
339  j1 = ed+1
340  j2 = min( ed+nb, n )
341  ln = ed-st+1
342  lm = j2-j1+1
343 *
344  IF( lm.GT.0) THEN
345  CALL zlarfx( 'Right', lm, ln, v( vpos ),
346  $ tau( taupos ), a( dpos+nb, st ),
347  $ lda-1, work)
348 *
349  IF( wantz ) THEN
350  vpos = mod( sweep-1, 2 ) * n + j1
351  taupos = mod( sweep-1, 2 ) * n + j1
352  ELSE
353  vpos = mod( sweep-1, 2 ) * n + j1
354  taupos = mod( sweep-1, 2 ) * n + j1
355  ENDIF
356 *
357  v( vpos ) = one
358  DO 40 i = 1, lm-1
359  v( vpos+i ) = a( dpos+nb+i, st )
360  a( dpos+nb+i, st ) = zero
361  40 CONTINUE
362  CALL zlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
363  $ tau( taupos ) )
364 *
365  CALL zlarfx( 'Left', lm, ln-1, v( vpos ),
366  $ dconjg( tau( taupos ) ),
367  $ a( dpos+nb-1, st+1 ), lda-1, work)
368 
369  ENDIF
370  ENDIF
371  ENDIF
372 *
373  RETURN
374 *
375 * END OF ZHB2ST_KERNELS
376 *
Here is the call graph for this function:
Here is the caller graph for this function:
zlarfx
subroutine zlarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
ZLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: zlarfx.f:121
zlarfy
subroutine zlarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
ZLARFY
Definition: zlarfy.f:110
lsame
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
zlarfg
subroutine zlarfg(N, ALPHA, X, INCX, TAU)
ZLARFG generates an elementary reflector (Householder matrix).
Definition: zlarfg.f:108