LAPACK  3.9.0
LAPACK: Linear Algebra PACKage

◆ dsytrd_sb2st()

subroutine dsytrd_sb2st ( character  STAGE1,
character  VECT,
character  UPLO,
integer  N,
integer  KD,
double precision, dimension( ldab, * )  AB,
integer  LDAB,
double precision, dimension( * )  D,
double precision, dimension( * )  E,
double precision, dimension( * )  HOUS,
integer  LHOUS,
double precision, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T

Download DSYTRD_SB2ST + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric
 tridiagonal form T by a orthogonal similarity transformation:
 Q**T * A * Q = T.
Parameters
[in]STAGE1
          STAGE1 is CHARACTER*1
          = 'N':  "No": to mention that the stage 1 of the reduction  
                  from dense to band using the dsytrd_sy2sb routine
                  was not called before this routine to reproduce AB. 
                  In other term this routine is called as standalone. 
          = 'Y':  "Yes": to mention that the stage 1 of the 
                  reduction from dense to band using the dsytrd_sy2sb 
                  routine has been called to produce AB (e.g., AB is
                  the output of dsytrd_sy2sb.
[in]VECT
          VECT is CHARACTER*1
          = 'N':  No need for the Housholder representation, 
                  and thus LHOUS is of size max(1, 4*N);
          = 'V':  the Householder representation is needed to 
                  either generate or to apply Q later on, 
                  then LHOUS is to be queried and computed.
                  (NOT AVAILABLE IN THIS RELEASE).
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in]KD
          KD is INTEGER
          The number of superdiagonals of the matrix A if UPLO = 'U',
          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
[in,out]AB
          AB is DOUBLE PRECISION array, dimension (LDAB,N)
          On entry, the upper or lower triangle of the symmetric band
          matrix A, stored in the first KD+1 rows of the array.  The
          j-th column of A is stored in the j-th column of the array AB
          as follows:
          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
          On exit, the diagonal elements of AB are overwritten by the
          diagonal elements of the tridiagonal matrix T; if KD > 0, the
          elements on the first superdiagonal (if UPLO = 'U') or the
          first subdiagonal (if UPLO = 'L') are overwritten by the
          off-diagonal elements of T; the rest of AB is overwritten by
          values generated during the reduction.
[in]LDAB
          LDAB is INTEGER
          The leading dimension of the array AB.  LDAB >= KD+1.
[out]D
          D is DOUBLE PRECISION array, dimension (N)
          The diagonal elements of the tridiagonal matrix T.
[out]E
          E is DOUBLE PRECISION array, dimension (N-1)
          The off-diagonal elements of the tridiagonal matrix T:
          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
[out]HOUS
          HOUS is DOUBLE PRECISION array, dimension LHOUS, that
          store the Householder representation.
[in]LHOUS
          LHOUS is INTEGER
          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a query is assumed; the routine
          only calculates the optimal size of the HOUS array, returns
          this value as the first entry of the HOUS array, and no error
          message related to LHOUS is issued by XERBLA.
          LHOUS = MAX(1, dimension) where
          dimension = 4*N if VECT='N'
          not available now if VECT='H'     
[out]WORK
          WORK is DOUBLE PRECISION array, dimension LWORK.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK. LWORK = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
          LWORK = MAX(1, dimension) where
          dimension   = (2KD+1)*N + KD*NTHREADS
          where KD is the blocking size of the reduction,
          FACTOPTNB is the blocking used by the QR or LQ
          algorithm, usually FACTOPTNB=128 is a good choice
          NTHREADS is the number of threads used when
          openMP compilation is enabled, otherwise =1.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
November 2017
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196 

Definition at line 232 of file dsytrd_sb2st.F.

232 *
233 #if defined(_OPENMP)
234  use omp_lib
235 #endif
236 *
237  IMPLICIT NONE
238 *
239 * -- LAPACK computational routine (version 3.8.0) --
240 * -- LAPACK is a software package provided by Univ. of Tennessee, --
241 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
242 * November 2017
243 *
244 * .. Scalar Arguments ..
245  CHARACTER STAGE1, UPLO, VECT
246  INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
247 * ..
248 * .. Array Arguments ..
249  DOUBLE PRECISION D( * ), E( * )
250  DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * )
251 * ..
252 *
253 * =====================================================================
254 *
255 * .. Parameters ..
256  DOUBLE PRECISION RZERO
257  DOUBLE PRECISION ZERO, ONE
258  parameter( rzero = 0.0d+0,
259  $ zero = 0.0d+0,
260  $ one = 1.0d+0 )
261 * ..
262 * .. Local Scalars ..
263  LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
264  INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
265  $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
266  $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
267  $ NBTILES, TTYPE, TID, NTHREADS, DEBUG,
268  $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
269  $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
270  $ SIDEV, SIZETAU, LDV, LHMIN, LWMIN
271 * ..
272 * .. External Subroutines ..
273  EXTERNAL dsb2st_kernels, dlacpy, dlaset, xerbla
274 * ..
275 * .. Intrinsic Functions ..
276  INTRINSIC min, max, ceiling, real
277 * ..
278 * .. External Functions ..
279  LOGICAL LSAME
280  INTEGER ILAENV2STAGE
281  EXTERNAL lsame, ilaenv2stage
282 * ..
283 * .. Executable Statements ..
284 *
285 * Determine the minimal workspace size required.
286 * Test the input parameters
287 *
288  debug = 0
289  info = 0
290  afters1 = lsame( stage1, 'Y' )
291  wantq = lsame( vect, 'V' )
292  upper = lsame( uplo, 'U' )
293  lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
294 *
295 * Determine the block size, the workspace size and the hous size.
296 *
297  ib = ilaenv2stage( 2, 'DSYTRD_SB2ST', vect, n, kd, -1, -1 )
298  lhmin = ilaenv2stage( 3, 'DSYTRD_SB2ST', vect, n, kd, ib, -1 )
299  lwmin = ilaenv2stage( 4, 'DSYTRD_SB2ST', vect, n, kd, ib, -1 )
300 *
301  IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
302  info = -1
303  ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
304  info = -2
305  ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
306  info = -3
307  ELSE IF( n.LT.0 ) THEN
308  info = -4
309  ELSE IF( kd.LT.0 ) THEN
310  info = -5
311  ELSE IF( ldab.LT.(kd+1) ) THEN
312  info = -7
313  ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
314  info = -11
315  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
316  info = -13
317  END IF
318 *
319  IF( info.EQ.0 ) THEN
320  hous( 1 ) = lhmin
321  work( 1 ) = lwmin
322  END IF
323 *
324  IF( info.NE.0 ) THEN
325  CALL xerbla( 'DSYTRD_SB2ST', -info )
326  RETURN
327  ELSE IF( lquery ) THEN
328  RETURN
329  END IF
330 *
331 * Quick return if possible
332 *
333  IF( n.EQ.0 ) THEN
334  hous( 1 ) = 1
335  work( 1 ) = 1
336  RETURN
337  END IF
338 *
339 * Determine pointer position
340 *
341  ldv = kd + ib
342  sizetau = 2 * n
343  sidev = 2 * n
344  indtau = 1
345  indv = indtau + sizetau
346  lda = 2 * kd + 1
347  sizea = lda * n
348  inda = 1
349  indw = inda + sizea
350  nthreads = 1
351  tid = 0
352 *
353  IF( upper ) THEN
354  apos = inda + kd
355  awpos = inda
356  dpos = apos + kd
357  ofdpos = dpos - 1
358  abdpos = kd + 1
359  abofdpos = kd
360  ELSE
361  apos = inda
362  awpos = inda + kd + 1
363  dpos = apos
364  ofdpos = dpos + 1
365  abdpos = 1
366  abofdpos = 2
367 
368  ENDIF
369 *
370 * Case KD=0:
371 * The matrix is diagonal. We just copy it (convert to "real" for
372 * real because D is double and the imaginary part should be 0)
373 * and store it in D. A sequential code here is better or
374 * in a parallel environment it might need two cores for D and E
375 *
376  IF( kd.EQ.0 ) THEN
377  DO 30 i = 1, n
378  d( i ) = ( ab( abdpos, i ) )
379  30 CONTINUE
380  DO 40 i = 1, n-1
381  e( i ) = rzero
382  40 CONTINUE
383 *
384  hous( 1 ) = 1
385  work( 1 ) = 1
386  RETURN
387  END IF
388 *
389 * Case KD=1:
390 * The matrix is already Tridiagonal. We have to make diagonal
391 * and offdiagonal elements real, and store them in D and E.
392 * For that, for real precision just copy the diag and offdiag
393 * to D and E while for the COMPLEX case the bulge chasing is
394 * performed to convert the hermetian tridiagonal to symmetric
395 * tridiagonal. A simpler coversion formula might be used, but then
396 * updating the Q matrix will be required and based if Q is generated
397 * or not this might complicate the story.
398 *
399  IF( kd.EQ.1 ) THEN
400  DO 50 i = 1, n
401  d( i ) = ( ab( abdpos, i ) )
402  50 CONTINUE
403 *
404  IF( upper ) THEN
405  DO 60 i = 1, n-1
406  e( i ) = ( ab( abofdpos, i+1 ) )
407  60 CONTINUE
408  ELSE
409  DO 70 i = 1, n-1
410  e( i ) = ( ab( abofdpos, i ) )
411  70 CONTINUE
412  ENDIF
413 *
414  hous( 1 ) = 1
415  work( 1 ) = 1
416  RETURN
417  END IF
418 *
419 * Main code start here.
420 * Reduce the symmetric band of A to a tridiagonal matrix.
421 *
422  thgrsiz = n
423  grsiz = 1
424  shift = 3
425  nbtiles = ceiling( real(n)/real(kd) )
426  stepercol = ceiling( real(shift)/real(grsiz) )
427  thgrnb = ceiling( real(n-1)/real(thgrsiz) )
428 *
429  CALL dlacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
430  CALL dlaset( "A", kd, n, zero, zero, work( awpos ), lda )
431 *
432 *
433 * openMP parallelisation start here
434 *
435 #if defined(_OPENMP)
436 !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
437 !$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
438 !$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
439 !$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
440 !$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
441 !$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
442 !$OMP MASTER
443 #endif
444 *
445 * main bulge chasing loop
446 *
447  DO 100 thgrid = 1, thgrnb
448  stt = (thgrid-1)*thgrsiz+1
449  thed = min( (stt + thgrsiz -1), (n-1))
450  DO 110 i = stt, n-1
451  ed = min( i, thed )
452  IF( stt.GT.ed ) EXIT
453  DO 120 m = 1, stepercol
454  st = stt
455  DO 130 sweepid = st, ed
456  DO 140 k = 1, grsiz
457  myid = (i-sweepid)*(stepercol*grsiz)
458  $ + (m-1)*grsiz + k
459  IF ( myid.EQ.1 ) THEN
460  ttype = 1
461  ELSE
462  ttype = mod( myid, 2 ) + 2
463  ENDIF
464 
465  IF( ttype.EQ.2 ) THEN
466  colpt = (myid/2)*kd + sweepid
467  stind = colpt-kd+1
468  edind = min(colpt,n)
469  blklastind = colpt
470  ELSE
471  colpt = ((myid+1)/2)*kd + sweepid
472  stind = colpt-kd+1
473  edind = min(colpt,n)
474  IF( ( stind.GE.edind-1 ).AND.
475  $ ( edind.EQ.n ) ) THEN
476  blklastind = n
477  ELSE
478  blklastind = 0
479  ENDIF
480  ENDIF
481 *
482 * Call the kernel
483 *
484 #if defined(_OPENMP)
485  IF( ttype.NE.1 ) THEN
486 !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
487 !$OMP$ DEPEND(in:WORK(MYID-1))
488 !$OMP$ DEPEND(out:WORK(MYID))
489  tid = omp_get_thread_num()
490  CALL dsb2st_kernels( uplo, wantq, ttype,
491  $ stind, edind, sweepid, n, kd, ib,
492  $ work( inda ), lda,
493  $ hous( indv ), hous( indtau ), ldv,
494  $ work( indw + tid*kd ) )
495 !$OMP END TASK
496  ELSE
497 !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
498 !$OMP$ DEPEND(out:WORK(MYID))
499  tid = omp_get_thread_num()
500  CALL dsb2st_kernels( uplo, wantq, ttype,
501  $ stind, edind, sweepid, n, kd, ib,
502  $ work( inda ), lda,
503  $ hous( indv ), hous( indtau ), ldv,
504  $ work( indw + tid*kd ) )
505 !$OMP END TASK
506  ENDIF
507 #else
508  CALL dsb2st_kernels( uplo, wantq, ttype,
509  $ stind, edind, sweepid, n, kd, ib,
510  $ work( inda ), lda,
511  $ hous( indv ), hous( indtau ), ldv,
512  $ work( indw + tid*kd ) )
513 #endif
514  IF ( blklastind.GE.(n-1) ) THEN
515  stt = stt + 1
516  EXIT
517  ENDIF
518  140 CONTINUE
519  130 CONTINUE
520  120 CONTINUE
521  110 CONTINUE
522  100 CONTINUE
523 *
524 #if defined(_OPENMP)
525 !$OMP END MASTER
526 !$OMP END PARALLEL
527 #endif
528 *
529 * Copy the diagonal from A to D. Note that D is REAL thus only
530 * the Real part is needed, the imaginary part should be zero.
531 *
532  DO 150 i = 1, n
533  d( i ) = ( work( dpos+(i-1)*lda ) )
534  150 CONTINUE
535 *
536 * Copy the off diagonal from A to E. Note that E is REAL thus only
537 * the Real part is needed, the imaginary part should be zero.
538 *
539  IF( upper ) THEN
540  DO 160 i = 1, n-1
541  e( i ) = ( work( ofdpos+i*lda ) )
542  160 CONTINUE
543  ELSE
544  DO 170 i = 1, n-1
545  e( i ) = ( work( ofdpos+(i-1)*lda ) )
546  170 CONTINUE
547  ENDIF
548 *
549  hous( 1 ) = lhmin
550  work( 1 ) = lwmin
551  RETURN
552 *
553 * End of DSYTRD_SB2ST
554 *
Here is the call graph for this function:
Here is the caller graph for this function:
xerbla
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
lsame
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
dlaset
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: dlaset.f:112
ilaenv2stage
integer function ilaenv2stage(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV2STAGE
Definition: ilaenv2stage.f:151
dsb2st_kernels
subroutine dsb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
DSB2ST_KERNELS
Definition: dsb2st_kernels.f:170
dlacpy
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105