module stdlib_linalg_lapack_d
     use stdlib_linalg_constants
     use stdlib_linalg_blas
     use stdlib_linalg_lapack_aux
     use stdlib_linalg_lapack_s
     implicit none(type, external)
     private

     public :: sp, dp, lk, ilp
     public :: stdlib_dbbcsd
     public :: stdlib_dbdsdc
     public :: stdlib_dbdsqr
     public :: stdlib_ddisna
     public :: stdlib_dgbbrd
     public :: stdlib_dgbcon
     public :: stdlib_dgbequ
     public :: stdlib_dgbequb
     public :: stdlib_dgbrfs
     public :: stdlib_dgbsv
     public :: stdlib_dgbsvx
     public :: stdlib_dgbtf2
     public :: stdlib_dgbtrf
     public :: stdlib_dgbtrs
     public :: stdlib_dgebak
     public :: stdlib_dgebal
     public :: stdlib_dgebd2
     public :: stdlib_dgebrd
     public :: stdlib_dgecon
     public :: stdlib_dgeequ
     public :: stdlib_dgeequb
     public :: stdlib_dgees
     public :: stdlib_dgeesx
     public :: stdlib_dgeev
     public :: stdlib_dgeevx
     public :: stdlib_dgehd2
     public :: stdlib_dgehrd
     public :: stdlib_dgejsv
     public :: stdlib_dgelq
     public :: stdlib_dgelq2
     public :: stdlib_dgelqf
     public :: stdlib_dgelqt
     public :: stdlib_dgelqt3
     public :: stdlib_dgels
     public :: stdlib_dgelsd
     public :: stdlib_dgelss
     public :: stdlib_dgelsy
     public :: stdlib_dgemlq
     public :: stdlib_dgemlqt
     public :: stdlib_dgemqr
     public :: stdlib_dgemqrt
     public :: stdlib_dgeql2
     public :: stdlib_dgeqlf
     public :: stdlib_dgeqp3
     public :: stdlib_dgeqr
     public :: stdlib_dgeqr2
     public :: stdlib_dgeqr2p
     public :: stdlib_dgeqrf
     public :: stdlib_dgeqrfp
     public :: stdlib_dgeqrt
     public :: stdlib_dgeqrt2
     public :: stdlib_dgeqrt3
     public :: stdlib_dgerfs
     public :: stdlib_dgerq2
     public :: stdlib_dgerqf
     public :: stdlib_dgesc2
     public :: stdlib_dgesdd
     public :: stdlib_dgesv
     public :: stdlib_dgesvd
     public :: stdlib_dgesvdq
     public :: stdlib_dgesvj
     public :: stdlib_dgesvx
     public :: stdlib_dgetc2
     public :: stdlib_dgetf2
     public :: stdlib_dgetrf
     public :: stdlib_dgetrf2
     public :: stdlib_dgetri
     public :: stdlib_dgetrs
     public :: stdlib_dgetsls
     public :: stdlib_dgetsqrhrt
     public :: stdlib_dggbak
     public :: stdlib_dggbal
     public :: stdlib_dgges
     public :: stdlib_dgges3
     public :: stdlib_dggesx
     public :: stdlib_dggev
     public :: stdlib_dggev3
     public :: stdlib_dggevx
     public :: stdlib_dggglm
     public :: stdlib_dgghd3
     public :: stdlib_dgghrd
     public :: stdlib_dgglse
     public :: stdlib_dggqrf
     public :: stdlib_dggrqf
     public :: stdlib_dgsvj0
     public :: stdlib_dgsvj1
     public :: stdlib_dgtcon
     public :: stdlib_dgtrfs
     public :: stdlib_dgtsv
     public :: stdlib_dgtsvx
     public :: stdlib_dgttrf
     public :: stdlib_dgttrs
     public :: stdlib_dgtts2
     public :: stdlib_dhgeqz
     public :: stdlib_dhsein
     public :: stdlib_dhseqr
     public :: stdlib_disnan
     public :: stdlib_dla_gbamv
     public :: stdlib_dla_gbrcond
     public :: stdlib_dla_gbrpvgrw
     public :: stdlib_dla_geamv
     public :: stdlib_dla_gercond
     public :: stdlib_dla_gerpvgrw
     public :: stdlib_dla_lin_berr
     public :: stdlib_dla_porcond
     public :: stdlib_dla_porpvgrw
     public :: stdlib_dla_syamv
     public :: stdlib_dla_syrcond
     public :: stdlib_dla_syrpvgrw
     public :: stdlib_dla_wwaddw
     public :: stdlib_dlabad
     public :: stdlib_dlabrd
     public :: stdlib_dlacn2
     public :: stdlib_dlacon
     public :: stdlib_dlacpy
     public :: stdlib_dladiv
     public :: stdlib_dladiv1
     public :: stdlib_dladiv2
     public :: stdlib_dlae2
     public :: stdlib_dlaebz
     public :: stdlib_dlaed0
     public :: stdlib_dlaed1
     public :: stdlib_dlaed2
     public :: stdlib_dlaed3
     public :: stdlib_dlaed4
     public :: stdlib_dlaed5
     public :: stdlib_dlaed6
     public :: stdlib_dlaed7
     public :: stdlib_dlaed8
     public :: stdlib_dlaed9
     public :: stdlib_dlaeda
     public :: stdlib_dlaein
     public :: stdlib_dlaev2
     public :: stdlib_dlaexc
     public :: stdlib_dlag2
     public :: stdlib_dlag2s
     public :: stdlib_dlags2
     public :: stdlib_dlagtf
     public :: stdlib_dlagtm
     public :: stdlib_dlagts
     public :: stdlib_dlagv2
     public :: stdlib_dlahqr
     public :: stdlib_dlahr2
     public :: stdlib_dlaic1
     public :: stdlib_dlaisnan
     public :: stdlib_dlaln2
     public :: stdlib_dlals0
     public :: stdlib_dlalsa
     public :: stdlib_dlalsd
     public :: stdlib_dlamch
     public :: stdlib_dlamc3
     public :: stdlib_dlamrg
     public :: stdlib_dlamswlq
     public :: stdlib_dlamtsqr
     public :: stdlib_dlaneg
     public :: stdlib_dlangb
     public :: stdlib_dlange
     public :: stdlib_dlangt
     public :: stdlib_dlanhs
     public :: stdlib_dlansb
     public :: stdlib_dlansf
     public :: stdlib_dlansp
     public :: stdlib_dlanst
     public :: stdlib_dlansy
     public :: stdlib_dlantb
     public :: stdlib_dlantp
     public :: stdlib_dlantr
     public :: stdlib_dlanv2
     public :: stdlib_dlaorhr_col_getrfnp
     public :: stdlib_dlaorhr_col_getrfnp2
     public :: stdlib_dlapll
     public :: stdlib_dlapmr
     public :: stdlib_dlapmt
     public :: stdlib_dlapy2
     public :: stdlib_dlapy3
     public :: stdlib_dlaqgb
     public :: stdlib_dlaqge
     public :: stdlib_dlaqp2
     public :: stdlib_dlaqps
     public :: stdlib_dlaqr0
     public :: stdlib_dlaqr1
     public :: stdlib_dlaqr2
     public :: stdlib_dlaqr3
     public :: stdlib_dlaqr4
     public :: stdlib_dlaqr5
     public :: stdlib_dlaqsb
     public :: stdlib_dlaqsp
     public :: stdlib_dlaqsy
     public :: stdlib_dlaqtr
     public :: stdlib_dlaqz0
     public :: stdlib_dlaqz1
     public :: stdlib_dlaqz2
     public :: stdlib_dlaqz3
     public :: stdlib_dlaqz4
     public :: stdlib_dlar1v
     public :: stdlib_dlar2v
     public :: stdlib_dlarf
     public :: stdlib_dlarfb
     public :: stdlib_dlarfb_gett
     public :: stdlib_dlarfg
     public :: stdlib_dlarfgp
     public :: stdlib_dlarft
     public :: stdlib_dlarfx
     public :: stdlib_dlarfy
     public :: stdlib_dlargv
     public :: stdlib_dlarnv
     public :: stdlib_dlarra
     public :: stdlib_dlarrb
     public :: stdlib_dlarrc
     public :: stdlib_dlarrd
     public :: stdlib_dlarre
     public :: stdlib_dlarrf
     public :: stdlib_dlarrj
     public :: stdlib_dlarrk
     public :: stdlib_dlarrr
     public :: stdlib_dlarrv
     public :: stdlib_dlartg
     public :: stdlib_dlartgp
     public :: stdlib_dlartgs
     public :: stdlib_dlartv
     public :: stdlib_dlaruv
     public :: stdlib_dlarz
     public :: stdlib_dlarzb
     public :: stdlib_dlarzt
     public :: stdlib_dlas2
     public :: stdlib_dlascl
     public :: stdlib_dlasd0
     public :: stdlib_dlasd1
     public :: stdlib_dlasd2
     public :: stdlib_dlasd3
     public :: stdlib_dlasd4
     public :: stdlib_dlasd5
     public :: stdlib_dlasd6
     public :: stdlib_dlasd7
     public :: stdlib_dlasd8
     public :: stdlib_dlasda
     public :: stdlib_dlasdq
     public :: stdlib_dlasdt
     public :: stdlib_dlaset
     public :: stdlib_dlasq1
     public :: stdlib_dlasq2
     public :: stdlib_dlasq3
     public :: stdlib_dlasq4
     public :: stdlib_dlasq5
     public :: stdlib_dlasq6
     public :: stdlib_dlasr
     public :: stdlib_dlasrt
     public :: stdlib_dlassq
     public :: stdlib_dlasv2
     public :: stdlib_dlaswlq
     public :: stdlib_dlaswp
     public :: stdlib_dlasy2
     public :: stdlib_dlasyf
     public :: stdlib_dlasyf_aa
     public :: stdlib_dlasyf_rk
     public :: stdlib_dlasyf_rook
     public :: stdlib_dlat2s
     public :: stdlib_dlatbs
     public :: stdlib_dlatdf
     public :: stdlib_dlatps
     public :: stdlib_dlatrd
     public :: stdlib_dlatrs
     public :: stdlib_dlatrz
     public :: stdlib_dlatsqr
     public :: stdlib_dlauu2
     public :: stdlib_dlauum
     public :: stdlib_dopgtr
     public :: stdlib_dopmtr
     public :: stdlib_dorbdb
     public :: stdlib_dorbdb1
     public :: stdlib_dorbdb2
     public :: stdlib_dorbdb3
     public :: stdlib_dorbdb4
     public :: stdlib_dorbdb5
     public :: stdlib_dorbdb6
     public :: stdlib_dorcsd
     public :: stdlib_dorcsd2by1
     public :: stdlib_dorg2l
     public :: stdlib_dorg2r
     public :: stdlib_dorgbr
     public :: stdlib_dorghr
     public :: stdlib_dorgl2
     public :: stdlib_dorglq
     public :: stdlib_dorgql
     public :: stdlib_dorgqr
     public :: stdlib_dorgr2
     public :: stdlib_dorgrq
     public :: stdlib_dorgtr
     public :: stdlib_dorgtsqr
     public :: stdlib_dorgtsqr_row
     public :: stdlib_dorhr_col
     public :: stdlib_dorm22
     public :: stdlib_dorm2l
     public :: stdlib_dorm2r
     public :: stdlib_dormbr
     public :: stdlib_dormhr
     public :: stdlib_dorml2
     public :: stdlib_dormlq
     public :: stdlib_dormql
     public :: stdlib_dormqr
     public :: stdlib_dormr2
     public :: stdlib_dormr3
     public :: stdlib_dormrq
     public :: stdlib_dormrz
     public :: stdlib_dormtr
     public :: stdlib_dpbcon
     public :: stdlib_dpbequ
     public :: stdlib_dpbrfs
     public :: stdlib_dpbstf
     public :: stdlib_dpbsv
     public :: stdlib_dpbsvx
     public :: stdlib_dpbtf2
     public :: stdlib_dpbtrf
     public :: stdlib_dpbtrs
     public :: stdlib_dpftrf
     public :: stdlib_dpftri
     public :: stdlib_dpftrs
     public :: stdlib_dpocon
     public :: stdlib_dpoequ
     public :: stdlib_dpoequb
     public :: stdlib_dporfs
     public :: stdlib_dposv
     public :: stdlib_dposvx
     public :: stdlib_dpotf2
     public :: stdlib_dpotrf
     public :: stdlib_dpotrf2
     public :: stdlib_dpotri
     public :: stdlib_dpotrs
     public :: stdlib_dppcon
     public :: stdlib_dppequ
     public :: stdlib_dpprfs
     public :: stdlib_dppsv
     public :: stdlib_dppsvx
     public :: stdlib_dpptrf
     public :: stdlib_dpptri
     public :: stdlib_dpptrs
     public :: stdlib_dpstf2
     public :: stdlib_dpstrf
     public :: stdlib_dptcon
     public :: stdlib_dpteqr
     public :: stdlib_dptrfs
     public :: stdlib_dptsv
     public :: stdlib_dptsvx
     public :: stdlib_dpttrf
     public :: stdlib_dpttrs
     public :: stdlib_dptts2
     public :: stdlib_drscl
     public :: stdlib_dsb2st_kernels
     public :: stdlib_dsbev
     public :: stdlib_dsbevd
     public :: stdlib_dsbevx
     public :: stdlib_dsbgst
     public :: stdlib_dsbgv
     public :: stdlib_dsbgvd
     public :: stdlib_dsbgvx
     public :: stdlib_dsbtrd
     public :: stdlib_dsfrk
     public :: stdlib_dsgesv
     public :: stdlib_dspcon
     public :: stdlib_dspev
     public :: stdlib_dspevd
     public :: stdlib_dspevx
     public :: stdlib_dspgst
     public :: stdlib_dspgv
     public :: stdlib_dspgvd
     public :: stdlib_dspgvx
     public :: stdlib_dsposv
     public :: stdlib_dsprfs
     public :: stdlib_dspsv
     public :: stdlib_dspsvx
     public :: stdlib_dsptrd
     public :: stdlib_dsptrf
     public :: stdlib_dsptri
     public :: stdlib_dsptrs
     public :: stdlib_dstebz
     public :: stdlib_dstedc
     public :: stdlib_dstegr
     public :: stdlib_dstein
     public :: stdlib_dstemr
     public :: stdlib_dsteqr
     public :: stdlib_dsterf
     public :: stdlib_dstev
     public :: stdlib_dstevd
     public :: stdlib_dstevr
     public :: stdlib_dstevx
     public :: stdlib_dsycon
     public :: stdlib_dsycon_rook
     public :: stdlib_dsyconv
     public :: stdlib_dsyconvf
     public :: stdlib_dsyconvf_rook
     public :: stdlib_dsyequb
     public :: stdlib_dsyev
     public :: stdlib_dsyevd
     public :: stdlib_dsyevr
     public :: stdlib_dsyevx
     public :: stdlib_dsygs2
     public :: stdlib_dsygst
     public :: stdlib_dsygv
     public :: stdlib_dsygvd
     public :: stdlib_dsygvx
     public :: stdlib_dsyrfs
     public :: stdlib_dsysv
     public :: stdlib_dsysv_aa
     public :: stdlib_dsysv_rk
     public :: stdlib_dsysv_rook
     public :: stdlib_dsysvx
     public :: stdlib_dsyswapr
     public :: stdlib_dsytd2
     public :: stdlib_dsytf2
     public :: stdlib_dsytf2_rk
     public :: stdlib_dsytf2_rook
     public :: stdlib_dsytrd
     public :: stdlib_dsytrd_sb2st
     public :: stdlib_dsytrd_sy2sb
     public :: stdlib_dsytrf
     public :: stdlib_dsytrf_aa
     public :: stdlib_dsytrf_rk
     public :: stdlib_dsytrf_rook
     public :: stdlib_dsytri
     public :: stdlib_dsytri_rook
     public :: stdlib_dsytrs
     public :: stdlib_dsytrs2
     public :: stdlib_dsytrs_3
     public :: stdlib_dsytrs_aa
     public :: stdlib_dsytrs_rook
     public :: stdlib_dtbcon
     public :: stdlib_dtbrfs
     public :: stdlib_dtbtrs
     public :: stdlib_dtfsm
     public :: stdlib_dtftri
     public :: stdlib_dtfttp
     public :: stdlib_dtfttr
     public :: stdlib_dtgevc
     public :: stdlib_dtgex2
     public :: stdlib_dtgexc
     public :: stdlib_dtgsen
     public :: stdlib_dtgsja
     public :: stdlib_dtgsna
     public :: stdlib_dtgsy2
     public :: stdlib_dtgsyl
     public :: stdlib_dtpcon
     public :: stdlib_dtplqt
     public :: stdlib_dtplqt2
     public :: stdlib_dtpmlqt
     public :: stdlib_dtpmqrt
     public :: stdlib_dtpqrt
     public :: stdlib_dtpqrt2
     public :: stdlib_dtprfb
     public :: stdlib_dtprfs
     public :: stdlib_dtptri
     public :: stdlib_dtptrs
     public :: stdlib_dtpttf
     public :: stdlib_dtpttr
     public :: stdlib_dtrcon
     public :: stdlib_dtrevc
     public :: stdlib_dtrevc3
     public :: stdlib_dtrexc
     public :: stdlib_dtrrfs
     public :: stdlib_dtrsen
     public :: stdlib_dtrsna
     public :: stdlib_dtrsyl
     public :: stdlib_dtrti2
     public :: stdlib_dtrtri
     public :: stdlib_dtrtrs
     public :: stdlib_dtrttf
     public :: stdlib_dtrttp
     public :: stdlib_dtzrzf
     public :: stdlib_dzsum1

     ! 64-bit real constants
     real(dp), parameter, private :: zero = 0.00_dp
     real(dp), parameter, private :: half = 0.50_dp
     real(dp), parameter, private :: one = 1.00_dp
     real(dp), parameter, private :: two = 2.00_dp
     real(dp), parameter, private :: three = 3.00_dp
     real(dp), parameter, private :: four = 4.00_dp
     real(dp), parameter, private :: eight = 8.00_dp
     real(dp), parameter, private :: ten = 10.00_dp

     ! 64-bit complex constants
     complex(dp), parameter, private :: czero = (0.0_dp, 0.0_dp)
     complex(dp), parameter, private :: chalf = (0.5_dp, 0.0_dp)
     complex(dp), parameter, private :: cone = (1.0_dp, 0.0_dp)

     ! 64-bit scaling constants
     integer, parameter, private :: maxexp = maxexponent(zero)
     integer, parameter, private :: minexp = minexponent(zero)
     real(dp), parameter, private :: rradix = real(radix(zero), dp)
     real(dp), parameter, private :: ulp = epsilon(zero)
     real(dp), parameter, private :: eps = ulp*half
     real(dp), parameter, private :: safmin = rradix**max(minexp - 1, 1 - maxexp)
     real(dp), parameter, private :: safmax = one/safmin
     real(dp), parameter, private :: smlnum = safmin/ulp
     real(dp), parameter, private :: bignum = safmax*ulp
     real(dp), parameter, private :: rtmin = sqrt(smlnum)
     real(dp), parameter, private :: rtmax = sqrt(bignum)

     ! 64-bit Blue's scaling constants
     ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771
     real(dp), parameter, private :: tsml = rradix**ceiling((minexp - 1)*half)
     real(dp), parameter, private :: tbig = rradix**floor((maxexp - digits(zero) + 1)*half)
     real(dp), parameter, private :: ssml = rradix**(-floor((minexp - digits(zero))*half))
     real(dp), parameter, private :: sbig = rradix**(-ceiling((maxexp + digits(zero) - 1)*half))

     contains

     ! DGBTF2 computes an LU factorization of a real m-by-n band matrix A
     ! using partial pivoting with row interchanges.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_dgbtf2(m, n, kl, ku, ab, ldab, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, kl, ku, ldab, m, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, jp, ju, km, kv
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! kv is the number of superdiagonals in the factor u, allowing for
           ! fill-in.
           kv = ku + kl
           ! test the input parameters.
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kl < 0) then
              info = -3
           else if (ku < 0) then
              info = -4
           else if (ldab < kl + kv + 1) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dgbtf2', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) return
           ! gaussian elimination with partial pivoting
           ! set fill-in elements in columns ku+2 to kv to zero.
           do j = ku + 2, min(kv, n)
              do i = kv - j + 2, kl
                 ab(i, j) = zero
              end do
           end do
           ! ju is the index of the last column affected by the current stage
           ! of the factorization.
           ju = 1
           loop_40: do j = 1, min(m, n)
              ! set fill-in elements in column j+kv to zero.
              if (j + kv <= n) then
                 do i = 1, kl
                    ab(i, j + kv) = zero
                 end do
              end if
              ! find pivot and test for singularity. km is the number of
              ! subdiagonal elements in the current column.
              km = min(kl, m - j)
              jp = stdlib_idamax(km + 1, ab(kv + 1, j), 1)
              ipiv(j) = jp + j - 1
              if (ab(kv + jp, j) /= zero) then
                 ju = max(ju, min(j + ku + jp - 1, n))
                 ! apply interchange to columns j to ju.
                 if (jp /= 1) call stdlib_dswap(ju - j + 1, ab(kv + jp, j), ldab - 1, ab(kv + 1, j), ldab - &
                           1)
                 if (km > 0) then
                    ! compute multipliers.
                    call stdlib_dscal(km, one/ab(kv + 1, j), ab(kv + 2, j), 1)
                    ! update trailing submatrix within the band.
                    if (ju > j) call stdlib_dger(km, ju - j, -one, ab(kv + 2, j), 1, ab(kv, j + 1), &
                              ldab - 1, ab(kv + 1, j + 1), ldab - 1)
                 end if
              else
                 ! if pivot is zero, set info to the index of the pivot
                 ! unless a zero pivot has already been found.
                 if (info == 0) info = j
              end if
           end do loop_40
           return
           ! end of stdlib_dgbtf2
     end subroutine stdlib_dgbtf2

     ! DGBTRS solves a system of linear equations
     ! A * X = B  or  A**T * X = B
     ! with a general band matrix A using the LU factorization computed
     ! by DGBTRF.

     subroutine stdlib_dgbtrs(trans, n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: trans
           integer(ilp) :: info, kl, ku, ldab, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: ab(ldab, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lnoti, notran
           integer(ilp) :: i, j, kd, l, lm
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           notran = stdlib_lsame(trans, 'n')
           if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame(trans, &
                     'c')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kl < 0) then
              info = -3
           else if (ku < 0) then
              info = -4
           else if (nrhs < 0) then
              info = -5
           else if (ldab < (2*kl + ku + 1)) then
              info = -7
           else if (ldb < max(1, n)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dgbtrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           kd = ku + kl + 1
           lnoti = kl > 0
           if (notran) then
              ! solve  a*x = b.
              ! solve l*x = b, overwriting b with x.
              ! l is represented as a product of permutations and unit lower
              ! triangular matrices l = p(1) * l(1) * ... * p(n-1) * l(n-1),
              ! where each transformation l(i) is a rank-one modification of
              ! the identity matrix.
              if (lnoti) then
                 do j = 1, n - 1
                    lm = min(kl, n - j)
                    l = ipiv(j)
                    if (l /= j) call stdlib_dswap(nrhs, b(l, 1), ldb, b(j, 1), ldb)
                    call stdlib_dger(lm, nrhs, -one, ab(kd + 1, j), 1, b(j, 1), ldb, b(j + 1, 1) &
                              , ldb)
                 end do
              end if
              do i = 1, nrhs
                 ! solve u*x = b, overwriting b with x.
                 call stdlib_dtbsv('upper', 'no transpose', 'non-unit', n, kl + ku, ab, ldab, b(1, &
                           i), 1)
              end do
           else
              ! solve a**t*x = b.
              do i = 1, nrhs
                 ! solve u**t*x = b, overwriting b with x.
                 call stdlib_dtbsv('upper', 'transpose', 'non-unit', n, kl + ku, ab, ldab, b(1, i) &
                           , 1)
              end do
              ! solve l**t*x = b, overwriting b with x.
              if (lnoti) then
                 do j = n - 1, 1, -1
                    lm = min(kl, n - j)
                    call stdlib_dgemv('transpose', lm, nrhs, -one, b(j + 1, 1), ldb, ab(kd + 1, j) &
                              , 1, one, b(j, 1), ldb)
                    l = ipiv(j)
                    if (l /= j) call stdlib_dswap(nrhs, b(l, 1), ldb, b(j, 1), ldb)
                 end do
              end if
           end if
           return
           ! end of stdlib_dgbtrs
     end subroutine stdlib_dgbtrs

     ! DGEBAK forms the right or left eigenvectors of a real general matrix
     ! by backward transformation on the computed eigenvectors of the
     ! balanced matrix output by DGEBAL.

     subroutine stdlib_dgebak(job, side, n, ilo, ihi, scale, m, v, ldv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: job, side
           integer(ilp) :: ihi, ilo, info, ldv, m, n
           ! .. array arguments ..
           real(dp) :: scale(*), v(ldv, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: leftv, rightv
           integer(ilp) :: i, ii, k
           real(dp) :: s
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! decode and test the input parameters
           rightv = stdlib_lsame(side, 'r')
           leftv = stdlib_lsame(side, 'l')
           info = 0
           if (.not. stdlib_lsame(job, 'n') .and. .not. stdlib_lsame(job, 'p') &
                     .and. .not. stdlib_lsame(job, 's') .and. .not. stdlib_lsame(job, 'b')) then
              info = -1
           else if (.not. rightv .and. .not. leftv) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (ilo < 1 .or. ilo > max(1, n)) then
              info = -4
           else if (ihi < min(ilo, n) .or. ihi > n) then
              info = -5
           else if (m < 0) then
              info = -7
           else if (ldv < max(1, n)) then
              info = -9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dgebak', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (m == 0) return
           if (stdlib_lsame(job, 'n')) return
           if (ilo == ihi) go to 30
           ! backward balance
           if (stdlib_lsame(job, 's') .or. stdlib_lsame(job, 'b')) then
              if (rightv) then
                 do i = ilo, ihi
                    s = scale(i)
                    call stdlib_dscal(m, s, v(i, 1), ldv)
                 end do
              end if
              if (leftv) then
                 do i = ilo, ihi
                    s = one/scale(i)
                    call stdlib_dscal(m, s, v(i, 1), ldv)
                 end do
              end if
           end if
           ! backward permutation
           ! for  i = ilo-1 step -1 until 1,
                    ! ihi+1 step 1 until n do --
30      continue
           if (stdlib_lsame(job, 'p') .or. stdlib_lsame(job, 'b')) then
              if (rightv) then
                 loop_40: do ii = 1, n
                    i = ii
                    if (i >= ilo .and. i <= ihi) cycle loop_40
                    if (i < ilo) i = ilo - ii
                    k = scale(i)
                    if (k == i) cycle loop_40
                    call stdlib_dswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_40
              end if
              if (leftv) then
                 loop_50: do ii = 1, n
                    i = ii
                    if (i >= ilo .and. i <= ihi) cycle loop_50
                    if (i < ilo) i = ilo - ii
                    k = scale(i)
                    if (k == i) cycle loop_50
                    call stdlib_dswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_50
              end if
           end if
           return
           ! end of stdlib_dgebak
     end subroutine stdlib_dgebak

     ! DGGBAK forms the right or left eigenvectors of a real generalized
     ! eigenvalue problem A*x = lambda*B*x, by backward transformation on
     ! the computed eigenvectors of the balanced pair of matrices output by
     ! DGGBAL.

     subroutine stdlib_dggbak(job, side, n, ilo, ihi, lscale, rscale, m, v, ldv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: job, side
           integer(ilp) :: ihi, ilo, info, ldv, m, n
           ! .. array arguments ..
           real(dp) :: lscale(*), rscale(*), v(ldv, *)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: leftv, rightv
           integer(ilp) :: i, k
     
           ! .. intrinsic functions ..
           intrinsic :: max, int
           ! .. executable statements ..
           ! test the input parameters
           rightv = stdlib_lsame(side, 'r')
           leftv = stdlib_lsame(side, 'l')
           info = 0
           if (.not. stdlib_lsame(job, 'n') .and. .not. stdlib_lsame(job, 'p') &
                     .and. .not. stdlib_lsame(job, 's') .and. .not. stdlib_lsame(job, 'b')) then
              info = -1
           else if (.not. rightv .and. .not. leftv) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (ilo < 1) then
              info = -4
           else if (n == 0 .and. ihi == 0 .and. ilo /= 1) then
              info = -4
           else if (n > 0 .and. (ihi < ilo .or. ihi > max(1, n))) then
              info = -5
           else if (n == 0 .and. ilo == 1 .and. ihi /= 0) then
              info = -5
           else if (m < 0) then
              info = -8
           else if (ldv < max(1, n)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dggbak', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (m == 0) return
           if (stdlib_lsame(job, 'n')) return
           if (ilo == ihi) go to 30
           ! backward balance
           if (stdlib_lsame(job, 's') .or. stdlib_lsame(job, 'b')) then
              ! backward transformation on right eigenvectors
              if (rightv) then
                 do i = ilo, ihi
                    call stdlib_dscal(m, rscale(i), v(i, 1), ldv)
                 end do
              end if
              ! backward transformation on left eigenvectors
              if (leftv) then
                 do i = ilo, ihi
                    call stdlib_dscal(m, lscale(i), v(i, 1), ldv)
                 end do
              end if
           end if
           ! backward permutation
30      continue
           if (stdlib_lsame(job, 'p') .or. stdlib_lsame(job, 'b')) then
              ! backward permutation on right eigenvectors
              if (rightv) then
                 if (ilo == 1) go to 50
                 loop_40: do i = ilo - 1, 1, -1
                    k = int(rscale(i), KIND=ilp)
                    if (k == i) cycle loop_40
                    call stdlib_dswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_40
50      continue
                 if (ihi == n) go to 70
                 loop_60: do i = ihi + 1, n
                    k = int(rscale(i), KIND=ilp)
                    if (k == i) cycle loop_60
                    call stdlib_dswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_60
              end if
              ! backward permutation on left eigenvectors
70      continue
              if (leftv) then
                 if (ilo == 1) go to 90
                 loop_80: do i = ilo - 1, 1, -1
                    k = int(lscale(i), KIND=ilp)
                    if (k == i) cycle loop_80
                    call stdlib_dswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_80
90      continue
                 if (ihi == n) go to 110
                 loop_100: do i = ihi + 1, n
                    k = int(lscale(i), KIND=ilp)
                    if (k == i) cycle loop_100
                    call stdlib_dswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_100
              end if
           end if
110    continue
           return
           ! end of stdlib_dggbak
     end subroutine stdlib_dggbak

     ! DGTSV  solves the equation
     ! A*X = B,
     ! where A is an n by n tridiagonal matrix, by Gaussian elimination with
     ! partial pivoting.
     ! Note that the equation  A**T*X = B  may be solved by interchanging the
     ! order of the arguments DU and DL.

     subroutine stdlib_dgtsv(n, nrhs, dl, d, du, b, ldb, info)
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: b(ldb, *), d(*), dl(*), du(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: fact, temp
           ! .. intrinsic functions ..
           intrinsic :: abs, max
     
           ! .. executable statements ..
           info = 0
           if (n < 0) then
              info = -1
           else if (nrhs < 0) then
              info = -2
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dgtsv ', -info)
              return
           end if
           if (n == 0) return
           if (nrhs == 1) then
              loop_10: do i = 1, n - 2
                 if (abs(d(i)) >= abs(dl(i))) then
                    ! no row interchange required
                    if (d(i) /= zero) then
                       fact = dl(i)/d(i)
                       d(i + 1) = d(i + 1) - fact*du(i)
                       b(i + 1, 1) = b(i + 1, 1) - fact*b(i, 1)
                    else
                       info = i
                       return
                    end if
                    dl(i) = zero
                 else
                    ! interchange rows i and i+1
                    fact = d(i)/dl(i)
                    d(i) = dl(i)
                    temp = d(i + 1)
                    d(i + 1) = du(i) - fact*temp
                    dl(i) = du(i + 1)
                    du(i + 1) = -fact*dl(i)
                    du(i) = temp
                    temp = b(i, 1)
                    b(i, 1) = b(i + 1, 1)
                    b(i + 1, 1) = temp - fact*b(i + 1, 1)
                 end if
              end do loop_10
              if (n > 1) then
                 i = n - 1
                 if (abs(d(i)) >= abs(dl(i))) then
                    if (d(i) /= zero) then
                       fact = dl(i)/d(i)
                       d(i + 1) = d(i + 1) - fact*du(i)
                       b(i + 1, 1) = b(i + 1, 1) - fact*b(i, 1)
                    else
                       info = i
                       return
                    end if
                 else
                    fact = d(i)/dl(i)
                    d(i) = dl(i)
                    temp = d(i + 1)
                    d(i + 1) = du(i) - fact*temp
                    du(i) = temp
                    temp = b(i, 1)
                    b(i, 1) = b(i + 1, 1)
                    b(i + 1, 1) = temp - fact*b(i + 1, 1)
                 end if
              end if
              if (d(n) == zero) then
                 info = n
                 return
              end if
           else
              loop_40: do i = 1, n - 2
                 if (abs(d(i)) >= abs(dl(i))) then
                    ! no row interchange required
                    if (d(i) /= zero) then
                       fact = dl(i)/d(i)
                       d(i + 1) = d(i + 1) - fact*du(i)
                       do j = 1, nrhs
                          b(i + 1, j) = b(i + 1, j) - fact*b(i, j)
                       end do
                    else
                       info = i
                       return
                    end if
                    dl(i) = zero
                 else
                    ! interchange rows i and i+1
                    fact = d(i)/dl(i)
                    d(i) = dl(i)
                    temp = d(i + 1)
                    d(i + 1) = du(i) - fact*temp
                    dl(i) = du(i + 1)
                    du(i + 1) = -fact*dl(i)
                    du(i) = temp
                    do j = 1, nrhs
                       temp = b(i, j)
                       b(i, j) = b(i + 1, j)
                       b(i + 1, j) = temp - fact*b(i + 1, j)
                    end do
                 end if
              end do loop_40
              if (n > 1) then
                 i = n - 1
                 if (abs(d(i)) >= abs(dl(i))) then
                    if (d(i) /= zero) then
                       fact = dl(i)/d(i)
                       d(i + 1) = d(i + 1) - fact*du(i)
                       do j = 1, nrhs
                          b(i + 1, j) = b(i + 1, j) - fact*b(i, j)
                       end do
                    else
                       info = i
                       return
                    end if
                 else
                    fact = d(i)/dl(i)
                    d(i) = dl(i)
                    temp = d(i + 1)
                    d(i + 1) = du(i) - fact*temp
                    du(i) = temp
                    do j = 1, nrhs
                       temp = b(i, j)
                       b(i, j) = b(i + 1, j)
                       b(i + 1, j) = temp - fact*b(i + 1, j)
                    end do
                 end if
              end if
              if (d(n) == zero) then
                 info = n
                 return
              end if
           end if
           ! back solve with the matrix u from the factorization.
           if (nrhs <= 2) then
              j = 1
70      continue
              b(n, j) = b(n, j)/d(n)
              if (n > 1) b(n - 1, j) = (b(n - 1, j) - du(n - 1)*b(n, j))/d(n - 1)
              do i = n - 2, 1, -1
                 b(i, j) = (b(i, j) - du(i)*b(i + 1, j) - dl(i)*b(i + 2, j))/d(i)
                           
              end do
              if (j < nrhs) then
                 j = j + 1
                 go to 70
              end if
           else
              do j = 1, nrhs
                 b(n, j) = b(n, j)/d(n)
                 if (n > 1) b(n - 1, j) = (b(n - 1, j) - du(n - 1)*b(n, j))/d(n - 1)
                 do i = n - 2, 1, -1
                    b(i, j) = (b(i, j) - du(i)*b(i + 1, j) - dl(i)*b(i + 2, j))/d(i)
                              
                 end do
              end do
           end if
           return
           ! end of stdlib_dgtsv
     end subroutine stdlib_dgtsv

     ! DGTTRF computes an LU factorization of a real tridiagonal matrix A
     ! using elimination with partial pivoting and row interchanges.
     ! The factorization has the form
     ! A = L * U
     ! where L is a product of permutation and unit lower bidiagonal
     ! matrices and U is upper triangular with nonzeros in only the main
     ! diagonal and first two superdiagonals.

     subroutine stdlib_dgttrf(n, dl, d, du, du2, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: d(*), dl(*), du(*), du2(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           real(dp) :: fact, temp
           ! .. intrinsic functions ..
           intrinsic :: abs
     
           ! .. executable statements ..
           info = 0
           if (n < 0) then
              info = -1
              call stdlib_xerbla('stdlib_dgttrf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! initialize ipiv(i) = i and du2(i) = 0
           do i = 1, n
              ipiv(i) = i
           end do
           do i = 1, n - 2
              du2(i) = zero
           end do
           do i = 1, n - 2
              if (abs(d(i)) >= abs(dl(i))) then
                 ! no row interchange required, eliminate dl(i)
                 if (d(i) /= zero) then
                    fact = dl(i)/d(i)
                    dl(i) = fact
                    d(i + 1) = d(i + 1) - fact*du(i)
                 end if
              else
                 ! interchange rows i and i+1, eliminate dl(i)
                 fact = d(i)/dl(i)
                 d(i) = dl(i)
                 dl(i) = fact
                 temp = du(i)
                 du(i) = d(i + 1)
                 d(i + 1) = temp - fact*d(i + 1)
                 du2(i) = du(i + 1)
                 du(i + 1) = -fact*du(i + 1)
                 ipiv(i) = i + 1
              end if
           end do
           if (n > 1) then
              i = n - 1
              if (abs(d(i)) >= abs(dl(i))) then
                 if (d(i) /= zero) then
                    fact = dl(i)/d(i)
                    dl(i) = fact
                    d(i + 1) = d(i + 1) - fact*du(i)
                 end if
              else
                 fact = d(i)/dl(i)
                 d(i) = dl(i)
                 dl(i) = fact
                 temp = du(i)
                 du(i) = d(i + 1)
                 d(i + 1) = temp - fact*d(i + 1)
                 ipiv(i) = i + 1
              end if
           end if
           ! check for a zero on the diagonal of u.
           do i = 1, n
              if (d(i) == zero) then
                 info = i
                 go to 50
              end if
           end do
50      continue
           return
           ! end of stdlib_dgttrf
     end subroutine stdlib_dgttrf

     ! DGTTS2 solves one of the systems of equations
     ! A*X = B  or  A**T*X = B,
     ! with a tridiagonal matrix A using the LU factorization computed
     ! by DGTTRF.

     subroutine stdlib_dgtts2(itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: itrans, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: b(ldb, *), d(*), dl(*), du(*), du2(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ip, j
           real(dp) :: temp
           ! .. executable statements ..
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (itrans == 0) then
              ! solve a*x = b using the lu factorization of a,
              ! overwriting each right hand side vector with its solution.
              if (nrhs <= 1) then
                 j = 1
10      continue
                 ! solve l*x = b.
                 do i = 1, n - 1
                    ip = ipiv(i)
                    temp = b(i + 1 - ip + i, j) - dl(i)*b(ip, j)
                    b(i, j) = b(ip, j)
                    b(i + 1, j) = temp
                 end do
                 ! solve u*x = b.
                 b(n, j) = b(n, j)/d(n)
                 if (n > 1) b(n - 1, j) = (b(n - 1, j) - du(n - 1)*b(n, j))/d(n - 1)
                 do i = n - 2, 1, -1
                    b(i, j) = (b(i, j) - du(i)*b(i + 1, j) - du2(i)*b(i + 2, j))/d(i)
                              
                 end do
                 if (j < nrhs) then
                    j = j + 1
                    go to 10
                 end if
              else
                 do j = 1, nrhs
                    ! solve l*x = b.
                    do i = 1, n - 1
                       if (ipiv(i) == i) then
                          b(i + 1, j) = b(i + 1, j) - dl(i)*b(i, j)
                       else
                          temp = b(i, j)
                          b(i, j) = b(i + 1, j)
                          b(i + 1, j) = temp - dl(i)*b(i, j)
                       end if
                    end do
                    ! solve u*x = b.
                    b(n, j) = b(n, j)/d(n)
                    if (n > 1) b(n - 1, j) = (b(n - 1, j) - du(n - 1)*b(n, j))/d(n - 1)
                    do i = n - 2, 1, -1
                       b(i, j) = (b(i, j) - du(i)*b(i + 1, j) - du2(i)*b(i + 2, j))/d(i)
                                 
                    end do
                 end do
              end if
           else
              ! solve a**t * x = b.
              if (nrhs <= 1) then
                 ! solve u**t*x = b.
                 j = 1
70      continue
                 b(1, j) = b(1, j)/d(1)
                 if (n > 1) b(2, j) = (b(2, j) - du(1)*b(1, j))/d(2)
                 do i = 3, n
                    b(i, j) = (b(i, j) - du(i - 1)*b(i - 1, j) - du2(i - 2)*b(i - 2, j))/d(i &
                              )
                 end do
                 ! solve l**t*x = b.
                 do i = n - 1, 1, -1
                    ip = ipiv(i)
                    temp = b(i, j) - dl(i)*b(i + 1, j)
                    b(i, j) = b(ip, j)
                    b(ip, j) = temp
                 end do
                 if (j < nrhs) then
                    j = j + 1
                    go to 70
                 end if
              else
                 do j = 1, nrhs
                    ! solve u**t*x = b.
                    b(1, j) = b(1, j)/d(1)
                    if (n > 1) b(2, j) = (b(2, j) - du(1)*b(1, j))/d(2)
                    do i = 3, n
                       b(i, j) = (b(i, j) - du(i - 1)*b(i - 1, j) - du2(i - 2)*b(i - 2, j))/d( &
                                  i)
                    end do
                    do i = n - 1, 1, -1
                       if (ipiv(i) == i) then
                          b(i, j) = b(i, j) - dl(i)*b(i + 1, j)
                       else
                          temp = b(i + 1, j)
                          b(i + 1, j) = b(i, j) - dl(i)*temp
                          b(i, j) = temp
                       end if
                    end do
                 end do
              end if
           end if
           ! end of stdlib_dgtts2
     end subroutine stdlib_dgtts2

     ! DLA_GBRPVGRW computes the reciprocal pivot growth factor
     ! norm(A)/norm(U). The "max absolute element" norm is used. If this is
     ! much less than 1, the stability of the LU factorization of the
     ! (equilibrated) matrix A could be poor. This also means that the
     ! solution X, estimated condition numbers, and error bounds could be
     ! unreliable.

     real(dp) function stdlib_dla_gbrpvgrw(n, kl, ku, ncols, ab, ldab, afb, ldafb)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n, kl, ku, ncols, ldab, ldafb
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), afb(ldafb, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j, kd
           real(dp) :: amax, umax, rpvgrw
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
           ! .. executable statements ..
           rpvgrw = 1.0_dp
           kd = ku + 1
           do j = 1, ncols
              amax = 0.0_dp
              umax = 0.0_dp
              do i = max(j - ku, 1), min(j + kl, n)
                 amax = max(abs(ab(kd + i - j, j)), amax)
              end do
              do i = max(j - ku, 1), j
                 umax = max(abs(afb(kd + i - j, j)), umax)
              end do
              if (umax /= 0.0_dp) then
                 rpvgrw = min(amax/umax, rpvgrw)
              end if
           end do
           stdlib_dla_gbrpvgrw = rpvgrw
           ! end of stdlib_dla_gbrpvgrw
     end function stdlib_dla_gbrpvgrw

     ! DLA_GERPVGRW computes the reciprocal pivot growth factor
     ! norm(A)/norm(U). The "max absolute element" norm is used. If this is
     ! much less than 1, the stability of the LU factorization of the
     ! (equilibrated) matrix A could be poor. This also means that the
     ! solution X, estimated condition numbers, and error bounds could be
     ! unreliable.

     real(dp) function stdlib_dla_gerpvgrw(n, ncols, a, lda, af, ldaf)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n, ncols, lda, ldaf
           ! .. array arguments ..
           real(dp) :: a(lda, *), af(ldaf, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: amax, umax, rpvgrw
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
           ! .. executable statements ..
           rpvgrw = 1.0_dp
           do j = 1, ncols
              amax = 0.0_dp
              umax = 0.0_dp
              do i = 1, n
                 amax = max(abs(a(i, j)), amax)
              end do
              do i = 1, j
                 umax = max(abs(af(i, j)), umax)
              end do
              if (umax /= 0.0_dp) then
                 rpvgrw = min(amax/umax, rpvgrw)
              end if
           end do
           stdlib_dla_gerpvgrw = rpvgrw
           ! end of stdlib_dla_gerpvgrw
     end function stdlib_dla_gerpvgrw

     ! DLA_WWADDW adds a vector W into a doubled-single vector (X, Y).
     ! This works for all extant IBM's hex and binary floating point
     ! arithmetic, but not for decimal.

     subroutine stdlib_dla_wwaddw(n, x, y, w)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n
           ! .. array arguments ..
           real(dp) :: x(*), y(*), w(*)
        ! =====================================================================
           ! .. local scalars ..
           real(dp) :: s
           integer(ilp) :: i
           ! .. executable statements ..
           do 10 i = 1, n
             s = x(i) + w(i)
             s = (s + s) - s
             y(i) = ((x(i) - s) + w(i)) + y(i)
             x(i) = s
10    continue
           return
           ! end of stdlib_dla_wwaddw
     end subroutine stdlib_dla_wwaddw

     ! DLABAD takes as input the values computed by DLAMCH for underflow and
     ! overflow, and returns the square root of each of these values if the
     ! log of LARGE is sufficiently large.  This subroutine is intended to
     ! identify machines with a large exponent range, such as the Crays, and
     ! redefine the underflow and overflow limits to be the square roots of
     ! the values computed by DLAMCH.  This subroutine is needed because
     ! DLAMCH does not compensate for poor arithmetic in the upper half of
     ! the exponent range, as is found on a Cray.

     subroutine stdlib_dlabad(small, large)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: large, small
        ! =====================================================================
           ! .. intrinsic functions ..
           intrinsic :: log10, sqrt
           ! .. executable statements ..
           ! if it looks like we're on a cray, take the square root of
           ! small and large to avoid overflow and underflow problems.
           if (log10(large) > 2000.d0) then
              small = sqrt(small)
              large = sqrt(large)
           end if
           return
           ! end of stdlib_dlabad
     end subroutine stdlib_dlabad

     ! DLACN2 estimates the 1-norm of a square, real matrix A.
     ! Reverse communication is used for evaluating matrix-vector products.

     subroutine stdlib_dlacn2(n, v, x, isgn, est, kase, isave)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: kase, n
           real(dp) :: est
           ! .. array arguments ..
           integer(ilp) :: isgn(*), isave(3)
           real(dp) :: v(*), x(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: itmax = 5
           
           ! .. local scalars ..
           integer(ilp) :: i, jlast
           real(dp) :: altsgn, estold, temp, xs
     
           ! .. intrinsic functions ..
           intrinsic :: abs, dble, nint
           ! .. executable statements ..
           if (kase == 0) then
              do i = 1, n
                 x(i) = one/real(n, KIND=dp)
              end do
              kase = 1
              isave(1) = 1
              return
           end if
           go to(20, 40, 70, 110, 140) isave(1)
           ! ................ entry   (isave( 1 ) = 1)
           ! first iteration.  x has been overwritten by a*x.
20      continue
           if (n == 1) then
              v(1) = x(1)
              est = abs(v(1))
              ! ... quit
              go to 150
           end if
           est = stdlib_dasum(n, x, 1)
           do i = 1, n
              if (x(i) >= zero) then
                 x(i) = one
              else
                 x(i) = -one
              end if
              isgn(i) = nint(x(i), KIND=ilp)
           end do
           kase = 2
           isave(1) = 2
           return
           ! ................ entry   (isave( 1 ) = 2)
           ! first iteration.  x has been overwritten by transpose(a)*x.
40      continue
           isave(2) = stdlib_idamax(n, x, 1)
           isave(3) = 2
           ! main loop - iterations 2,3,...,itmax.
50      continue
           do i = 1, n
              x(i) = zero
           end do
           x(isave(2)) = one
           kase = 1
           isave(1) = 3
           return
           ! ................ entry   (isave( 1 ) = 3)
           ! x has been overwritten by a*x.
70      continue
           call stdlib_dcopy(n, x, 1, v, 1)
           estold = est
           est = stdlib_dasum(n, v, 1)
           do i = 1, n
              if (x(i) >= zero) then
                 xs = one
              else
                 xs = -one
              end if
              if (nint(xs, KIND=ilp) /= isgn(i)) go to 90
           end do
           ! repeated sign vector detected, hence algorithm has converged.
           go to 120
90      continue
           ! test for cycling.
           if (est <= estold) go to 120
           do i = 1, n
              if (x(i) >= zero) then
                 x(i) = one
              else
                 x(i) = -one
              end if
              isgn(i) = nint(x(i), KIND=ilp)
           end do
           kase = 2
           isave(1) = 4
           return
           ! ................ entry   (isave( 1 ) = 4)
           ! x has been overwritten by transpose(a)*x.
110    continue
           jlast = isave(2)
           isave(2) = stdlib_idamax(n, x, 1)
           if ((x(jlast) /= abs(x(isave(2)))) .and. (isave(3) < itmax)) then
              isave(3) = isave(3) + 1
              go to 50
           end if
           ! iteration complete.  final stage.
120    continue
           altsgn = one
           do i = 1, n
              x(i) = altsgn*(one + real(i - 1, KIND=dp)/real(n - 1, KIND=dp))
              altsgn = -altsgn
           end do
           kase = 1
           isave(1) = 5
           return
           ! ................ entry   (isave( 1 ) = 5)
           ! x has been overwritten by a*x.
140    continue
           temp = two*(stdlib_dasum(n, x, 1)/real(3*n, KIND=dp))
           if (temp > est) then
              call stdlib_dcopy(n, x, 1, v, 1)
              est = temp
           end if
150    continue
           kase = 0
           return
           ! end of stdlib_dlacn2
     end subroutine stdlib_dlacn2

     ! DLACON estimates the 1-norm of a square, real matrix A.
     ! Reverse communication is used for evaluating matrix-vector products.

     subroutine stdlib_dlacon(n, v, x, isgn, est, kase)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: kase, n
           real(dp) :: est
           ! .. array arguments ..
           integer(ilp) :: isgn(*)
           real(dp) :: v(*), x(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: itmax = 5
           
           ! .. local scalars ..
           integer(ilp) :: i, iter, j, jlast, jump
           real(dp) :: altsgn, estold, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, dble, nint, sign
           ! .. save statement ..
           save
           ! .. executable statements ..
           if (kase == 0) then
              do i = 1, n
                 x(i) = one/real(n, KIND=dp)
              end do
              kase = 1
              jump = 1
              return
           end if
           go to(20, 40, 70, 110, 140) jump
           ! ................ entry   (jump = 1)
           ! first iteration.  x has been overwritten by a*x.
20      continue
           if (n == 1) then
              v(1) = x(1)
              est = abs(v(1))
              ! ... quit
              go to 150
           end if
           est = stdlib_dasum(n, x, 1)
           do i = 1, n
              x(i) = sign(one, x(i))
              isgn(i) = nint(x(i), KIND=ilp)
           end do
           kase = 2
           jump = 2
           return
           ! ................ entry   (jump = 2)
           ! first iteration.  x has been overwritten by transpose(a)*x.
40      continue
           j = stdlib_idamax(n, x, 1)
           iter = 2
           ! main loop - iterations 2,3,...,itmax.
50      continue
           do i = 1, n
              x(i) = zero
           end do
           x(j) = one
           kase = 1
           jump = 3
           return
           ! ................ entry   (jump = 3)
           ! x has been overwritten by a*x.
70      continue
           call stdlib_dcopy(n, x, 1, v, 1)
           estold = est
           est = stdlib_dasum(n, v, 1)
           do i = 1, n
              if (nint(sign(one, x(i))) /= isgn(i)) go to 90
           end do
           ! repeated sign vector detected, hence algorithm has converged.
           go to 120
90      continue
           ! test for cycling.
           if (est <= estold) go to 120
           do i = 1, n
              x(i) = sign(one, x(i))
              isgn(i) = nint(x(i), KIND=ilp)
           end do
           kase = 2
           jump = 4
           return
           ! ................ entry   (jump = 4)
           ! x has been overwritten by transpose(a)*x.
110    continue
           jlast = j
           j = stdlib_idamax(n, x, 1)
           if ((x(jlast) /= abs(x(j))) .and. (iter < itmax)) then
              iter = iter + 1
              go to 50
           end if
           ! iteration complete.  final stage.
120    continue
           altsgn = one
           do i = 1, n
              x(i) = altsgn*(one + real(i - 1, KIND=dp)/real(n - 1, KIND=dp))
              altsgn = -altsgn
           end do
           kase = 1
           jump = 5
           return
           ! ................ entry   (jump = 5)
           ! x has been overwritten by a*x.
140    continue
           temp = two*(stdlib_dasum(n, x, 1)/real(3*n, KIND=dp))
           if (temp > est) then
              call stdlib_dcopy(n, x, 1, v, 1)
              est = temp
           end if
150    continue
           kase = 0
           return
           ! end of stdlib_dlacon
     end subroutine stdlib_dlacon

     ! DLACPY copies all or part of a two-dimensional matrix A to another
     ! matrix B.

     subroutine stdlib_dlacpy(uplo, m, n, a, lda, b, ldb)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: lda, ldb, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: min
           ! .. executable statements ..
           if (stdlib_lsame(uplo, 'u')) then
              do j = 1, n
                 do i = 1, min(j, m)
                    b(i, j) = a(i, j)
                 end do
              end do
           else if (stdlib_lsame(uplo, 'l')) then
              do j = 1, n
                 do i = j, m
                    b(i, j) = a(i, j)
                 end do
              end do
           else
              do j = 1, n
                 do i = 1, m
                    b(i, j) = a(i, j)
                 end do
              end do
           end if
           return
           ! end of stdlib_dlacpy
     end subroutine stdlib_dlacpy

     real(dp) function stdlib_dladiv2(a, b, c, d, r, t)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: a, b, c, d, r, t
        ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: br
           ! .. executable statements ..
           if (r /= zero) then
              br = b*r
              if (br /= zero) then
                 stdlib_dladiv2 = (a + br)*t
              else
                 stdlib_dladiv2 = a*t + (b*t)*r
              end if
           else
              stdlib_dladiv2 = (a + d*(b/c))*t
           end if
           return
           ! end of stdlib_dladiv2
     end function stdlib_dladiv2

     ! DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
     ! [  A   B  ]
     ! [  B   C  ].
     ! On return, RT1 is the eigenvalue of larger absolute value, and RT2
     ! is the eigenvalue of smaller absolute value.

     subroutine stdlib_dlae2(a, b, c, rt1, rt2)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: a, b, c, rt1, rt2
       ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: ab, acmn, acmx, adf, df, rt, sm, tb
           ! .. intrinsic functions ..
           intrinsic :: abs, sqrt
           ! .. executable statements ..
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs(df)
           tb = b + b
           ab = abs(tb)
           if (abs(a) > abs(c)) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if (adf > ab) then
              rt = adf*sqrt(one + (ab/adf)**2)
           else if (adf < ab) then
              rt = ab*sqrt(one + (adf/ab)**2)
           else
              ! includes case ab=adf=0
              rt = ab*sqrt(two)
           end if
           if (sm < zero) then
              rt1 = half*(sm - rt)
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = (acmx/rt1)*acmn - (b/rt1)*b
           else if (sm > zero) then
              rt1 = half*(sm + rt)
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = (acmx/rt1)*acmn - (b/rt1)*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
           end if
           return
           ! end of stdlib_dlae2
     end subroutine stdlib_dlae2

     ! DLAEBZ contains the iteration loops which compute and use the
     ! function N(w), which is the count of eigenvalues of a symmetric
     ! tridiagonal matrix T less than or equal to its argument  w.  It
     ! performs a choice of two types of loops:
     ! IJOB=1, followed by
     ! IJOB=2: It takes as input a list of intervals and returns a list of
     ! sufficiently small intervals whose union contains the same
     ! eigenvalues as the union of the original intervals.
     ! The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP.
     ! The output interval (AB(j,1),AB(j,2)] will contain
     ! eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT.
     ! IJOB=3: It performs a binary search in each input interval
     ! (AB(j,1),AB(j,2)] for a point  w(j)  such that
     ! N(w(j))=NVAL(j), and uses  C(j)  as the starting point of
     ! the search.  If such a w(j) is found, then on output
     ! AB(j,1)=AB(j,2)=w.  If no such w(j) is found, then on output
     ! (AB(j,1),AB(j,2)] will be a small interval containing the
     ! point where N(w) jumps through NVAL(j), unless that point
     ! lies outside the initial interval.
     ! Note that the intervals are in all cases half-open intervals,
     ! i.e., of the form  (a,b] , which includes  b  but not  a .
     ! To avoid underflow, the matrix should be scaled so that its largest
     ! element is no greater than  overflow**(1/2) * underflow**(1/4)
     ! in absolute value.  To assure the most accurate computation
     ! of small eigenvalues, the matrix should be scaled to be
     ! not much smaller than that, either.
     ! See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal
     ! Matrix", Report CS41, Computer Science Dept., Stanford
     ! University, July 21, 1966
     ! Note: the arguments are, in general, *not* checked for unreasonable
     ! values.

     subroutine stdlib_dlaebz(ijob, nitmax, n, mmax, minp, nbmin, abstol, reltol, pivmin, d, e, &
               e2, nval, ab, c, mout, nab, work, iwork, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: ijob, info, minp, mmax, mout, n, nbmin, nitmax
           real(dp) :: abstol, pivmin, reltol
           ! .. array arguments ..
           integer(ilp) :: iwork(*), nab(mmax, *), nval(*)
           real(dp) :: ab(mmax, *), c(*), d(*), e(*), e2(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: itmp1, itmp2, j, ji, jit, jp, kf, kfnew, kl, klnew
           real(dp) :: tmp1, tmp2
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
           ! .. executable statements ..
           ! check for errors
           info = 0
           if (ijob < 1 .or. ijob > 3) then
              info = -1
              return
           end if
           ! initialize nab
           if (ijob == 1) then
              ! compute the number of eigenvalues in the initial intervals.
              mout = 0
              do ji = 1, minp
                 do jp = 1, 2
                    tmp1 = d(1) - ab(ji, jp)
                    if (abs(tmp1) < pivmin) tmp1 = -pivmin
                    nab(ji, jp) = 0
                    if (tmp1 <= zero) nab(ji, jp) = 1
                    do j = 2, n
                       tmp1 = d(j) - e2(j - 1)/tmp1 - ab(ji, jp)
                       if (abs(tmp1) < pivmin) tmp1 = -pivmin
                       if (tmp1 <= zero) nab(ji, jp) = nab(ji, jp) + 1
                    end do
                 end do
                 mout = mout + nab(ji, 2) - nab(ji, 1)
              end do
              return
           end if
           ! initialize for loop
           ! kf and kl have the following meaning:
              ! intervals 1,...,kf-1 have converged.
              ! intervals kf,...,kl  still need to be refined.
           kf = 1
           kl = minp
           ! if ijob=2, initialize c.
           ! if ijob=3, use the user-supplied starting point.
           if (ijob == 2) then
              do ji = 1, minp
                 c(ji) = half*(ab(ji, 1) + ab(ji, 2))
              end do
           end if
           ! iteration loop
           loop_130: do jit = 1, nitmax
              ! loop over intervals
              if (kl - kf + 1 >= nbmin .and. nbmin > 0) then
                 ! begin of parallel version of the loop
                 do ji = kf, kl
                    ! compute n(c), the number of eigenvalues less than c
                    work(ji) = d(1) - c(ji)
                    iwork(ji) = 0
                    if (work(ji) <= pivmin) then
                       iwork(ji) = 1
                       work(ji) = min(work(ji), -pivmin)
                    end if
                    do j = 2, n
                       work(ji) = d(j) - e2(j - 1)/work(ji) - c(ji)
                       if (work(ji) <= pivmin) then
                          iwork(ji) = iwork(ji) + 1
                          work(ji) = min(work(ji), -pivmin)
                       end if
                    end do
                 end do
                 if (ijob <= 2) then
                    ! ijob=2: choose all intervals containing eigenvalues.
                    klnew = kl
                    loop_70: do ji = kf, kl
                       ! insure that n(w) is monotone
                       iwork(ji) = min(nab(ji, 2), max(nab(ji, 1), iwork(ji)))
                       ! update the queue -- add intervals if both halves
                       ! contain eigenvalues.
                       if (iwork(ji) == nab(ji, 2)) then
                          ! no eigenvalue in the upper interval:
                          ! just use the lower interval.
                          ab(ji, 2) = c(ji)
                       else if (iwork(ji) == nab(ji, 1)) then
                          ! no eigenvalue in the lower interval:
                          ! just use the upper interval.
                          ab(ji, 1) = c(ji)
                       else
                          klnew = klnew + 1
                          if (klnew <= mmax) then
                             ! eigenvalue in both intervals -- add upper to
                             ! queue.
                             ab(klnew, 2) = ab(ji, 2)
                             nab(klnew, 2) = nab(ji, 2)
                             ab(klnew, 1) = c(ji)
                             nab(klnew, 1) = iwork(ji)
                             ab(ji, 2) = c(ji)
                             nab(ji, 2) = iwork(ji)
                          else
                             info = mmax + 1
                          end if
                       end if
                    end do loop_70
                    if (info /= 0) return
                    kl = klnew
                 else
                    ! ijob=3: binary search.  keep only the interval containing
                            ! w   s.t. n(w) = nval
                    do ji = kf, kl
                       if (iwork(ji) <= nval(ji)) then
                          ab(ji, 1) = c(ji)
                          nab(ji, 1) = iwork(ji)
                       end if
                       if (iwork(ji) >= nval(ji)) then
                          ab(ji, 2) = c(ji)
                          nab(ji, 2) = iwork(ji)
                       end if
                    end do
                 end if
              else
                 ! end of parallel version of the loop
                 ! begin of serial version of the loop
                 klnew = kl
                 loop_100: do ji = kf, kl
                    ! compute n(w), the number of eigenvalues less than w
                    tmp1 = c(ji)
                    tmp2 = d(1) - tmp1
                    itmp1 = 0
                    if (tmp2 <= pivmin) then
                       itmp1 = 1
                       tmp2 = min(tmp2, -pivmin)
                    end if
                    do j = 2, n
                       tmp2 = d(j) - e2(j - 1)/tmp2 - tmp1
                       if (tmp2 <= pivmin) then
                          itmp1 = itmp1 + 1
                          tmp2 = min(tmp2, -pivmin)
                       end if
                    end do
                    if (ijob <= 2) then
                       ! ijob=2: choose all intervals containing eigenvalues.
                       ! insure that n(w) is monotone
                       itmp1 = min(nab(ji, 2), max(nab(ji, 1), itmp1))
                       ! update the queue -- add intervals if both halves
                       ! contain eigenvalues.
                       if (itmp1 == nab(ji, 2)) then
                          ! no eigenvalue in the upper interval:
                          ! just use the lower interval.
                          ab(ji, 2) = tmp1
                       else if (itmp1 == nab(ji, 1)) then
                          ! no eigenvalue in the lower interval:
                          ! just use the upper interval.
                          ab(ji, 1) = tmp1
                       else if (klnew < mmax) then
                          ! eigenvalue in both intervals -- add upper to queue.
                          klnew = klnew + 1
                          ab(klnew, 2) = ab(ji, 2)
                          nab(klnew, 2) = nab(ji, 2)
                          ab(klnew, 1) = tmp1
                          nab(klnew, 1) = itmp1
                          ab(ji, 2) = tmp1
                          nab(ji, 2) = itmp1
                       else
                          info = mmax + 1
                          return
                       end if
                    else
                       ! ijob=3: binary search.  keep only the interval
                               ! containing  w  s.t. n(w) = nval
                       if (itmp1 <= nval(ji)) then
                          ab(ji, 1) = tmp1
                          nab(ji, 1) = itmp1
                       end if
                       if (itmp1 >= nval(ji)) then
                          ab(ji, 2) = tmp1
                          nab(ji, 2) = itmp1
                       end if
                    end if
                 end do loop_100
                 kl = klnew
              end if
              ! check for convergence
              kfnew = kf
              loop_110: do ji = kf, kl
                 tmp1 = abs(ab(ji, 2) - ab(ji, 1))
                 tmp2 = max(abs(ab(ji, 2)), abs(ab(ji, 1)))
                 if (tmp1 < max(abstol, pivmin, reltol*tmp2) .or. nab(ji, 1) >= nab(ji, 2)) &
                           then
                    ! converged -- swap with position kfnew,
                                 ! then increment kfnew
                    if (ji > kfnew) then
                       tmp1 = ab(ji, 1)
                       tmp2 = ab(ji, 2)
                       itmp1 = nab(ji, 1)
                       itmp2 = nab(ji, 2)
                       ab(ji, 1) = ab(kfnew, 1)
                       ab(ji, 2) = ab(kfnew, 2)
                       nab(ji, 1) = nab(kfnew, 1)
                       nab(ji, 2) = nab(kfnew, 2)
                       ab(kfnew, 1) = tmp1
                       ab(kfnew, 2) = tmp2
                       nab(kfnew, 1) = itmp1
                       nab(kfnew, 2) = itmp2
                       if (ijob == 3) then
                          itmp1 = nval(ji)
                          nval(ji) = nval(kfnew)
                          nval(kfnew) = itmp1
                       end if
                    end if
                    kfnew = kfnew + 1
                 end if
              end do loop_110
              kf = kfnew
              ! choose midpoints
              do ji = kf, kl
                 c(ji) = half*(ab(ji, 1) + ab(ji, 2))
              end do
              ! if no more intervals to refine, quit.
              if (kf > kl) go to 140
           end do loop_130
           ! converged
140    continue
           info = max(kl + 1 - kf, 0)
           mout = kl
           return
           ! end of stdlib_dlaebz
     end subroutine stdlib_dlaebz

     ! This subroutine computes the I-th eigenvalue of a symmetric rank-one
     ! modification of a 2-by-2 diagonal matrix
     ! diag( D )  +  RHO * Z * transpose(Z) .
     ! The diagonal elements in the array D are assumed to satisfy
     ! D(i) < D(j)  for  i < j .
     ! We also assume RHO > 0 and that the Euclidean norm of the vector
     ! Z is one.

     subroutine stdlib_dlaed5(i, d, z, delta, rho, dlam)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: i
           real(dp) :: dlam, rho
           ! .. array arguments ..
           real(dp) :: d(2), delta(2), z(2)
        ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: b, c, del, tau, temp, w
           ! .. intrinsic functions ..
           intrinsic :: abs, sqrt
           ! .. executable statements ..
           del = d(2) - d(1)
           if (i == 1) then
              w = one + two*rho*(z(2)*z(2) - z(1)*z(1))/del
              if (w > zero) then
                 b = del + rho*(z(1)*z(1) + z(2)*z(2))
                 c = rho*z(1)*z(1)*del
                 ! b > zero, always
                 tau = two*c/(b + sqrt(abs(b*b - four*c)))
                 dlam = d(1) + tau
                 delta(1) = -z(1)/tau
                 delta(2) = z(2)/(del - tau)
              else
                 b = -del + rho*(z(1)*z(1) + z(2)*z(2))
                 c = rho*z(2)*z(2)*del
                 if (b > zero) then
                    tau = -two*c/(b + sqrt(b*b + four*c))
                 else
                    tau = (b - sqrt(b*b + four*c))/two
                 end if
                 dlam = d(2) + tau
                 delta(1) = -z(1)/(del + tau)
                 delta(2) = -z(2)/tau
              end if
              temp = sqrt(delta(1)*delta(1) + delta(2)*delta(2))
              delta(1) = delta(1)/temp
              delta(2) = delta(2)/temp
           else
           ! now i=2
              b = -del + rho*(z(1)*z(1) + z(2)*z(2))
              c = rho*z(2)*z(2)*del
              if (b > zero) then
                 tau = (b + sqrt(b*b + four*c))/two
              else
                 tau = two*c/(-b + sqrt(b*b + four*c))
              end if
              dlam = d(2) + tau
              delta(1) = -z(1)/(del + tau)
              delta(2) = -z(2)/tau
              temp = sqrt(delta(1)*delta(1) + delta(2)*delta(2))
              delta(1) = delta(1)/temp
              delta(2) = delta(2)/temp
           end if
           return
           ! end of stdlib_dlaed5
     end subroutine stdlib_dlaed5

     ! DLAEDA computes the Z vector corresponding to the merge step in the
     ! CURLVLth step of the merge process with TLVLS steps for the CURPBMth
     ! problem.

     subroutine stdlib_dlaeda(n, tlvls, curlvl, curpbm, prmptr, perm, givptr, givcol, givnum, q, &
               qptr, z, ztemp, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: curlvl, curpbm, info, n, tlvls
           ! .. array arguments ..
           integer(ilp) :: givcol(2, *), givptr(*), perm(*), prmptr(*), qptr(*)
           real(dp) :: givnum(2, *), q(*), z(*), ztemp(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: bsiz1, bsiz2, curr, i, k, mid, psiz1, psiz2, ptr, zptr1
     
           ! .. intrinsic functions ..
           intrinsic :: dble, int, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (n < 0) then
              info = -1
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlaeda', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine location of first number in second half.
           mid = n/2 + 1
           ! gather last/first rows of appropriate eigenblocks into center of z
           ptr = 1
           ! determine location of lowest level subproblem in the full storage
           ! scheme
           curr = ptr + curpbm*2**curlvl + 2**(curlvl - 1) - 1
           ! determine size of these matrices.  we add half to the value of
           ! the sqrt in case the machine underestimates one of these square
           ! roots.
           bsiz1 = int(half + sqrt(dble(qptr(curr + 1) - qptr(curr))))
           bsiz2 = int(half + sqrt(dble(qptr(curr + 2) - qptr(curr + 1))))
           do k = 1, mid - bsiz1 - 1
              z(k) = zero
           end do
           call stdlib_dcopy(bsiz1, q(qptr(curr) + bsiz1 - 1), bsiz1, z(mid - bsiz1), 1)
           call stdlib_dcopy(bsiz2, q(qptr(curr + 1)), bsiz2, z(mid), 1)
           do k = mid + bsiz2, n
              z(k) = zero
           end do
           ! loop through remaining levels 1 -> curlvl applying the givens
           ! rotations and permutation and then multiplying the center matrices
           ! against the current z.
           ptr = 2**tlvls + 1
           loop_70: do k = 1, curlvl - 1
              curr = ptr + curpbm*2**(curlvl - k) + 2**(curlvl - k - 1) - 1
              psiz1 = prmptr(curr + 1) - prmptr(curr)
              psiz2 = prmptr(curr + 2) - prmptr(curr + 1)
              zptr1 = mid - psiz1
             ! apply givens at curr and curr+1
              do i = givptr(curr), givptr(curr + 1) - 1
                 call stdlib_drot(1, z(zptr1 + givcol(1, i) - 1), 1, z(zptr1 + givcol(2, i) - 1), &
                           1, givnum(1, i), givnum(2, i))
              end do
              do i = givptr(curr + 1), givptr(curr + 2) - 1
                 call stdlib_drot(1, z(mid - 1 + givcol(1, i)), 1, z(mid - 1 + givcol(2, i)), 1, &
                           givnum(1, i), givnum(2, i))
              end do
              psiz1 = prmptr(curr + 1) - prmptr(curr)
              psiz2 = prmptr(curr + 2) - prmptr(curr + 1)
              do i = 0, psiz1 - 1
                 ztemp(i + 1) = z(zptr1 + perm(prmptr(curr) + i) - 1)
              end do
              do i = 0, psiz2 - 1
                 ztemp(psiz1 + i + 1) = z(mid + perm(prmptr(curr + 1) + i) - 1)
              end do
              ! multiply blocks at curr and curr+1
              ! determine size of these matrices.  we add half to the value of
              ! the sqrt in case the machine underestimates one of these
              ! square roots.
              bsiz1 = int(half + sqrt(dble(qptr(curr + 1) - qptr(curr))))
              bsiz2 = int(half + sqrt(dble(qptr(curr + 2) - qptr(curr + 1))))
              if (bsiz1 > 0) then
                 call stdlib_dgemv('t', bsiz1, bsiz1, one, q(qptr(curr)), bsiz1, ztemp(1), &
                           1, zero, z(zptr1), 1)
              end if
              call stdlib_dcopy(psiz1 - bsiz1, ztemp(bsiz1 + 1), 1, z(zptr1 + bsiz1), 1)
              if (bsiz2 > 0) then
                 call stdlib_dgemv('t', bsiz2, bsiz2, one, q(qptr(curr + 1)), bsiz2, ztemp( &
                           psiz1 + 1), 1, zero, z(mid), 1)
              end if
              call stdlib_dcopy(psiz2 - bsiz2, ztemp(psiz1 + bsiz2 + 1), 1, z(mid + bsiz2), 1)
                        
              ptr = ptr + 2**(tlvls - k)
           end do loop_70
           return
           ! end of stdlib_dlaeda
     end subroutine stdlib_dlaeda

     ! DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
     ! [  A   B  ]
     ! [  B   C  ].
     ! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     ! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     ! eigenvector for RT1, giving the decomposition
     ! [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
     ! [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].

     subroutine stdlib_dlaev2(a, b, c, rt1, rt2, cs1, sn1)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: a, b, c, cs1, rt1, rt2, sn1
       ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: sgn1, sgn2
           real(dp) :: ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm, tb, tn
           ! .. intrinsic functions ..
           intrinsic :: abs, sqrt
           ! .. executable statements ..
           ! compute the eigenvalues
           sm = a + c
           df = a - c
           adf = abs(df)
           tb = b + b
           ab = abs(tb)
           if (abs(a) > abs(c)) then
              acmx = a
              acmn = c
           else
              acmx = c
              acmn = a
           end if
           if (adf > ab) then
              rt = adf*sqrt(one + (ab/adf)**2)
           else if (adf < ab) then
              rt = ab*sqrt(one + (adf/ab)**2)
           else
              ! includes case ab=adf=0
              rt = ab*sqrt(two)
           end if
           if (sm < zero) then
              rt1 = half*(sm - rt)
              sgn1 = -1
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = (acmx/rt1)*acmn - (b/rt1)*b
           else if (sm > zero) then
              rt1 = half*(sm + rt)
              sgn1 = 1
              ! order of execution important.
              ! to get fully accurate smaller eigenvalue,
              ! next line needs to be executed in higher precision.
              rt2 = (acmx/rt1)*acmn - (b/rt1)*b
           else
              ! includes case rt1 = rt2 = 0
              rt1 = half*rt
              rt2 = -half*rt
              sgn1 = 1
           end if
           ! compute the eigenvector
           if (df >= zero) then
              cs = df + rt
              sgn2 = 1
           else
              cs = df - rt
              sgn2 = -1
           end if
           acs = abs(cs)
           if (acs > ab) then
              ct = -tb/cs
              sn1 = one/sqrt(one + ct*ct)
              cs1 = ct*sn1
           else
              if (ab == zero) then
                 cs1 = one
                 sn1 = zero
              else
                 tn = -cs/tb
                 cs1 = one/sqrt(one + tn*tn)
                 sn1 = tn*cs1
              end if
           end if
           if (sgn1 == sgn2) then
              tn = cs1
              cs1 = -sn1
              sn1 = tn
           end if
           return
           ! end of stdlib_dlaev2
     end subroutine stdlib_dlaev2

     ! DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
     ! problem  A - w B, with scaling as necessary to avoid over-/underflow.
     ! The scaling factor "s" results in a modified eigenvalue equation
     ! s A - w B
     ! where  s  is a non-negative scaling factor chosen so that  w,  w B,
     ! and  s A  do not overflow and, if possible, do not underflow, either.

     subroutine stdlib_dlag2(a, lda, b, ldb, safmin, scale1, scale2, wr1, wr2, wi)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: lda, ldb
           real(dp) :: safmin, scale1, scale2, wi, wr1, wr2
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: fuzzy1 = one + 1.0d-5
           
           ! .. local scalars ..
           real(dp) :: a11, a12, a21, a22, abi22, anorm, as11, as12, as22, ascale, b11, b12, b22, &
           binv11, binv22, bmin, bnorm, bscale, bsize, c1, c2, c3, c4, c5, diff, discr, pp, qq, r, &
           rtmax, rtmin, s1, s2, safmax, shift, ss, sum, wabs, wbig, wdet, wscale, wsize, &
                     wsmall
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, sign, sqrt
           ! .. executable statements ..
           rtmin = sqrt(safmin)
           rtmax = one/rtmin
           safmax = one/safmin
           ! scale a
           anorm = max(abs(a(1, 1)) + abs(a(2, 1)), abs(a(1, 2)) + abs(a(2, 2)), &
                     safmin)
           ascale = one/anorm
           a11 = ascale*a(1, 1)
           a21 = ascale*a(2, 1)
           a12 = ascale*a(1, 2)
           a22 = ascale*a(2, 2)
           ! perturb b if necessary to insure non-singularity
           b11 = b(1, 1)
           b12 = b(1, 2)
           b22 = b(2, 2)
           bmin = rtmin*max(abs(b11), abs(b12), abs(b22), rtmin)
           if (abs(b11) < bmin) b11 = sign(bmin, b11)
           if (abs(b22) < bmin) b22 = sign(bmin, b22)
           ! scale b
           bnorm = max(abs(b11), abs(b12) + abs(b22), safmin)
           bsize = max(abs(b11), abs(b22))
           bscale = one/bsize
           b11 = b11*bscale
           b12 = b12*bscale
           b22 = b22*bscale
           ! compute larger eigenvalue by method described by c. van loan
           ! ( as is a shifted by -shift*b )
           binv11 = one/b11
           binv22 = one/b22
           s1 = a11*binv11
           s2 = a22*binv22
           if (abs(s1) <= abs(s2)) then
              as12 = a12 - s1*b12
              as22 = a22 - s1*b22
              ss = a21*(binv11*binv22)
              abi22 = as22*binv22 - ss*b12
              pp = half*abi22
              shift = s1
           else
              as12 = a12 - s2*b12
              as11 = a11 - s2*b11
              ss = a21*(binv11*binv22)
              abi22 = -ss*b12
              pp = half*(as11*binv11 + abi22)
              shift = s2
           end if
           qq = ss*as12
           if (abs(pp*rtmin) >= one) then
              discr = (rtmin*pp)**2 + qq*safmin
              r = sqrt(abs(discr))*rtmax
           else
              if (pp**2 + abs(qq) <= safmin) then
                 discr = (rtmax*pp)**2 + qq*safmax
                 r = sqrt(abs(discr))*rtmin
              else
                 discr = pp**2 + qq
                 r = sqrt(abs(discr))
              end if
           end if
           ! note: the test of r in the following if is to cover the case when
                 ! discr is small and negative and is flushed to zero during
                 ! the calculation of r.  on machines which have a consistent
                 ! flush-to-zero threshold and handle numbers above that
                 ! threshold correctly, it would not be necessary.
           if (discr >= zero .or. r == zero) then
              sum = pp + sign(r, pp)
              diff = pp - sign(r, pp)
              wbig = shift + sum
              ! compute smaller eigenvalue
              wsmall = shift + diff
              if (half*abs(wbig) > max(abs(wsmall), safmin)) then
                 wdet = (a11*a22 - a12*a21)*(binv11*binv22)
                 wsmall = wdet/wbig
              end if
              ! choose (real) eigenvalue closest to 2,2 element of a*b**(-1)
              ! for wr1.
              if (pp > abi22) then
                 wr1 = min(wbig, wsmall)
                 wr2 = max(wbig, wsmall)
              else
                 wr1 = max(wbig, wsmall)
                 wr2 = min(wbig, wsmall)
              end if
              wi = zero
           else
              ! complex eigenvalues
              wr1 = shift + pp
              wr2 = wr1
              wi = r
           end if
           ! further scaling to avoid underflow and overflow in computing
           ! scale1 and overflow in computing w*b.
           ! this scale factor (wscale) is bounded from above using c1 and c2,
           ! and from below using c3 and c4.
              ! c1 implements the condition  s a  must never overflow.
              ! c2 implements the condition  w b  must never overflow.
              ! c3, with c2,
                 ! implement the condition that s a - w b must never overflow.
              ! c4 implements the condition  s    should not underflow.
              ! c5 implements the condition  max(s,|w|) should be at least 2.
           c1 = bsize*(safmin*max(one, ascale))
           c2 = safmin*max(one, bnorm)
           c3 = bsize*safmin
           if (ascale <= one .and. bsize <= one) then
              c4 = min(one, (ascale/safmin)*bsize)
           else
              c4 = one
           end if
           if (ascale <= one .or. bsize <= one) then
              c5 = min(one, ascale*bsize)
           else
              c5 = one
           end if
           ! scale first eigenvalue
           wabs = abs(wr1) + abs(wi)
           wsize = max(safmin, c1, fuzzy1*(wabs*c2 + c3), min(c4, half*max(wabs, c5)))
                     
           if (wsize /= one) then
              wscale = one/wsize
              if (wsize > one) then
                 scale1 = (max(ascale, bsize)*wscale)*min(ascale, bsize)
              else
                 scale1 = (min(ascale, bsize)*wscale)*max(ascale, bsize)
              end if
              wr1 = wr1*wscale
              if (wi /= zero) then
                 wi = wi*wscale
                 wr2 = wr1
                 scale2 = scale1
              end if
           else
              scale1 = ascale*bsize
              scale2 = scale1
           end if
           ! scale second eigenvalue (if real)
           if (wi == zero) then
              wsize = max(safmin, c1, fuzzy1*(abs(wr2)*c2 + c3), min(c4, half*max(abs(wr2), &
                        c5)))
              if (wsize /= one) then
                 wscale = one/wsize
                 if (wsize > one) then
                    scale2 = (max(ascale, bsize)*wscale)*min(ascale, bsize)
                 else
                    scale2 = (min(ascale, bsize)*wscale)*max(ascale, bsize)
                 end if
                 wr2 = wr2*wscale
              else
                 scale2 = ascale*bsize
              end if
           end if
           ! end of stdlib_dlag2
           return
     end subroutine stdlib_dlag2

     ! DLAG2S converts a DOUBLE PRECISION matrix, SA, to a SINGLE
     ! PRECISION matrix, A.
     ! RMAX is the overflow for the SINGLE PRECISION arithmetic
     ! DLAG2S checks that all the entries of A are between -RMAX and
     ! RMAX. If not the conversion is aborted and a flag is raised.
     ! This is an auxiliary routine so there is no argument checking.

     subroutine stdlib_dlag2s(m, n, a, lda, sa, ldsa, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, ldsa, m, n
           ! .. array arguments ..
           real(sp) :: sa(ldsa, *)
           real(dp) :: a(lda, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: rmax
     
           ! .. executable statements ..
           rmax = stdlib_slamch('o')
           do j = 1, n
              do i = 1, m
                 if ((a(i, j) < -rmax) .or. (a(i, j) > rmax)) then
                    info = 1
                    go to 30
                 end if
                 sa(i, j) = a(i, j)
              end do
           end do
           info = 0
30      continue
           return
           ! end of stdlib_dlag2s
     end subroutine stdlib_dlag2s

     ! DLAGTM performs a matrix-vector product of the form
     ! B := alpha * A * X + beta * B
     ! where A is a tridiagonal matrix of order N, B and X are N by NRHS
     ! matrices, and alpha and beta are real scalars, each of which may be
     ! 0., 1., or -1.

     subroutine stdlib_dlagtm(trans, n, nrhs, alpha, dl, d, du, x, ldx, beta, b, ldb)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: trans
           integer(ilp) :: ldb, ldx, n, nrhs
           real(dp) :: alpha, beta
           ! .. array arguments ..
           real(dp) :: b(ldb, *), d(*), dl(*), du(*), x(ldx, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. executable statements ..
           if (n == 0) return
           ! multiply b by beta if beta/=1.
           if (beta == zero) then
              do j = 1, nrhs
                 do i = 1, n
                    b(i, j) = zero
                 end do
              end do
           else if (beta == -one) then
              do j = 1, nrhs
                 do i = 1, n
                    b(i, j) = -b(i, j)
                 end do
              end do
           end if
           if (alpha == one) then
              if (stdlib_lsame(trans, 'n')) then
                 ! compute b := b + a*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) + d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) + d(1)*x(1, j) + du(1)*x(2, j)
                       b(n, j) = b(n, j) + dl(n - 1)*x(n - 1, j) + d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) + dl(i - 1)*x(i - 1, j) + d(i)*x(i, j) + du(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              else
                 ! compute b := b + a**t*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) + d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) + d(1)*x(1, j) + dl(1)*x(2, j)
                       b(n, j) = b(n, j) + du(n - 1)*x(n - 1, j) + d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) + du(i - 1)*x(i - 1, j) + d(i)*x(i, j) + dl(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              end if
           else if (alpha == -one) then
              if (stdlib_lsame(trans, 'n')) then
                 ! compute b := b - a*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) - d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) - d(1)*x(1, j) - du(1)*x(2, j)
                       b(n, j) = b(n, j) - dl(n - 1)*x(n - 1, j) - d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) - dl(i - 1)*x(i - 1, j) - d(i)*x(i, j) - du(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              else
                 ! compute b := b - a**t*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) - d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) - d(1)*x(1, j) - dl(1)*x(2, j)
                       b(n, j) = b(n, j) - du(n - 1)*x(n - 1, j) - d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) - du(i - 1)*x(i - 1, j) - d(i)*x(i, j) - dl(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              end if
           end if
           return
           ! end of stdlib_dlagtm
     end subroutine stdlib_dlagtm

     ! This routine is not for general use.  It exists solely to avoid
     ! over-optimization in DISNAN.
     ! DLAISNAN checks for NaNs by comparing its two arguments for
     ! inequality.  NaN is the only floating-point value where NaN != NaN
     ! returns .TRUE.  To check for NaNs, pass the same variable as both
     ! arguments.
     ! A compiler must assume that the two arguments are
     ! not the same variable, and the test will not be optimized away.
     ! Interprocedural or whole-program optimization may delete this
     ! test.  The ISNAN functions will be replaced by the correct
     ! Fortran 03 intrinsic once the intrinsic is widely available.

     logical(lk) function stdlib_dlaisnan(din1, din2)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp), intent(in) :: din1, din2
        ! =====================================================================
        ! .. executable statements ..
           stdlib_dlaisnan = (din1 /= din2)
           return
     end function stdlib_dlaisnan

     ! DLAMCH determines double precision machine parameters.

     real(dp) function stdlib_dlamch(cmach)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: cmach
       ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: rnd, eps, sfmin, small, rmach
     
           ! .. intrinsic functions ..
           intrinsic :: digits, epsilon, huge, maxexponent, minexponent, radix, tiny
           ! .. executable statements ..
           ! assume rounding, not chopping. always.
           rnd = one
           if (one == rnd) then
              eps = epsilon(zero)*0.5
           else
              eps = epsilon(zero)
           end if
           if (stdlib_lsame(cmach, 'e')) then
              rmach = eps
           else if (stdlib_lsame(cmach, 's')) then
              sfmin = tiny(zero)
              small = one/huge(zero)
              if (small >= sfmin) then
                 ! use small plus a bit, to avoid the possibility of rounding
                 ! causing overflow when computing  1/sfmin.
                 sfmin = small*(one + eps)
              end if
              rmach = sfmin
           else if (stdlib_lsame(cmach, 'b')) then
              rmach = radix(zero)
           else if (stdlib_lsame(cmach, 'p')) then
              rmach = eps*radix(zero)
           else if (stdlib_lsame(cmach, 'n')) then
              rmach = digits(zero)
           else if (stdlib_lsame(cmach, 'r')) then
              rmach = rnd
           else if (stdlib_lsame(cmach, 'm')) then
              rmach = minexponent(zero)
           else if (stdlib_lsame(cmach, 'u')) then
              rmach = tiny(zero)
           else if (stdlib_lsame(cmach, 'l')) then
              rmach = maxexponent(zero)
           else if (stdlib_lsame(cmach, 'o')) then
              rmach = huge(zero)
           else
              rmach = zero
           end if
           stdlib_dlamch = rmach
           return
           ! end of stdlib_dlamch
     end function stdlib_dlamch

     real(dp) function stdlib_dlamc3(a, b)
        ! -- lapack auxiliary routine --
           ! univ. of tennessee, univ. of california berkeley and nag ltd..
           ! .. scalar arguments ..
           real(dp) :: a, b
       ! =====================================================================
           ! .. executable statements ..
           stdlib_dlamc3 = a + b
           return
           ! end of stdlib_dlamc3
     end function stdlib_dlamc3

     ! DLAMRG will create a permutation list which will merge the elements
     ! of A (which is composed of two independently sorted sets) into a
     ! single set which is sorted in ascending order.

     subroutine stdlib_dlamrg(n1, n2, a, dtrd1, dtrd2, index)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: dtrd1, dtrd2, n1, n2
           ! .. array arguments ..
           integer(ilp) :: index(*)
           real(dp) :: a(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ind1, ind2, n1sv, n2sv
           ! .. executable statements ..
           n1sv = n1
           n2sv = n2
           if (dtrd1 > 0) then
              ind1 = 1
           else
              ind1 = n1
           end if
           if (dtrd2 > 0) then
              ind2 = 1 + n1
           else
              ind2 = n1 + n2
           end if
           i = 1
           ! while ( (n1sv > 0)
10      continue
           if (n1sv > 0 .and. n2sv > 0) then
              if (a(ind1) <= a(ind2)) then
                 index(i) = ind1
                 i = i + 1
                 ind1 = ind1 + dtrd1
                 n1sv = n1sv - 1
              else
                 index(i) = ind2
                 i = i + 1
                 ind2 = ind2 + dtrd2
                 n2sv = n2sv - 1
              end if
              go to 10
           end if
           ! end while
           if (n1sv == 0) then
              do n1sv = 1, n2sv
                 index(i) = ind2
                 i = i + 1
                 ind2 = ind2 + dtrd2
              end do
           else
           ! n2sv == 0
              do n2sv = 1, n1sv
                 index(i) = ind1
                 i = i + 1
                 ind1 = ind1 + dtrd1
              end do
           end if
           return
           ! end of stdlib_dlamrg
     end subroutine stdlib_dlamrg

     ! DLAORHR_COL_GETRFNP2 computes the modified LU factorization without
     ! pivoting of a real general M-by-N matrix A. The factorization has
     ! the form:
     ! A - S = L * U,
     ! where:
     ! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     ! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     ! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     ! i-1 steps of Gaussian elimination. This means that the diagonal
     ! element at each step of "modified" Gaussian elimination is at
     ! least one in absolute value (so that division-by-zero not
     ! possible during the division by the diagonal element);
     ! L is a M-by-N lower triangular matrix with unit diagonal elements
     ! (lower trapezoidal if M > N);
     ! and U is a M-by-N upper triangular matrix
     ! (upper trapezoidal if M < N).
     ! This routine is an auxiliary routine used in the Householder
     ! reconstruction routine DORHR_COL. In DORHR_COL, this routine is
     ! applied to an M-by-N matrix A with orthonormal columns, where each
     ! element is bounded by one in absolute value. With the choice of
     ! the matrix S above, one can show that the diagonal element at each
     ! step of Gaussian elimination is the largest (in absolute value) in
     ! the column on or below the diagonal, so that no pivoting is required
     ! for numerical stability [1].
     ! For more details on the Householder reconstruction algorithm,
     ! including the modified LU factorization, see [1].
     ! This is the recursive version of the LU factorization algorithm.
     ! Denote A - S by B. The algorithm divides the matrix B into four
     ! submatrices:
     ! [  B11 | B12  ]  where B11 is n1 by n1,
     ! B = [ -----|----- ]        B21 is (m-n1) by n1,
     ! [  B21 | B22  ]        B12 is n1 by n2,
     ! B22 is (m-n1) by n2,
     ! with n1 = min(m,n)/2, n2 = n-n1.
     ! The subroutine calls itself to factor B11, solves for B21,
     ! solves for B12, updates B22, then calls itself to factor B22.
     ! For more details on the recursive LU algorithm, see [2].
     ! DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
     ! routine DLAORHR_COL_GETRFNP, which uses blocked code calling
     ! Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2
     ! is self-sufficient and can be used without DLAORHR_COL_GETRFNP.
     ! [1] "Reconstructing Householder vectors from tall-skinny QR",
     ! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     ! E. Solomonik, J. Parallel Distrib. Comput.,
     ! vol. 85, pp. 3-31, 2015.
     ! [2] "Recursion leads to automatic variable blocking for dense linear
     ! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     ! vol. 41, no. 6, pp. 737-755, 1997.

     recursive subroutine stdlib_dlaorhr_col_getrfnp2(m, n, a, lda, d, info)
     
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), d(*)
        ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: sfmin
           integer(ilp) :: i, iinfo, n1, n2
     
           ! .. intrinsic functions ..
           intrinsic :: abs, sign, max, min
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, m)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlaorhr_col_getrfnp2', -info)
              return
           end if
           ! quick return if possible
           if (min(m, n) == 0) return
           if (m == 1) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d(1) = -sign(one, a(1, 1))
              ! construct the row of u
              a(1, 1) = a(1, 1) - d(1)
           else if (n == 1) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d(1) = -sign(one, a(1, 1))
              ! construct the row of u
              a(1, 1) = a(1, 1) - d(1)
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib_dlamch('s')
              ! construct the subdiagonal elements of l
              if (abs(a(1, 1)) >= sfmin) then
                 call stdlib_dscal(m - 1, one/a(1, 1), a(2, 1), 1)
              else
                 do i = 2, m
                    a(i, 1) = a(i, 1)/a(1, 1)
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min(m, n)/2
              n2 = n - n1
              ! factor b11, recursive call
              call stdlib_dlaorhr_col_getrfnp2(n1, n1, a, lda, d, iinfo)
              ! solve for b21
              call stdlib_dtrsm('r', 'u', 'n', 'n', m - n1, n1, one, a, lda, a(n1 + 1, 1), lda)
                        
              ! solve for b12
              call stdlib_dtrsm('l', 'l', 'n', 'u', n1, n2, one, a, lda, a(1, n1 + 1), lda)
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib_dgemm('n', 'n', m - n1, n2, n1, -one, a(n1 + 1, 1), lda, a(1, n1 + 1), &
                        lda, one, a(n1 + 1, n1 + 1), lda)
              ! factor b22, recursive call
              call stdlib_dlaorhr_col_getrfnp2(m - n1, n2, a(n1 + 1, n1 + 1), lda, d(n1 + 1), iinfo)
                        
           end if
           return
           ! end of stdlib_dlaorhr_col_getrfnp2
     end subroutine stdlib_dlaorhr_col_getrfnp2

     ! DLAPMR rearranges the rows of the M by N matrix X as specified
     ! by the permutation K(1),K(2),...,K(M) of the integers 1,...,M.
     ! If FORWRD = .TRUE.,  forward permutation:
     ! X(K(I),*) is moved X(I,*) for I = 1,2,...,M.
     ! If FORWRD = .FALSE., backward permutation:
     ! X(I,*) is moved to X(K(I),*) for I = 1,2,...,M.

     subroutine stdlib_dlapmr(forwrd, m, n, x, ldx, k)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           logical(lk) :: forwrd
           integer(ilp) :: ldx, m, n
           ! .. array arguments ..
           integer(ilp) :: k(*)
           real(dp) :: x(ldx, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, in, j, jj
           real(dp) :: temp
           ! .. executable statements ..
           if (m <= 1) return
           do i = 1, m
              k(i) = -k(i)
           end do
           if (forwrd) then
              ! forward permutation
              do i = 1, m
                 if (k(i) > 0) go to 40
                 j = i
                 k(j) = -k(j)
                 in = k(j)
20      continue
                 if (k(in) > 0) go to 40
                 do jj = 1, n
                    temp = x(j, jj)
                    x(j, jj) = x(in, jj)
                    x(in, jj) = temp
                 end do
                 k(in) = -k(in)
                 j = in
                 in = k(in)
                 go to 20
40      continue
              end do
           else
              ! backward permutation
              do i = 1, m
                 if (k(i) > 0) go to 80
                 k(i) = -k(i)
                 j = k(i)
60      continue
                 if (j == i) go to 80
                 do jj = 1, n
                    temp = x(i, jj)
                    x(i, jj) = x(j, jj)
                    x(j, jj) = temp
                 end do
                 k(j) = -k(j)
                 j = k(j)
                 go to 60
80      continue
              end do
           end if
           return
           ! end of stdlib_dlapmr
     end subroutine stdlib_dlapmr

     ! DLAPMT rearranges the columns of the M by N matrix X as specified
     ! by the permutation K(1),K(2),...,K(N) of the integers 1,...,N.
     ! If FORWRD = .TRUE.,  forward permutation:
     ! X(*,K(J)) is moved X(*,J) for J = 1,2,...,N.
     ! If FORWRD = .FALSE., backward permutation:
     ! X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N.

     subroutine stdlib_dlapmt(forwrd, m, n, x, ldx, k)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           logical(lk) :: forwrd
           integer(ilp) :: ldx, m, n
           ! .. array arguments ..
           integer(ilp) :: k(*)
           real(dp) :: x(ldx, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ii, in, j
           real(dp) :: temp
           ! .. executable statements ..
           if (n <= 1) return
           do i = 1, n
              k(i) = -k(i)
           end do
           if (forwrd) then
              ! forward permutation
              do i = 1, n
                 if (k(i) > 0) go to 40
                 j = i
                 k(j) = -k(j)
                 in = k(j)
20      continue
                 if (k(in) > 0) go to 40
                 do ii = 1, m
                    temp = x(ii, j)
                    x(ii, j) = x(ii, in)
                    x(ii, in) = temp
                 end do
                 k(in) = -k(in)
                 j = in
                 in = k(in)
                 go to 20
40      continue
              end do
           else
              ! backward permutation
              do i = 1, n
                 if (k(i) > 0) go to 80
                 k(i) = -k(i)
                 j = k(i)
60      continue
                 if (j == i) go to 80
                 do ii = 1, m
                    temp = x(ii, i)
                    x(ii, i) = x(ii, j)
                    x(ii, j) = temp
                 end do
                 k(j) = -k(j)
                 j = k(j)
                 go to 60
80      continue
              end do
           end if
           return
           ! end of stdlib_dlapmt
     end subroutine stdlib_dlapmt

     ! DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause
     ! unnecessary overflow and unnecessary underflow.

     real(dp) function stdlib_dlapy3(x, y, z)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: x, y, z
        ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: w, xabs, yabs, zabs, hugeval
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sqrt
           ! .. executable statements ..
           hugeval = stdlib_dlamch('overflow')
           xabs = abs(x)
           yabs = abs(y)
           zabs = abs(z)
           w = max(xabs, yabs, zabs)
           if (w == zero .or. w > hugeval) then
           ! w can be zero for max(0,nan,0)
           ! adding all three entries together will make sure
           ! nan will not disappear.
              stdlib_dlapy3 = xabs + yabs + zabs
           else
              stdlib_dlapy3 = w*sqrt((xabs/w)**2 + (yabs/w)**2 + (zabs/w)**2)
           end if
           return
           ! end of stdlib_dlapy3
     end function stdlib_dlapy3

     ! DLAQGB equilibrates a general M by N band matrix A with KL
     ! subdiagonals and KU superdiagonals using the row and scaling factors
     ! in the vectors R and C.

     subroutine stdlib_dlaqgb(m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed
           integer(ilp) :: kl, ku, ldab, m, n
           real(dp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), c(*), r(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: thresh = 0.1_dp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: cj, large, small
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           large = one/small
           if (rowcnd >= thresh .and. amax >= small .and. amax <= large) then
              ! no row scaling
              if (colcnd >= thresh) then
                 ! no column scaling
                 equed = 'n'
              else
                 ! column scaling
                 do j = 1, n
                    cj = c(j)
                    do i = max(1, j - ku), min(m, j + kl)
                       ab(ku + 1 + i - j, j) = cj*ab(ku + 1 + i - j, j)
                    end do
                 end do
                 equed = 'c'
              end if
           else if (colcnd >= thresh) then
              ! row scaling, no column scaling
              do j = 1, n
                 do i = max(1, j - ku), min(m, j + kl)
                    ab(ku + 1 + i - j, j) = r(i)*ab(ku + 1 + i - j, j)
                 end do
              end do
              equed = 'r'
           else
              ! row and column scaling
              do j = 1, n
                 cj = c(j)
                 do i = max(1, j - ku), min(m, j + kl)
                    ab(ku + 1 + i - j, j) = cj*r(i)*ab(ku + 1 + i - j, j)
                 end do
              end do
              equed = 'b'
           end if
           return
           ! end of stdlib_dlaqgb
     end subroutine stdlib_dlaqgb

     ! DLAQGE equilibrates a general M by N matrix A using the row and
     ! column scaling factors in the vectors R and C.

     subroutine stdlib_dlaqge(m, n, a, lda, r, c, rowcnd, colcnd, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed
           integer(ilp) :: lda, m, n
           real(dp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(*), r(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: thresh = 0.1_dp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: cj, large, small
     
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           large = one/small
           if (rowcnd >= thresh .and. amax >= small .and. amax <= large) then
              ! no row scaling
              if (colcnd >= thresh) then
                 ! no column scaling
                 equed = 'n'
              else
                 ! column scaling
                 do j = 1, n
                    cj = c(j)
                    do i = 1, m
                       a(i, j) = cj*a(i, j)
                    end do
                 end do
                 equed = 'c'
              end if
           else if (colcnd >= thresh) then
              ! row scaling, no column scaling
              do j = 1, n
                 do i = 1, m
                    a(i, j) = r(i)*a(i, j)
                 end do
              end do
              equed = 'r'
           else
              ! row and column scaling
              do j = 1, n
                 cj = c(j)
                 do i = 1, m
                    a(i, j) = cj*r(i)*a(i, j)
                 end do
              end do
              equed = 'b'
           end if
           return
           ! end of stdlib_dlaqge
     end subroutine stdlib_dlaqge

     ! Given a 2-by-2 or 3-by-3 matrix H, DLAQR1 sets v to a
     ! scalar multiple of the first column of the product
     ! (*)  K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I)
     ! scaling to avoid overflows and most underflows. It
     ! is assumed that either
     ! 1) sr1 = sr2 and si1 = -si2
     ! or
     ! 2) si1 = si2 = 0.
     ! This is useful for starting double implicit shift bulges
     ! in the QR algorithm.

     subroutine stdlib_dlaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: si1, si2, sr1, sr2
           integer(ilp) :: ldh, n
           ! .. array arguments ..
           real(dp) :: h(ldh, *), v(*)
        ! ================================================================
           
           ! .. local scalars ..
           real(dp) :: h21s, h31s, s
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! quick return if possible
           if (n /= 2 .and. n /= 3) then
              return
           end if
           if (n == 2) then
              s = abs(h(1, 1) - sr2) + abs(si2) + abs(h(2, 1))
              if (s == zero) then
                 v(1) = zero
                 v(2) = zero
              else
                 h21s = h(2, 1)/s
                 v(1) = h21s*h(1, 2) + (h(1, 1) - sr1)*((h(1, 1) - sr2)/s) - si1*( &
                           si2/s)
                 v(2) = h21s*(h(1, 1) + h(2, 2) - sr1 - sr2)
              end if
           else
              s = abs(h(1, 1) - sr2) + abs(si2) + abs(h(2, 1)) + abs(h(3, 1))
              if (s == zero) then
                 v(1) = zero
                 v(2) = zero
                 v(3) = zero
              else
                 h21s = h(2, 1)/s
                 h31s = h(3, 1)/s
                 v(1) = (h(1, 1) - sr1)*((h(1, 1) - sr2)/s) - si1*(si2/s) + h(1, 2) &
                           *h21s + h(1, 3)*h31s
                 v(2) = h21s*(h(1, 1) + h(2, 2) - sr1 - sr2) + h(2, 3)*h31s
                 v(3) = h31s*(h(1, 1) + h(3, 3) - sr1 - sr2) + h21s*h(3, 2)
              end if
           end if
     end subroutine stdlib_dlaqr1

     ! DLAQSB equilibrates a symmetric band matrix A using the scaling
     ! factors in the vector S.

     subroutine stdlib_dlaqsb(uplo, n, kd, ab, ldab, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: kd, ldab, n
           real(dp) :: amax, scond
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), s(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: thresh = 0.1_dp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: cj, large, small
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored in band format.
                 do j = 1, n
                    cj = s(j)
                    do i = max(1, j - kd), j
                       ab(kd + 1 + i - j, j) = cj*s(i)*ab(kd + 1 + i - j, j)
                    end do
                 end do
              else
                 ! lower triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    do i = j, min(n, j + kd)
                       ab(1 + i - j, j) = cj*s(i)*ab(1 + i - j, j)
                    end do
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_dlaqsb
     end subroutine stdlib_dlaqsb

     ! DLAQSP equilibrates a symmetric matrix A using the scaling factors
     ! in the vector S.

     subroutine stdlib_dlaqsp(uplo, n, ap, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: n
           real(dp) :: amax, scond
           ! .. array arguments ..
           real(dp) :: ap(*), s(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: thresh = 0.1_dp
           
           ! .. local scalars ..
           integer(ilp) :: i, j, jc
           real(dp) :: cj, large, small
     
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored.
                 jc = 1
                 do j = 1, n
                    cj = s(j)
                    do i = 1, j
                       ap(jc + i - 1) = cj*s(i)*ap(jc + i - 1)
                    end do
                    jc = jc + j
                 end do
              else
                 ! lower triangle of a is stored.
                 jc = 1
                 do j = 1, n
                    cj = s(j)
                    do i = j, n
                       ap(jc + i - j) = cj*s(i)*ap(jc + i - j)
                    end do
                    jc = jc + n - j + 1
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_dlaqsp
     end subroutine stdlib_dlaqsp

     ! DLAQSY equilibrates a symmetric matrix A using the scaling factors
     ! in the vector S.

     subroutine stdlib_dlaqsy(uplo, n, a, lda, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: lda, n
           real(dp) :: amax, scond
           ! .. array arguments ..
           real(dp) :: a(lda, *), s(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: thresh = 0.1_dp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: cj, large, small
     
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    do i = 1, j
                       a(i, j) = cj*s(i)*a(i, j)
                    end do
                 end do
              else
                 ! lower triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    do i = j, n
                       a(i, j) = cj*s(i)*a(i, j)
                    end do
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_dlaqsy
     end subroutine stdlib_dlaqsy

     ! DLAR2V applies a vector of real plane rotations from both sides to
     ! a sequence of 2-by-2 real symmetric matrices, defined by the elements
     ! of the vectors x, y and z. For i = 1,2,...,n
     ! ( x(i)  z(i) ) := (  c(i)  s(i) ) ( x(i)  z(i) ) ( c(i) -s(i) )
     ! ( z(i)  y(i) )    ( -s(i)  c(i) ) ( z(i)  y(i) ) ( s(i)  c(i) )

     subroutine stdlib_dlar2v(n, x, y, z, incx, c, s, incc)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incc, incx, n
           ! .. array arguments ..
           real(dp) :: c(*), s(*), x(*), y(*), z(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ic, ix
           real(dp) :: ci, si, t1, t2, t3, t4, t5, t6, xi, yi, zi
           ! .. executable statements ..
           ix = 1
           ic = 1
           do i = 1, n
              xi = x(ix)
              yi = y(ix)
              zi = z(ix)
              ci = c(ic)
              si = s(ic)
              t1 = si*zi
              t2 = ci*zi
              t3 = t2 - si*xi
              t4 = t2 + si*yi
              t5 = ci*xi + t1
              t6 = ci*yi - t1
              x(ix) = ci*t5 + si*t4
              y(ix) = ci*t6 - si*t3
              z(ix) = ci*t4 - si*t5
              ix = ix + incx
              ic = ic + incc
           end do
           ! end of stdlib_dlar2v
           return
     end subroutine stdlib_dlar2v

     ! DLARF applies a real elementary reflector H to a real m by n matrix
     ! C, from either the left or the right. H is represented in the form
     ! H = I - tau * v * v**T
     ! where tau is a real scalar and v is a real vector.
     ! If tau = 0, then H is taken to be the unit matrix.

     subroutine stdlib_dlarf(side, m, n, v, incv, tau, c, ldc, work)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side
           integer(ilp) :: incv, ldc, m, n
           real(dp) :: tau
           ! .. array arguments ..
           real(dp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: applyleft
           integer(ilp) :: i, lastv, lastc
     
           ! .. executable statements ..
           applyleft = stdlib_lsame(side, 'l')
           lastv = 0
           lastc = 0
           if (tau /= zero) then
           ! set up variables for scanning v.  lastv begins pointing to the end
           ! of v.
              if (applyleft) then
                 lastv = m
              else
                 lastv = n
              end if
              if (incv > 0) then
                 i = 1 + (lastv - 1)*incv
              else
                 i = 1
              end if
           ! look for the last non-zero row in v.
              do while (lastv > 0 .and. v(i) == zero)
                 lastv = lastv - 1
                 i = i - incv
              end do
              if (applyleft) then
           ! scan for the last non-zero column in c(1:lastv,:).
                 lastc = stdlib_iladlc(lastv, n, c, ldc)
              else
           ! scan for the last non-zero row in c(:,1:lastv).
                 lastc = stdlib_iladlr(m, lastv, c, ldc)
              end if
           end if
           ! note that lastc.eq.0 renders the blas operations null; no special
           ! case is needed at this level.
           if (applyleft) then
              ! form  h * c
              if (lastv > 0) then
                 ! w(1:lastc,1) := c(1:lastv,1:lastc)**t * v(1:lastv,1)
                 call stdlib_dgemv('transpose', lastv, lastc, one, c, ldc, v, incv, zero, work, 1 &
                           )
                 ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**t
                 call stdlib_dger(lastv, lastc, -tau, v, incv, work, 1, c, ldc)
              end if
           else
              ! form  c * h
              if (lastv > 0) then
                 ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1)
                 call stdlib_dgemv('no transpose', lastc, lastv, one, c, ldc, v, incv, zero, work, &
                            1)
                 ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**t
                 call stdlib_dger(lastc, lastv, -tau, work, 1, v, incv, c, ldc)
              end if
           end if
           return
           ! end of stdlib_dlarf
     end subroutine stdlib_dlarf

     ! DLARFB applies a real block reflector H or its transpose H**T to a
     ! real m by n matrix C, from either the left or the right.

     subroutine stdlib_dlarfb(side, trans, direct, storev, m, n, k, v, ldv, t, ldt, c, ldc, work, &
               ldwork)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, side, storev, trans
           integer(ilp) :: k, ldc, ldt, ldv, ldwork, m, n
           ! .. array arguments ..
           real(dp) :: c(ldc, *), t(ldt, *), v(ldv, *), work(ldwork, *)
        ! =====================================================================
           
           ! .. local scalars ..
           character :: transt
           integer(ilp) :: i, j
     
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) return
           if (stdlib_lsame(trans, 'n')) then
              transt = 't'
           else
              transt = 'n'
           end if
           if (stdlib_lsame(storev, 'c')) then
              if (stdlib_lsame(direct, 'f')) then
                 ! let  v =  ( v1 )    (first k rows)
                           ! ( v2 )
                 ! where  v1  is unit lower triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**t * c  where  c = ( c1 )
                                                          ! ( c2 )
                    ! w := c**t * v  =  (c1**t * v1 + c2**t * v2)  (stored in work)
                    ! w := c1**t
                    do j = 1, k
                       call stdlib_dcopy(n, c(j, 1), ldc, work(1, j), 1)
                    end do
                    ! w := w * v1
                    call stdlib_dtrmm('right', 'lower', 'no transpose', 'unit', n, k, one, v, ldv, &
                               work, ldwork)
                    if (m > k) then
                       ! w := w + c2**t * v2
                       call stdlib_dgemm('transpose', 'no transpose', n, k, m - k, one, c(k + 1, 1), &
                                  ldc, v(k + 1, 1), ldv, one, work, ldwork)
                    end if
                    ! w := w * t**t  or  w * t
                    call stdlib_dtrmm('right', 'upper', transt, 'non-unit', n, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - v * w**t
                    if (m > k) then
                       ! c2 := c2 - v2 * w**t
                       call stdlib_dgemm('no transpose', 'transpose', m - k, n, k, -one, v(k + 1, 1) &
                                 , ldv, work, ldwork, one, c(k + 1, 1), ldc)
                    end if
                    ! w := w * v1**t
                    call stdlib_dtrmm('right', 'lower', 'transpose', 'unit', n, k, one, v, ldv, &
                              work, ldwork)
                    ! c1 := c1 - w**t
                    do j = 1, k
                       do i = 1, n
                          c(j, i) = c(j, i) - work(i, j)
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h**t  where  c = ( c1  c2 )
                    ! w := c * v  =  (c1*v1 + c2*v2)  (stored in work)
                    ! w := c1
                    do j = 1, k
                       call stdlib_dcopy(m, c(1, j), 1, work(1, j), 1)
                    end do
                    ! w := w * v1
                    call stdlib_dtrmm('right', 'lower', 'no transpose', 'unit', m, k, one, v, ldv, &
                               work, ldwork)
                    if (n > k) then
                       ! w := w + c2 * v2
                       call stdlib_dgemm('no transpose', 'no transpose', m, k, n - k, one, c(1, k + &
                                 1), ldc, v(k + 1, 1), ldv, one, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**t
                    call stdlib_dtrmm('right', 'upper', trans, 'non-unit', m, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v**t
                    if (n > k) then
                       ! c2 := c2 - w * v2**t
                       call stdlib_dgemm('no transpose', 'transpose', m, n - k, k, -one, work, &
                                 ldwork, v(k + 1, 1), ldv, one, c(1, k + 1), ldc)
                    end if
                    ! w := w * v1**t
                    call stdlib_dtrmm('right', 'lower', 'transpose', 'unit', m, k, one, v, ldv, &
                              work, ldwork)
                    ! c1 := c1 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, j) = c(i, j) - work(i, j)
                       end do
                    end do
                 end if
              else
                 ! let  v =  ( v1 )
                           ! ( v2 )    (last k rows)
                 ! where  v2  is unit upper triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**t * c  where  c = ( c1 )
                                                          ! ( c2 )
                    ! w := c**t * v  =  (c1**t * v1 + c2**t * v2)  (stored in work)
                    ! w := c2**t
                    do j = 1, k
                       call stdlib_dcopy(n, c(m - k + j, 1), ldc, work(1, j), 1)
                    end do
                    ! w := w * v2
                    call stdlib_dtrmm('right', 'upper', 'no transpose', 'unit', n, k, one, v(m - k + &
                              1, 1), ldv, work, ldwork)
                    if (m > k) then
                       ! w := w + c1**t * v1
                       call stdlib_dgemm('transpose', 'no transpose', n, k, m - k, one, c, ldc, v, &
                                 ldv, one, work, ldwork)
                    end if
                    ! w := w * t**t  or  w * t
                    call stdlib_dtrmm('right', 'lower', transt, 'non-unit', n, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - v * w**t
                    if (m > k) then
                       ! c1 := c1 - v1 * w**t
                       call stdlib_dgemm('no transpose', 'transpose', m - k, n, k, -one, v, ldv, &
                                 work, ldwork, one, c, ldc)
                    end if
                    ! w := w * v2**t
                    call stdlib_dtrmm('right', 'upper', 'transpose', 'unit', n, k, one, v(m - k + 1, &
                              1), ldv, work, ldwork)
                    ! c2 := c2 - w**t
                    do j = 1, k
                       do i = 1, n
                          c(m - k + j, i) = c(m - k + j, i) - work(i, j)
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h**t  where  c = ( c1  c2 )
                    ! w := c * v  =  (c1*v1 + c2*v2)  (stored in work)
                    ! w := c2
                    do j = 1, k
                       call stdlib_dcopy(m, c(1, n - k + j), 1, work(1, j), 1)
                    end do
                    ! w := w * v2
                    call stdlib_dtrmm('right', 'upper', 'no transpose', 'unit', m, k, one, v(n - k + &
                              1, 1), ldv, work, ldwork)
                    if (n > k) then
                       ! w := w + c1 * v1
                       call stdlib_dgemm('no transpose', 'no transpose', m, k, n - k, one, c, ldc, &
                                 v, ldv, one, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**t
                    call stdlib_dtrmm('right', 'lower', trans, 'non-unit', m, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v**t
                    if (n > k) then
                       ! c1 := c1 - w * v1**t
                       call stdlib_dgemm('no transpose', 'transpose', m, n - k, k, -one, work, &
                                 ldwork, v, ldv, one, c, ldc)
                    end if
                    ! w := w * v2**t
                    call stdlib_dtrmm('right', 'upper', 'transpose', 'unit', m, k, one, v(n - k + 1, &
                              1), ldv, work, ldwork)
                    ! c2 := c2 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, n - k + j) = c(i, n - k + j) - work(i, j)
                       end do
                    end do
                 end if
              end if
           else if (stdlib_lsame(storev, 'r')) then
              if (stdlib_lsame(direct, 'f')) then
                 ! let  v =  ( v1  v2 )    (v1: first k columns)
                 ! where  v1  is unit upper triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**t * c  where  c = ( c1 )
                                                          ! ( c2 )
                    ! w := c**t * v**t  =  (c1**t * v1**t + c2**t * v2**t) (stored in work)
                    ! w := c1**t
                    do j = 1, k
                       call stdlib_dcopy(n, c(j, 1), ldc, work(1, j), 1)
                    end do
                    ! w := w * v1**t
                    call stdlib_dtrmm('right', 'upper', 'transpose', 'unit', n, k, one, v, ldv, &
                              work, ldwork)
                    if (m > k) then
                       ! w := w + c2**t * v2**t
                       call stdlib_dgemm('transpose', 'transpose', n, k, m - k, one, c(k + 1, 1), &
                                 ldc, v(1, k + 1), ldv, one, work, ldwork)
                    end if
                    ! w := w * t**t  or  w * t
                    call stdlib_dtrmm('right', 'upper', transt, 'non-unit', n, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - v**t * w**t
                    if (m > k) then
                       ! c2 := c2 - v2**t * w**t
                       call stdlib_dgemm('transpose', 'transpose', m - k, n, k, -one, v(1, k + 1), &
                                 ldv, work, ldwork, one, c(k + 1, 1), ldc)
                    end if
                    ! w := w * v1
                    call stdlib_dtrmm('right', 'upper', 'no transpose', 'unit', n, k, one, v, ldv, &
                               work, ldwork)
                    ! c1 := c1 - w**t
                    do j = 1, k
                       do i = 1, n
                          c(j, i) = c(j, i) - work(i, j)
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h**t  where  c = ( c1  c2 )
                    ! w := c * v**t  =  (c1*v1**t + c2*v2**t)  (stored in work)
                    ! w := c1
                    do j = 1, k
                       call stdlib_dcopy(m, c(1, j), 1, work(1, j), 1)
                    end do
                    ! w := w * v1**t
                    call stdlib_dtrmm('right', 'upper', 'transpose', 'unit', m, k, one, v, ldv, &
                              work, ldwork)
                    if (n > k) then
                       ! w := w + c2 * v2**t
                       call stdlib_dgemm('no transpose', 'transpose', m, k, n - k, one, c(1, k + 1), &
                                  ldc, v(1, k + 1), ldv, one, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**t
                    call stdlib_dtrmm('right', 'upper', trans, 'non-unit', m, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v
                    if (n > k) then
                       ! c2 := c2 - w * v2
                       call stdlib_dgemm('no transpose', 'no transpose', m, n - k, k, -one, work, &
                                 ldwork, v(1, k + 1), ldv, one, c(1, k + 1), ldc)
                    end if
                    ! w := w * v1
                    call stdlib_dtrmm('right', 'upper', 'no transpose', 'unit', m, k, one, v, ldv, &
                               work, ldwork)
                    ! c1 := c1 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, j) = c(i, j) - work(i, j)
                       end do
                    end do
                 end if
              else
                 ! let  v =  ( v1  v2 )    (v2: last k columns)
                 ! where  v2  is unit lower triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**t * c  where  c = ( c1 )
                                                          ! ( c2 )
                    ! w := c**t * v**t  =  (c1**t * v1**t + c2**t * v2**t) (stored in work)
                    ! w := c2**t
                    do j = 1, k
                       call stdlib_dcopy(n, c(m - k + j, 1), ldc, work(1, j), 1)
                    end do
                    ! w := w * v2**t
                    call stdlib_dtrmm('right', 'lower', 'transpose', 'unit', n, k, one, v(1, m - k + &
                              1), ldv, work, ldwork)
                    if (m > k) then
                       ! w := w + c1**t * v1**t
                       call stdlib_dgemm('transpose', 'transpose', n, k, m - k, one, c, ldc, v, ldv, &
                                  one, work, ldwork)
                    end if
                    ! w := w * t**t  or  w * t
                    call stdlib_dtrmm('right', 'lower', transt, 'non-unit', n, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - v**t * w**t
                    if (m > k) then
                       ! c1 := c1 - v1**t * w**t
                       call stdlib_dgemm('transpose', 'transpose', m - k, n, k, -one, v, ldv, work, &
                                 ldwork, one, c, ldc)
                    end if
                    ! w := w * v2
                    call stdlib_dtrmm('right', 'lower', 'no transpose', 'unit', n, k, one, v(1, &
                              m - k + 1), ldv, work, ldwork)
                    ! c2 := c2 - w**t
                    do j = 1, k
                       do i = 1, n
                          c(m - k + j, i) = c(m - k + j, i) - work(i, j)
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h'  where  c = ( c1  c2 )
                    ! w := c * v**t  =  (c1*v1**t + c2*v2**t)  (stored in work)
                    ! w := c2
                    do j = 1, k
                       call stdlib_dcopy(m, c(1, n - k + j), 1, work(1, j), 1)
                    end do
                    ! w := w * v2**t
                    call stdlib_dtrmm('right', 'lower', 'transpose', 'unit', m, k, one, v(1, n - k + &
                              1), ldv, work, ldwork)
                    if (n > k) then
                       ! w := w + c1 * v1**t
                       call stdlib_dgemm('no transpose', 'transpose', m, k, n - k, one, c, ldc, v, &
                                 ldv, one, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**t
                    call stdlib_dtrmm('right', 'lower', trans, 'non-unit', m, k, one, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v
                    if (n > k) then
                       ! c1 := c1 - w * v1
                       call stdlib_dgemm('no transpose', 'no transpose', m, n - k, k, -one, work, &
                                 ldwork, v, ldv, one, c, ldc)
                    end if
                    ! w := w * v2
                    call stdlib_dtrmm('right', 'lower', 'no transpose', 'unit', m, k, one, v(1, &
                              n - k + 1), ldv, work, ldwork)
                    ! c1 := c1 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, n - k + j) = c(i, n - k + j) - work(i, j)
                       end do
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_dlarfb
     end subroutine stdlib_dlarfb

     ! DLARFB_GETT applies a real Householder block reflector H from the
     ! left to a real (K+M)-by-N  "triangular-pentagonal" matrix
     ! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     ! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     ! in the array B. The block reflector H is stored in a compact
     ! WY-representation, where the elementary reflectors are in the
     ! arrays A, B and T. See Further Details section.

     subroutine stdlib_dlarfb_gett(ident, m, n, k, t, ldt, a, lda, b, ldb, work, ldwork)
     
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: ident
           integer(ilp) :: k, lda, ldb, ldt, ldwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *), t(ldt, *), work(ldwork, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lnotident
           integer(ilp) :: i, j
     
           ! .. executable statements ..
           ! quick return if possible
           if (m < 0 .or. n <= 0 .or. k == 0 .or. k > n) return
           lnotident = .not. stdlib_lsame(ident, 'i')
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if (n > k) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n - k
                 call stdlib_dcopy(k, a(1, k + j), 1, work(1, j), 1)
              end do
              if (lnotident) then
                 ! col2_(2) compute w2: = (v1**t) * w2 = (a1**t) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib_dtrmm('l', 'l', 't', 'u', k, n - k, one, a, lda, work, ldwork)
              end if
              ! col2_(3) compute w2: = w2 + (v2**t) * b2 = w2 + (b1**t) * b2
              ! v2 stored in b1.
              if (m > 0) then
                 call stdlib_dgemm('t', 'n', k, n - k, m, one, b, ldb, b(1, k + 1), ldb, one, work, &
                           ldwork)
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib_dtrmm('l', 'u', 'n', 'n', k, n - k, one, t, ldt, work, ldwork)
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if (m > 0) then
                 call stdlib_dgemm('n', 'n', m, n - k, k, -one, b, ldb, work, ldwork, one, b(1, k + &
                           1), ldb)
              end if
              if (lnotident) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib_dtrmm('l', 'l', 'n', 'u', k, n - k, one, a, lda, work, ldwork)
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n - k
                 do i = 1, k
                    a(i, k + j) = a(i, k + j) - work(i, j)
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib_dcopy(j, a(1, j), 1, work(1, j), 1)
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work(i, j) = zero
              end do
           end do
           if (lnotident) then
              ! col1_(2) compute w1: = (v1**t) * w1 = (a1**t) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib_dtrmm('l', 'l', 't', 'u', k, k, one, a, lda, work, ldwork)
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib_dtrmm('l', 'u', 'n', 'n', k, k, one, t, ldt, work, ldwork)
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if (m > 0) then
              call stdlib_dtrmm('r', 'u', 'n', 'n', m, k, -one, work, ldwork, b, ldb)
           end if
           if (lnotident) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib_dtrmm('l', 'l', 'n', 'u', k, k, one, a, lda, work, ldwork)
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a(i, j) = -work(i, j)
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a(i, j) = a(i, j) - work(i, j)
              end do
           end do
           return
           ! end of stdlib_dlarfb_gett
     end subroutine stdlib_dlarfb_gett

     ! DLARFT forms the triangular factor T of a real block reflector H
     ! of order n, which is defined as a product of k elementary reflectors.
     ! If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
     ! If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
     ! If STOREV = 'C', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th column of the array V, and
     ! H  =  I - V * T * V**T
     ! If STOREV = 'R', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th row of the array V, and
     ! H  =  I - V**T * T * V

     subroutine stdlib_dlarft(direct, storev, n, k, v, ldv, tau, t, ldt)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, storev
           integer(ilp) :: k, ldt, ldv, n
           ! .. array arguments ..
           real(dp) :: t(ldt, *), tau(*), v(ldv, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, prevlastv, lastv
     
           ! .. executable statements ..
           ! quick return if possible
           if (n == 0) return
           if (stdlib_lsame(direct, 'f')) then
              prevlastv = n
              do i = 1, k
                 prevlastv = max(i, prevlastv)
                 if (tau(i) == zero) then
                    ! h(i)  =  i
                    do j = 1, i
                       t(j, i) = zero
                    end do
                 else
                    ! general case
                    if (stdlib_lsame(storev, 'c')) then
                       ! skip any trailing zeros.
                       do lastv = n, i + 1, -1
                          if (v(lastv, i) /= zero) exit
                       end do
                       do j = 1, i - 1
                          t(j, i) = -tau(i)*v(i, j)
                       end do
                       j = min(lastv, prevlastv)
                       ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**t * v(i:j,i)
                       call stdlib_dgemv('transpose', j - i, i - 1, -tau(i), v(i + 1, 1), ldv, v(i + &
                                 1, i), 1, one, t(1, i), 1)
                    else
                       ! skip any trailing zeros.
                       do lastv = n, i + 1, -1
                          if (v(i, lastv) /= zero) exit
                       end do
                       do j = 1, i - 1
                          t(j, i) = -tau(i)*v(j, i)
                       end do
                       j = min(lastv, prevlastv)
                       ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**t
                       call stdlib_dgemv('no transpose', i - 1, j - i, -tau(i), v(1, i + 1), ldv, v( &
                                  i, i + 1), ldv, one, t(1, i), 1)
                    end if
                    ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
                    call stdlib_dtrmv('upper', 'no transpose', 'non-unit', i - 1, t, ldt, t(1, i), &
                               1)
                    t(i, i) = tau(i)
                    if (i > 1) then
                       prevlastv = max(prevlastv, lastv)
                    else
                       prevlastv = lastv
                    end if
                 end if
              end do
           else
              prevlastv = 1
              do i = k, 1, -1
                 if (tau(i) == zero) then
                    ! h(i)  =  i
                    do j = i, k
                       t(j, i) = zero
                    end do
                 else
                    ! general case
                    if (i < k) then
                       if (stdlib_lsame(storev, 'c')) then
                          ! skip any leading zeros.
                          do lastv = 1, i - 1
                             if (v(lastv, i) /= zero) exit
                          end do
                          do j = i + 1, k
                             t(j, i) = -tau(i)*v(n - k + i, j)
                          end do
                          j = max(lastv, prevlastv)
                          ! t(i+1:k,i) = -tau(i) * v(j:n-k+i,i+1:k)**t * v(j:n-k+i,i)
                          call stdlib_dgemv('transpose', n - k + i - j, k - i, -tau(i), v(j, i + 1), &
                                    ldv, v(j, i), 1, one, t(i + 1, i), 1)
                       else
                          ! skip any leading zeros.
                          do lastv = 1, i - 1
                             if (v(i, lastv) /= zero) exit
                          end do
                          do j = i + 1, k
                             t(j, i) = -tau(i)*v(j, n - k + i)
                          end do
                          j = max(lastv, prevlastv)
                          ! t(i+1:k,i) = -tau(i) * v(i+1:k,j:n-k+i) * v(i,j:n-k+i)**t
                          call stdlib_dgemv('no transpose', k - i, n - k + i - j, -tau(i), v(i + 1, j), &
                                    ldv, v(i, j), ldv, one, t(i + 1, i), 1)
                       end if
                       ! t(i+1:k,i) := t(i+1:k,i+1:k) * t(i+1:k,i)
                       call stdlib_dtrmv('lower', 'no transpose', 'non-unit', k - i, t(i + 1, i + 1), &
                                 ldt, t(i + 1, i), 1)
                       if (i > 1) then
                          prevlastv = min(prevlastv, lastv)
                       else
                          prevlastv = lastv
                       end if
                    end if
                    t(i, i) = tau(i)
                 end if
              end do
           end if
           return
           ! end of stdlib_dlarft
     end subroutine stdlib_dlarft

     ! DLARFX applies a real elementary reflector H to a real m by n
     ! matrix C, from either the left or the right. H is represented in the
     ! form
     ! H = I - tau * v * v**T
     ! where tau is a real scalar and v is a real vector.
     ! If tau = 0, then H is taken to be the unit matrix
     ! This version uses inline code if H has order < 11.

     subroutine stdlib_dlarfx(side, m, n, v, tau, c, ldc, work)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side
           integer(ilp) :: ldc, m, n
           real(dp) :: tau
           ! .. array arguments ..
           real(dp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: j
           real(dp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, v6, &
                     v7, v8, v9
     
           ! .. executable statements ..
           if (tau == zero) return
           if (stdlib_lsame(side, 'l')) then
              ! form  h * c, where h has order m.
              go to(10, 30, 50, 70, 90, 110, 130, 150, 170, 190) m
              ! code for general m
              call stdlib_dlarf(side, m, n, v, 1, tau, c, ldc, work)
              go to 410
10      continue
              ! special code for 1 x 1 householder
              t1 = one - tau*v(1)*v(1)
              do j = 1, n
                 c(1, j) = t1*c(1, j)
              end do
              go to 410
30      continue
              ! special code for 2 x 2 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
              end do
              go to 410
50      continue
              ! special code for 3 x 3 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
              end do
              go to 410
70      continue
              ! special code for 4 x 4 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
              end do
              go to 410
90      continue
              ! special code for 5 x 5 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j)
                           
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
              end do
              go to 410
110    continue
              ! special code for 6 x 6 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
              end do
              go to 410
130    continue
              ! special code for 7 x 7 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
              end do
              go to 410
150    continue
              ! special code for 8 x 8 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              v8 = v(8)
              t8 = tau*v8
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j) + v8*c(8, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
                 c(8, j) = c(8, j) - sum*t8
              end do
              go to 410
170    continue
              ! special code for 9 x 9 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              v8 = v(8)
              t8 = tau*v8
              v9 = v(9)
              t9 = tau*v9
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j) + v8*c(8, j) + v9*c(9, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
                 c(8, j) = c(8, j) - sum*t8
                 c(9, j) = c(9, j) - sum*t9
              end do
              go to 410
190    continue
              ! special code for 10 x 10 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              v8 = v(8)
              t8 = tau*v8
              v9 = v(9)
              t9 = tau*v9
              v10 = v(10)
              t10 = tau*v10
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j) + v8*c(8, j) + v9*c(9, j) + v10*c(10, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
                 c(8, j) = c(8, j) - sum*t8
                 c(9, j) = c(9, j) - sum*t9
                 c(10, j) = c(10, j) - sum*t10
              end do
              go to 410
           else
              ! form  c * h, where h has order n.
              go to(210, 230, 250, 270, 290, 310, 330, 350, 370, 390) n
              ! code for general n
              call stdlib_dlarf(side, m, n, v, 1, tau, c, ldc, work)
              go to 410
210    continue
              ! special code for 1 x 1 householder
              t1 = one - tau*v(1)*v(1)
              do j = 1, m
                 c(j, 1) = t1*c(j, 1)
              end do
              go to 410
230    continue
              ! special code for 2 x 2 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
              end do
              go to 410
250    continue
              ! special code for 3 x 3 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
              end do
              go to 410
270    continue
              ! special code for 4 x 4 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
              end do
              go to 410
290    continue
              ! special code for 5 x 5 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5)
                           
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
              end do
              go to 410
310    continue
              ! special code for 6 x 6 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
              end do
              go to 410
330    continue
              ! special code for 7 x 7 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
              end do
              go to 410
350    continue
              ! special code for 8 x 8 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              v8 = v(8)
              t8 = tau*v8
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7) + v8*c(j, 8)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
                 c(j, 8) = c(j, 8) - sum*t8
              end do
              go to 410
370    continue
              ! special code for 9 x 9 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              v8 = v(8)
              t8 = tau*v8
              v9 = v(9)
              t9 = tau*v9
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7) + v8*c(j, 8) + v9*c(j, 9)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
                 c(j, 8) = c(j, 8) - sum*t8
                 c(j, 9) = c(j, 9) - sum*t9
              end do
              go to 410
390    continue
              ! special code for 10 x 10 householder
              v1 = v(1)
              t1 = tau*v1
              v2 = v(2)
              t2 = tau*v2
              v3 = v(3)
              t3 = tau*v3
              v4 = v(4)
              t4 = tau*v4
              v5 = v(5)
              t5 = tau*v5
              v6 = v(6)
              t6 = tau*v6
              v7 = v(7)
              t7 = tau*v7
              v8 = v(8)
              t8 = tau*v8
              v9 = v(9)
              t9 = tau*v9
              v10 = v(10)
              t10 = tau*v10
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7) + v8*c(j, 8) + v9*c(j, 9) + v10*c(j, 10)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
                 c(j, 8) = c(j, 8) - sum*t8
                 c(j, 9) = c(j, 9) - sum*t9
                 c(j, 10) = c(j, 10) - sum*t10
              end do
              go to 410
           end if
410    continue
           return
           ! end of stdlib_dlarfx
     end subroutine stdlib_dlarfx

     ! DLARFY applies an elementary reflector, or Householder matrix, H,
     ! to an n x n symmetric matrix C, from both the left and the right.
     ! H is represented in the form
     ! H = I - tau * v * v'
     ! where  tau  is a scalar and  v  is a vector.
     ! If  tau  is  zero, then  H  is taken to be the unit matrix.

     subroutine stdlib_dlarfy(uplo, n, v, incv, tau, c, ldc, work)
        ! -- lapack test routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: incv, ldc, n
           real(dp) :: tau
           ! .. array arguments ..
           real(dp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: alpha
     
           ! .. executable statements ..
           if (tau == zero) return
           ! form  w:= c * v
           call stdlib_dsymv(uplo, n, one, c, ldc, v, incv, zero, work, 1)
           alpha = -half*tau*stdlib_ddot(n, work, 1, v, incv)
           call stdlib_daxpy(n, alpha, v, incv, work, 1)
           ! c := c - v * w' - w * v'
           call stdlib_dsyr2(uplo, n, -tau, v, incv, work, 1, c, ldc)
           return
           ! end of stdlib_dlarfy
     end subroutine stdlib_dlarfy

     ! DLARGV generates a vector of real plane rotations, determined by
     ! elements of the real vectors x and y. For i = 1,2,...,n
     ! (  c(i)  s(i) ) ( x(i) ) = ( a(i) )
     ! ( -s(i)  c(i) ) ( y(i) ) = (   0  )

     subroutine stdlib_dlargv(n, x, incx, y, incy, c, incc)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incc, incx, incy, n
           ! .. array arguments ..
           real(dp) :: c(*), x(*), y(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, ic, ix, iy
           real(dp) :: f, g, t, tt
           ! .. intrinsic functions ..
           intrinsic :: abs, sqrt
           ! .. executable statements ..
           ix = 1
           iy = 1
           ic = 1
           loop_10: do i = 1, n
              f = x(ix)
              g = y(iy)
              if (g == zero) then
                 c(ic) = one
              else if (f == zero) then
                 c(ic) = zero
                 y(iy) = one
                 x(ix) = g
              else if (abs(f) > abs(g)) then
                 t = g/f
                 tt = sqrt(one + t*t)
                 c(ic) = one/tt
                 y(iy) = t*c(ic)
                 x(ix) = f*tt
              else
                 t = f/g
                 tt = sqrt(one + t*t)
                 y(iy) = one/tt
                 c(ic) = t*y(iy)
                 x(ix) = g*tt
              end if
              ic = ic + incc
              iy = iy + incy
              ix = ix + incx
           end do loop_10
           return
           ! end of stdlib_dlargv
     end subroutine stdlib_dlargv

     ! Compute the splitting points with threshold SPLTOL.
     ! DLARRA sets any "small" off-diagonal elements to zero.

     subroutine stdlib_dlarra(n, d, e, e2, spltol, tnrm, nsplit, isplit, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, n, nsplit
           real(dp) :: spltol, tnrm
           ! .. array arguments ..
           integer(ilp) :: isplit(*)
           real(dp) :: d(*), e(*), e2(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           real(dp) :: eabs, tmp1
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           info = 0
           ! quick return if possible
           if (n <= 0) then
              return
           end if
           ! compute splitting points
           nsplit = 1
           if (spltol < zero) then
              ! criterion based on absolute off-diagonal value
              tmp1 = abs(spltol)*tnrm
              do i = 1, n - 1
                 eabs = abs(e(i))
                 if (eabs <= tmp1) then
                    e(i) = zero
                    e2(i) = zero
                    isplit(nsplit) = i
                    nsplit = nsplit + 1
                 end if
              end do
           else
              ! criterion that guarantees relative accuracy
              do i = 1, n - 1
                 eabs = abs(e(i))
                 if (eabs <= spltol*sqrt(abs(d(i)))*sqrt(abs(d(i + 1)))) then
                    e(i) = zero
                    e2(i) = zero
                    isplit(nsplit) = i
                    nsplit = nsplit + 1
                 end if
              end do
           end if
           isplit(nsplit) = n
           return
           ! end of stdlib_dlarra
     end subroutine stdlib_dlarra

     ! Find the number of eigenvalues of the symmetric tridiagonal matrix T
     ! that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T
     ! if JOBT = 'L'.

     subroutine stdlib_dlarrc(jobt, n, vl, vu, d, e, pivmin, eigcnt, lcnt, rcnt, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: jobt
           integer(ilp) :: eigcnt, info, lcnt, n, rcnt
           real(dp) :: pivmin, vl, vu
           ! .. array arguments ..
           real(dp) :: d(*), e(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           logical(lk) :: matt
           real(dp) :: lpivot, rpivot, sl, su, tmp, tmp2
     
           ! .. executable statements ..
           info = 0
           ! quick return if possible
           if (n <= 0) then
              return
           end if
           lcnt = 0
           rcnt = 0
           eigcnt = 0
           matt = stdlib_lsame(jobt, 't')
           if (matt) then
              ! sturm sequence count on t
              lpivot = d(1) - vl
              rpivot = d(1) - vu
              if (lpivot <= zero) then
                 lcnt = lcnt + 1
              end if
              if (rpivot <= zero) then
                 rcnt = rcnt + 1
              end if
              do i = 1, n - 1
                 tmp = e(i)**2
                 lpivot = (d(i + 1) - vl) - tmp/lpivot
                 rpivot = (d(i + 1) - vu) - tmp/rpivot
                 if (lpivot <= zero) then
                    lcnt = lcnt + 1
                 end if
                 if (rpivot <= zero) then
                    rcnt = rcnt + 1
                 end if
              end do
           else
              ! sturm sequence count on l d l^t
              sl = -vl
              su = -vu
              do i = 1, n - 1
                 lpivot = d(i) + sl
                 rpivot = d(i) + su
                 if (lpivot <= zero) then
                    lcnt = lcnt + 1
                 end if
                 if (rpivot <= zero) then
                    rcnt = rcnt + 1
                 end if
                 tmp = e(i)*d(i)*e(i)
                 tmp2 = tmp/lpivot
                 if (tmp2 == zero) then
                    sl = tmp - vl
                 else
                    sl = sl*tmp2 - vl
                 end if
                 tmp2 = tmp/rpivot
                 if (tmp2 == zero) then
                    su = tmp - vu
                 else
                    su = su*tmp2 - vu
                 end if
              end do
              lpivot = d(n) + sl
              rpivot = d(n) + su
              if (lpivot <= zero) then
                 lcnt = lcnt + 1
              end if
              if (rpivot <= zero) then
                 rcnt = rcnt + 1
              end if
           end if
           eigcnt = rcnt - lcnt
           return
           ! end of stdlib_dlarrc
     end subroutine stdlib_dlarrc

     ! Given the initial eigenvalue approximations of T, DLARRJ
     ! does  bisection to refine the eigenvalues of T,
     ! W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial
     ! guesses for these eigenvalues are input in W, the corresponding estimate
     ! of the error in these guesses in WERR. During bisection, intervals
     ! [left, right] are maintained by storing their mid-points and
     ! semi-widths in the arrays W and WERR respectively.

     subroutine stdlib_dlarrj(n, d, e2, ifirst, ilast, rtol, offset, w, werr, work, iwork, pivmin, &
               spdiam, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: ifirst, ilast, info, n, offset
           real(dp) :: pivmin, rtol, spdiam
           ! .. array arguments ..
           integer(ilp) :: iwork(*)
           real(dp) :: d(*), e2(*), w(*), werr(*), work(*)
        ! =====================================================================
           
           integer(ilp) :: maxitr
           ! .. local scalars ..
           integer(ilp) :: cnt, i, i1, i2, ii, iter, j, k, next, nint, olnint, p, prev, &
                     savi1
           real(dp) :: dplus, fac, left, mid, right, s, tmp, width
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. executable statements ..
           info = 0
           ! quick return if possible
           if (n <= 0) then
              return
           end if
           maxitr = int((log(spdiam + pivmin) - log(pivmin))/log(two)) + 2
           ! initialize unconverged intervals in [ work(2*i-1), work(2*i) ].
           ! the sturm count, count( work(2*i-1) ) is arranged to be i-1, while
           ! count( work(2*i) ) is stored in iwork( 2*i ). the integer iwork( 2*i-1 )
           ! for an unconverged interval is set to the index of the next unconverged
           ! interval, and is -1 or 0 for a converged interval. thus a linked
           ! list of unconverged intervals is set up.
           i1 = ifirst
           i2 = ilast
           ! the number of unconverged intervals
           nint = 0
           ! the last unconverged interval found
           prev = 0
           loop_75: do i = i1, i2
              k = 2*i
              ii = i - offset
              left = w(ii) - werr(ii)
              mid = w(ii)
              right = w(ii) + werr(ii)
              width = right - mid
              tmp = max(abs(left), abs(right))
              ! the following test prevents the test of converged intervals
              if (width < rtol*tmp) then
                 ! this interval has already converged and does not need refinement.
                 ! (note that the gaps might change through refining the
                  ! eigenvalues, however, they can only get bigger.)
                 ! remove it from the list.
                 iwork(k - 1) = -1
                 ! make sure that i1 always points to the first unconverged interval
                 if ((i == i1) .and. (i < i2)) i1 = i + 1
                 if ((prev >= i1) .and. (i <= i2)) iwork(2*prev - 1) = i + 1
              else
                 ! unconverged interval found
                 prev = i
                 ! make sure that [left,right] contains the desired eigenvalue
                 ! do while( cnt(left)>i-1 )
                 fac = one
20    continue
                 cnt = 0
                 s = left
                 dplus = d(1) - s
                 if (dplus < zero) cnt = cnt + 1
                 do j = 2, n
                    dplus = d(j) - s - e2(j - 1)/dplus
                    if (dplus < zero) cnt = cnt + 1
                 end do
                 if (cnt > i - 1) then
                    left = left - werr(ii)*fac
                    fac = two*fac
                    go to 20
                 end if
                 ! do while( cnt(right)<i )
                 fac = one
50    continue
                 cnt = 0
                 s = right
                 dplus = d(1) - s
                 if (dplus < zero) cnt = cnt + 1
                 do j = 2, n
                    dplus = d(j) - s - e2(j - 1)/dplus
                    if (dplus < zero) cnt = cnt + 1
                 end do
                 if (cnt < i) then
                    right = right + werr(ii)*fac
                    fac = two*fac
                    go to 50
                 end if
                 nint = nint + 1
                 iwork(k - 1) = i + 1
                 iwork(k) = cnt
              end if
              work(k - 1) = left
              work(k) = right
           end do loop_75
           savi1 = i1
           ! do while( nint>0 ), i.e. there are still unconverged intervals
           ! and while (iter<maxitr)
           iter = 0
80    continue
           prev = i1 - 1
           i = i1
           olnint = nint
           loop_100: do p = 1, olnint
              k = 2*i
              ii = i - offset
              next = iwork(k - 1)
              left = work(k - 1)
              right = work(k)
              mid = half*(left + right)
              ! semiwidth of interval
              width = right - mid
              tmp = max(abs(left), abs(right))
              if ((width < rtol*tmp) .or. (iter == maxitr)) then
                 ! reduce number of unconverged intervals
                 nint = nint - 1
                 ! mark interval as converged.
                 iwork(k - 1) = 0
                 if (i1 == i) then
                    i1 = next
                 else
                    ! prev holds the last unconverged interval previously examined
                    if (prev >= i1) iwork(2*prev - 1) = next
                 end if
                 i = next
                 cycle loop_100
              end if
              prev = i
              ! perform one bisection step
              cnt = 0
              s = mid
              dplus = d(1) - s
              if (dplus < zero) cnt = cnt + 1
              do j = 2, n
                 dplus = d(j) - s - e2(j - 1)/dplus
                 if (dplus < zero) cnt = cnt + 1
              end do
              if (cnt <= i - 1) then
                 work(k - 1) = mid
              else
                 work(k) = mid
              end if
              i = next
           end do loop_100
           iter = iter + 1
           ! do another loop if there are still unconverged intervals
           ! however, in the last iteration, all intervals are accepted
           ! since this is the best we can do.
           if ((nint > 0) .and. (iter <= maxitr)) go to 80
           ! at this point, all the intervals have converged
           do i = savi1, ilast
              k = 2*i
              ii = i - offset
              ! all intervals marked by '0' have been refined.
              if (iwork(k - 1) == 0) then
                 w(ii) = half*(work(k - 1) + work(k))
                 werr(ii) = work(k) - w(ii)
              end if
           end do
           return
           ! end of stdlib_dlarrj
     end subroutine stdlib_dlarrj

     ! DLARRK computes one eigenvalue of a symmetric tridiagonal
     ! matrix T to suitable accuracy. This is an auxiliary code to be
     ! called from DSTEMR.
     ! To avoid overflow, the matrix must be scaled so that its
     ! largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest
     ! accuracy, it should not be much smaller than that.
     ! See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal
     ! Matrix", Report CS41, Computer Science Dept., Stanford
     ! University, July 21, 1966.

     subroutine stdlib_dlarrk(n, iw, gl, gu, d, e2, pivmin, reltol, w, werr, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, iw, n
           real(dp) :: pivmin, reltol, gl, gu, w, werr
           ! .. array arguments ..
           real(dp) :: d(*), e2(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: fudge = two
           
           ! .. local scalars ..
           integer(ilp) :: i, it, itmax, negcnt
           real(dp) :: atoli, eps, left, mid, right, rtoli, tmp1, tmp2, tnorm
     
           ! .. intrinsic functions ..
           intrinsic :: abs, int, log, max
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              info = 0
              return
           end if
           ! get machine constants
           eps = stdlib_dlamch('p')
           tnorm = max(abs(gl), abs(gu))
           rtoli = reltol
           atoli = fudge*two*pivmin
           itmax = int((log(tnorm + pivmin) - log(pivmin))/log(two)) + 2
           info = -1
           left = gl - fudge*tnorm*eps*n - fudge*two*pivmin
           right = gu + fudge*tnorm*eps*n + fudge*two*pivmin
           it = 0
10    continue
           ! check if interval converged or maximum number of iterations reached
           tmp1 = abs(right - left)
           tmp2 = max(abs(right), abs(left))
           if (tmp1 < max(atoli, pivmin, rtoli*tmp2)) then
              info = 0
              goto 30
           end if
           if (it > itmax) goto 30
           ! count number of negative pivots for mid-point
           it = it + 1
           mid = half*(left + right)
           negcnt = 0
           tmp1 = d(1) - mid
           if (abs(tmp1) < pivmin) tmp1 = -pivmin
           if (tmp1 <= zero) negcnt = negcnt + 1
           do i = 2, n
              tmp1 = d(i) - e2(i - 1)/tmp1 - mid
              if (abs(tmp1) < pivmin) tmp1 = -pivmin
              if (tmp1 <= zero) negcnt = negcnt + 1
           end do
           if (negcnt >= iw) then
              right = mid
           else
              left = mid
           end if
           goto 10
30    continue
           ! converged or maximum number of iterations reached
           w = half*(left + right)
           werr = half*abs(right - left)
           return
           ! end of stdlib_dlarrk
     end subroutine stdlib_dlarrk

     ! Perform tests to decide whether the symmetric tridiagonal matrix T
     ! warrants expensive computations which guarantee high relative accuracy
     ! in the eigenvalues.

     subroutine stdlib_dlarrr(n, d, e, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n, info
           ! .. array arguments ..
           real(dp) :: d(*), e(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: relcond = 0.999d0
           
           ! .. local scalars ..
           integer(ilp) :: i
           logical(lk) :: yesrel
           real(dp) :: eps, safmin, smlnum, rmin, tmp, tmp2, offdig, offdig2
     
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              info = 0
              return
           end if
           ! as a default, do not go for relative-accuracy preserving computations.
           info = 1
           safmin = stdlib_dlamch('safe minimum')
           eps = stdlib_dlamch('precision')
           smlnum = safmin/eps
           rmin = sqrt(smlnum)
           ! tests for relative accuracy
           ! test for scaled diagonal dominance
           ! scale the diagonal entries to one and check whether the sum of the
           ! off-diagonals is less than one
           ! the sdd relative error bounds have a 1/(1- 2*x) factor in them,
           ! x = max(offdig + offdig2), so when x is close to 1/2, no relative
           ! accuracy is promised.  in the notation of the code fragment below,
           ! 1/(1 - (offdig + offdig2)) is the condition number.
           ! we don't think it is worth going into "sdd mode" unless the relative
           ! condition number is reasonable, not 1/macheps.
           ! the threshold should be compatible with other thresholds used in the
           ! code. we set  offdig + offdig2 <= .999 =: relcond, it corresponds
           ! to losing at most 3 decimal digits: 1 / (1 - (offdig + offdig2)) <= 1000
           ! instead of the current offdig + offdig2 < 1
           yesrel = .true.
           offdig = zero
           tmp = sqrt(abs(d(1)))
           if (tmp < rmin) yesrel = .false.
           if (.not. yesrel) goto 11
           do i = 2, n
              tmp2 = sqrt(abs(d(i)))
              if (tmp2 < rmin) yesrel = .false.
              if (.not. yesrel) goto 11
              offdig2 = abs(e(i - 1))/(tmp*tmp2)
              if (offdig + offdig2 >= relcond) yesrel = .false.
              if (.not. yesrel) goto 11
              tmp = tmp2
              offdig = offdig2
           end do
11    continue
           if (yesrel) then
              info = 0
              return
           else
           end if
           ! *** more to be implemented ***
           ! test if the lower bidiagonal matrix l from t = l d l^t
           ! (zero shift facto) is well conditioned
           ! test if the upper bidiagonal matrix u from t = u d u^t
           ! (zero shift facto) is well conditioned.
           ! in this case, the matrix needs to be flipped and, at the end
           ! of the eigenvector computation, the flip needs to be applied
           ! to the computed eigenvectors (and the support)
           return
           ! end of stdlib_dlarrr
     end subroutine stdlib_dlarrr

     ! !
     ! DLARTG generates a plane rotation so that
     ! [  C  S  ]  .  [ F ]  =  [ R ]
     ! [ -S  C  ]     [ G ]     [ 0 ]
     ! where C**2 + S**2 = 1.
     ! The mathematical formulas used for C and S are
     ! R = sign(F) * sqrt(F**2 + G**2)
     ! C = F / R
     ! S = G / R
     ! Hence C >= 0. The algorithm used to compute these quantities
     ! incorporates scaling to avoid overflow or underflow in computing the
     ! square root of the sum of squares.
     ! This version is discontinuous in R at F = 0 but it returns the same
     ! C and S as ZLARTG for complex inputs (F,0) and (G,0).
     ! This is a more accurate version of the BLAS1 routine DROTG,
     ! with the following other differences:
     ! F and G are unchanged on return.
     ! If G=0, then C=1 and S=0.
     ! If F=0 and (G .ne. 0), then C=0 and S=sign(1,G) without doing any
     ! floating point operations (saves work in DBDSQR when
     ! there are zeros on the diagonal).
     ! If F exceeds G in magnitude, C will be positive.
     ! Below, wp=>dp stands for double precision from LA_CONSTANTS module.

     subroutine stdlib_dlartg(f, g, c, s, r)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! february 2021
        ! .. scalar arguments ..
        real(dp) :: c, f, g, r, s
        ! .. local scalars ..
        real(dp) :: d, f1, fs, g1, gs, p, u, uu
        ! .. intrinsic functions ..
        intrinsic :: abs, sign, sqrt
        ! .. executable statements ..
        f1 = abs(f)
        g1 = abs(g)
        if (g == zero) then
           c = one
           s = zero
           r = f
        else if (f == zero) then
           c = zero
           s = sign(one, g)
           r = g1
     else if (f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax) &
               then
           d = sqrt(f*f + g*g)
           p = one/d
           c = f1*p
           s = g*sign(p, f)
           r = sign(d, f)
        else
           u = min(safmax, max(safmin, f1, g1))
           uu = one/u
           fs = f*uu
           gs = g*uu
           d = sqrt(fs*fs + gs*gs)
           p = one/d
           c = abs(fs)*p
           s = gs*sign(p, f)
           r = sign(d, f)*u
        end if
        return
     end subroutine stdlib_dlartg

     ! DLARTGP generates a plane rotation so that
     ! [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1.
     ! [ -SN  CS  ]     [ G ]     [ 0 ]
     ! This is a slower, more accurate version of the Level 1 BLAS routine DROTG,
     ! with the following other differences:
     ! F and G are unchanged on return.
     ! If G=0, then CS=(+/-)1 and SN=0.
     ! If F=0 and (G .ne. 0), then CS=0 and SN=(+/-)1.
     ! The sign is chosen so that R >= 0.

     subroutine stdlib_dlartgp(f, g, cs, sn, r)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: cs, f, g, r, sn
        ! =====================================================================
           
           ! .. local scalars ..
           ! logical            first
           integer(ilp) :: count, i
           real(dp) :: eps, f1, g1, safmin, safmn2, safmx2, scale
     
           ! .. intrinsic functions ..
           intrinsic :: abs, int, log, max, sign, sqrt
           ! .. save statement ..
           ! save               first, safmx2, safmin, safmn2
           ! .. data statements ..
           ! data               first / .true. /
           ! .. executable statements ..
           ! if( first ) then
              safmin = stdlib_dlamch('s')
              eps = stdlib_dlamch('e')
              safmn2 = stdlib_dlamch('b')**int(log(safmin/eps)/log(stdlib_dlamch('b')) &
                         /two)
              safmx2 = one/safmn2
              ! first = .false.
           ! end if
           if (g == zero) then
              cs = sign(one, f)
              sn = zero
              r = abs(f)
           else if (f == zero) then
              cs = zero
              sn = sign(one, g)
              r = abs(g)
           else
              f1 = f
              g1 = g
              scale = max(abs(f1), abs(g1))
              if (scale >= safmx2) then
                 count = 0
10      continue
                 count = count + 1
                 f1 = f1*safmn2
                 g1 = g1*safmn2
                 scale = max(abs(f1), abs(g1))
                 if (scale >= safmx2 .and. count < 20) go to 10
                 r = sqrt(f1**2 + g1**2)
                 cs = f1/r
                 sn = g1/r
                 do i = 1, count
                    r = r*safmx2
                 end do
              else if (scale <= safmn2) then
                 count = 0
30      continue
                 count = count + 1
                 f1 = f1*safmx2
                 g1 = g1*safmx2
                 scale = max(abs(f1), abs(g1))
                 if (scale <= safmn2) go to 30
                 r = sqrt(f1**2 + g1**2)
                 cs = f1/r
                 sn = g1/r
                 do i = 1, count
                    r = r*safmn2
                 end do
              else
                 r = sqrt(f1**2 + g1**2)
                 cs = f1/r
                 sn = g1/r
              end if
              if (r < zero) then
                 cs = -cs
                 sn = -sn
                 r = -r
              end if
           end if
           return
           ! end of stdlib_dlartgp
     end subroutine stdlib_dlartgp

     ! DLARTGS generates a plane rotation designed to introduce a bulge in
     ! Golub-Reinsch-style implicit QR iteration for the bidiagonal SVD
     ! problem. X and Y are the top-row entries, and SIGMA is the shift.
     ! The computed CS and SN define a plane rotation satisfying
     ! [  CS  SN  ]  .  [ X^2 - SIGMA ]  =  [ R ],
     ! [ -SN  CS  ]     [    X * Y    ]     [ 0 ]
     ! with R nonnegative.  If X^2 - SIGMA and X * Y are 0, then the
     ! rotation is by PI/2.

     subroutine stdlib_dlartgs(x, y, sigma, cs, sn)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: cs, sigma, sn, x, y
        ! ===================================================================
           ! .. parameters ..
           real(dp), parameter :: negone = -1.0d0
           
           ! .. local scalars ..
           real(dp) :: r, s, thresh, w, z
     
           thresh = stdlib_dlamch('e')
           ! compute the first column of b**t*b - sigma^2*i, up to a scale
           ! factor.
           if ((sigma == zero .and. abs(x) < thresh) .or. (abs(x) == sigma .and. y == zero)) &
                     then
              z = zero
              w = zero
           else if (sigma == zero) then
              if (x >= zero) then
                 z = x
                 w = y
              else
                 z = -x
                 w = -y
              end if
           else if (abs(x) < thresh) then
              z = -sigma*sigma
              w = zero
           else
              if (x >= zero) then
                 s = one
              else
                 s = negone
              end if
              z = s*(abs(x) - sigma)*(s + sigma/x)
              w = s*y
           end if
           ! generate the rotation.
           ! call stdlib_dlartgp( z, w, cs, sn, r ) might seem more natural;
           ! reordering the arguments ensures that if z = 0 then the rotation
           ! is by pi/2.
           call stdlib_dlartgp(w, z, sn, cs, r)
           return
           ! end stdlib_dlartgs
     end subroutine stdlib_dlartgs

     ! DLARTV applies a vector of real plane rotations to elements of the
     ! real vectors x and y. For i = 1,2,...,n
     ! ( x(i) ) := (  c(i)  s(i) ) ( x(i) )
     ! ( y(i) )    ( -s(i)  c(i) ) ( y(i) )

     subroutine stdlib_dlartv(n, x, incx, y, incy, c, s, incc)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incc, incx, incy, n
           ! .. array arguments ..
           real(dp) :: c(*), s(*), x(*), y(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ic, ix, iy
           real(dp) :: xi, yi
           ! .. executable statements ..
           ix = 1
           iy = 1
           ic = 1
           do i = 1, n
              xi = x(ix)
              yi = y(iy)
              x(ix) = c(ic)*xi + s(ic)*yi
              y(iy) = c(ic)*yi - s(ic)*xi
              ix = ix + incx
              iy = iy + incy
              ic = ic + incc
           end do
           return
           ! end of stdlib_dlartv
     end subroutine stdlib_dlartv

     ! DLARUV returns a vector of n random real numbers from a uniform (0,1)
     ! distribution (n <= 128).
     ! This is an auxiliary routine called by DLARNV and ZLARNV.

     subroutine stdlib_dlaruv(iseed, n, x)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n
           ! .. array arguments ..
           integer(ilp) :: iseed(4)
           real(dp) :: x(n)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: lv = 128
           integer(ilp), parameter :: ipw2 = 4096
           real(dp), parameter :: r = one/ipw2
           
           ! .. local scalars ..
           integer(ilp) :: i, i1, i2, i3, i4, it1, it2, it3, it4, j
           ! .. local arrays ..
           integer(ilp) :: mm(lv, 4)
           ! .. intrinsic functions ..
           intrinsic :: dble, min, mod
           ! .. data statements ..
           data(mm(1, j), j=1, 4)/494, 322, 2508, 2549/
           data(mm(2, j), j=1, 4)/2637, 789, 3754, 1145/
           data(mm(3, j), j=1, 4)/255, 1440, 1766, 2253/
           data(mm(4, j), j=1, 4)/2008, 752, 3572, 305/
           data(mm(5, j), j=1, 4)/1253, 2859, 2893, 3301/
           data(mm(6, j), j=1, 4)/3344, 123, 307, 1065/
           data(mm(7, j), j=1, 4)/4084, 1848, 1297, 3133/
           data(mm(8, j), j=1, 4)/1739, 643, 3966, 2913/
           data(mm(9, j), j=1, 4)/3143, 2405, 758, 3285/
           data(mm(10, j), j=1, 4)/3468, 2638, 2598, 1241/
           data(mm(11, j), j=1, 4)/688, 2344, 3406, 1197/
           data(mm(12, j), j=1, 4)/1657, 46, 2922, 3729/
           data(mm(13, j), j=1, 4)/1238, 3814, 1038, 2501/
           data(mm(14, j), j=1, 4)/3166, 913, 2934, 1673/
           data(mm(15, j), j=1, 4)/1292, 3649, 2091, 541/
           data(mm(16, j), j=1, 4)/3422, 339, 2451, 2753/
           data(mm(17, j), j=1, 4)/1270, 3808, 1580, 949/
           data(mm(18, j), j=1, 4)/2016, 822, 1958, 2361/
           data(mm(19, j), j=1, 4)/154, 2832, 2055, 1165/
           data(mm(20, j), j=1, 4)/2862, 3078, 1507, 4081/
           data(mm(21, j), j=1, 4)/697, 3633, 1078, 2725/
           data(mm(22, j), j=1, 4)/1706, 2970, 3273, 3305/
           data(mm(23, j), j=1, 4)/491, 637, 17, 3069/
           data(mm(24, j), j=1, 4)/931, 2249, 854, 3617/
           data(mm(25, j), j=1, 4)/1444, 2081, 2916, 3733/
           data(mm(26, j), j=1, 4)/444, 4019, 3971, 409/
           data(mm(27, j), j=1, 4)/3577, 1478, 2889, 2157/
           data(mm(28, j), j=1, 4)/3944, 242, 3831, 1361/
           data(mm(29, j), j=1, 4)/2184, 481, 2621, 3973/
           data(mm(30, j), j=1, 4)/1661, 2075, 1541, 1865/
           data(mm(31, j), j=1, 4)/3482, 4058, 893, 2525/
           data(mm(32, j), j=1, 4)/657, 622, 736, 1409/
           data(mm(33, j), j=1, 4)/3023, 3376, 3992, 3445/
           data(mm(34, j), j=1, 4)/3618, 812, 787, 3577/
           data(mm(35, j), j=1, 4)/1267, 234, 2125, 77/
           data(mm(36, j), j=1, 4)/1828, 641, 2364, 3761/
           data(mm(37, j), j=1, 4)/164, 4005, 2460, 2149/
           data(mm(38, j), j=1, 4)/3798, 1122, 257, 1449/
           data(mm(39, j), j=1, 4)/3087, 3135, 1574, 3005/
           data(mm(40, j), j=1, 4)/2400, 2640, 3912, 225/
           data(mm(41, j), j=1, 4)/2870, 2302, 1216, 85/
           data(mm(42, j), j=1, 4)/3876, 40, 3248, 3673/
           data(mm(43, j), j=1, 4)/1905, 1832, 3401, 3117/
           data(mm(44, j), j=1, 4)/1593, 2247, 2124, 3089/
           data(mm(45, j), j=1, 4)/1797, 2034, 2762, 1349/
           data(mm(46, j), j=1, 4)/1234, 2637, 149, 2057/
           data(mm(47, j), j=1, 4)/3460, 1287, 2245, 413/
           data(mm(48, j), j=1, 4)/328, 1691, 166, 65/
           data(mm(49, j), j=1, 4)/2861, 496, 466, 1845/
           data(mm(50, j), j=1, 4)/1950, 1597, 4018, 697/
           data(mm(51, j), j=1, 4)/617, 2394, 1399, 3085/
           data(mm(52, j), j=1, 4)/2070, 2584, 190, 3441/
           data(mm(53, j), j=1, 4)/3331, 1843, 2879, 1573/
           data(mm(54, j), j=1, 4)/769, 336, 153, 3689/
           data(mm(55, j), j=1, 4)/1558, 1472, 2320, 2941/
           data(mm(56, j), j=1, 4)/2412, 2407, 18, 929/
           data(mm(57, j), j=1, 4)/2800, 433, 712, 533/
           data(mm(58, j), j=1, 4)/189, 2096, 2159, 2841/
           data(mm(59, j), j=1, 4)/287, 1761, 2318, 4077/
           data(mm(60, j), j=1, 4)/2045, 2810, 2091, 721/
           data(mm(61, j), j=1, 4)/1227, 566, 3443, 2821/
           data(mm(62, j), j=1, 4)/2838, 442, 1510, 2249/
           data(mm(63, j), j=1, 4)/209, 41, 449, 2397/
           data(mm(64, j), j=1, 4)/2770, 1238, 1956, 2817/
           data(mm(65, j), j=1, 4)/3654, 1086, 2201, 245/
           data(mm(66, j), j=1, 4)/3993, 603, 3137, 1913/
           data(mm(67, j), j=1, 4)/192, 840, 3399, 1997/
           data(mm(68, j), j=1, 4)/2253, 3168, 1321, 3121/
           data(mm(69, j), j=1, 4)/3491, 1499, 2271, 997/
           data(mm(70, j), j=1, 4)/2889, 1084, 3667, 1833/
           data(mm(71, j), j=1, 4)/2857, 3438, 2703, 2877/
           data(mm(72, j), j=1, 4)/2094, 2408, 629, 1633/
           data(mm(73, j), j=1, 4)/1818, 1589, 2365, 981/
           data(mm(74, j), j=1, 4)/688, 2391, 2431, 2009/
           data(mm(75, j), j=1, 4)/1407, 288, 1113, 941/
           data(mm(76, j), j=1, 4)/634, 26, 3922, 2449/
           data(mm(77, j), j=1, 4)/3231, 512, 2554, 197/
           data(mm(78, j), j=1, 4)/815, 1456, 184, 2441/
           data(mm(79, j), j=1, 4)/3524, 171, 2099, 285/
           data(mm(80, j), j=1, 4)/1914, 1677, 3228, 1473/
           data(mm(81, j), j=1, 4)/516, 2657, 4012, 2741/
           data(mm(82, j), j=1, 4)/164, 2270, 1921, 3129/
           data(mm(83, j), j=1, 4)/303, 2587, 3452, 909/
           data(mm(84, j), j=1, 4)/2144, 2961, 3901, 2801/
           data(mm(85, j), j=1, 4)/3480, 1970, 572, 421/
           data(mm(86, j), j=1, 4)/119, 1817, 3309, 4073/
           data(mm(87, j), j=1, 4)/3357, 676, 3171, 2813/
           data(mm(88, j), j=1, 4)/837, 1410, 817, 2337/
           data(mm(89, j), j=1, 4)/2826, 3723, 3039, 1429/
           data(mm(90, j), j=1, 4)/2332, 2803, 1696, 1177/
           data(mm(91, j), j=1, 4)/2089, 3185, 1256, 1901/
           data(mm(92, j), j=1, 4)/3780, 184, 3715, 81/
           data(mm(93, j), j=1, 4)/1700, 663, 2077, 1669/
           data(mm(94, j), j=1, 4)/3712, 499, 3019, 2633/
           data(mm(95, j), j=1, 4)/150, 3784, 1497, 2269/
           data(mm(96, j), j=1, 4)/2000, 1631, 1101, 129/
           data(mm(97, j), j=1, 4)/3375, 1925, 717, 1141/
           data(mm(98, j), j=1, 4)/1621, 3912, 51, 249/
           data(mm(99, j), j=1, 4)/3090, 1398, 981, 3917/
           data(mm(100, j), j=1, 4)/3765, 1349, 1978, 2481/
           data(mm(101, j), j=1, 4)/1149, 1441, 1813, 3941/
           data(mm(102, j), j=1, 4)/3146, 2224, 3881, 2217/
           data(mm(103, j), j=1, 4)/33, 2411, 76, 2749/
           data(mm(104, j), j=1, 4)/3082, 1907, 3846, 3041/
           data(mm(105, j), j=1, 4)/2741, 3192, 3694, 1877/
           data(mm(106, j), j=1, 4)/359, 2786, 1682, 345/
           data(mm(107, j), j=1, 4)/3316, 382, 124, 2861/
           data(mm(108, j), j=1, 4)/1749, 37, 1660, 1809/
           data(mm(109, j), j=1, 4)/185, 759, 3997, 3141/
           data(mm(110, j), j=1, 4)/2784, 2948, 479, 2825/
           data(mm(111, j), j=1, 4)/2202, 1862, 1141, 157/
           data(mm(112, j), j=1, 4)/2199, 3802, 886, 2881/
           data(mm(113, j), j=1, 4)/1364, 2423, 3514, 3637/
           data(mm(114, j), j=1, 4)/1244, 2051, 1301, 1465/
           data(mm(115, j), j=1, 4)/2020, 2295, 3604, 2829/
           data(mm(116, j), j=1, 4)/3160, 1332, 1888, 2161/
           data(mm(117, j), j=1, 4)/2785, 1832, 1836, 3365/
           data(mm(118, j), j=1, 4)/2772, 2405, 1990, 361/
           data(mm(119, j), j=1, 4)/1217, 3638, 2058, 2685/
           data(mm(120, j), j=1, 4)/1822, 3661, 692, 3745/
           data(mm(121, j), j=1, 4)/1245, 327, 1194, 2325/
           data(mm(122, j), j=1, 4)/2252, 3660, 20, 3609/
           data(mm(123, j), j=1, 4)/3904, 716, 3285, 3821/
           data(mm(124, j), j=1, 4)/2774, 1842, 2046, 3537/
           data(mm(125, j), j=1, 4)/997, 3987, 2107, 517/
           data(mm(126, j), j=1, 4)/2573, 1368, 3508, 3017/
           data(mm(127, j), j=1, 4)/1148, 1848, 3525, 2141/
           data(mm(128, j), j=1, 4)/545, 2366, 3801, 1537/
           ! .. executable statements ..
           i1 = iseed(1)
           i2 = iseed(2)
           i3 = iseed(3)
           i4 = iseed(4)
           loop_10: do i = 1, min(n, lv)
20     continue
              ! multiply the seed by i-th power of the multiplier modulo 2**48
              it4 = i4*mm(i, 4)
              it3 = it4/ipw2
              it4 = it4 - ipw2*it3
              it3 = it3 + i3*mm(i, 4) + i4*mm(i, 3)
              it2 = it3/ipw2
              it3 = it3 - ipw2*it2
              it2 = it2 + i2*mm(i, 4) + i3*mm(i, 3) + i4*mm(i, 2)
              it1 = it2/ipw2
              it2 = it2 - ipw2*it1
              it1 = it1 + i1*mm(i, 4) + i2*mm(i, 3) + i3*mm(i, 2) + i4*mm(i, 1)
              it1 = mod(it1, ipw2)
              ! convert 48-bit integer to a real number in the interval (0,1)
              x(i) = r*(real(it1, KIND=dp) + r*(real(it2, KIND=dp) + r*(real(it3, KIND=dp) + &
                        r*real(it4, KIND=dp))))
              if (x(i) == 1.0d0) then
                 ! if a real number has n bits of precision, and the first
                 ! n bits of the 48-bit integer above happen to be all 1 (which
                 ! will occur about once every 2**n calls), then x( i ) will
                 ! be rounded to exactly 1.0.
                 ! since x( i ) is not supposed to return exactly 0.0 or 1.0,
                 ! the statistically correct thing to do in this situation is
                 ! simply to iterate again.
                 ! n.b. the case x( i ) = 0.0 should not be possible.
                 i1 = i1 + 2
                 i2 = i2 + 2
                 i3 = i3 + 2
                 i4 = i4 + 2
                 goto 20
              end if
           end do loop_10
           ! return final value of seed
           iseed(1) = it1
           iseed(2) = it2
           iseed(3) = it3
           iseed(4) = it4
           return
           ! end of stdlib_dlaruv
     end subroutine stdlib_dlaruv

     ! DLARZ applies a real elementary reflector H to a real M-by-N
     ! matrix C, from either the left or the right. H is represented in the
     ! form
     ! H = I - tau * v * v**T
     ! where tau is a real scalar and v is a real vector.
     ! If tau = 0, then H is taken to be the unit matrix.
     ! H is a product of k elementary reflectors as returned by DTZRZF.

     subroutine stdlib_dlarz(side, m, n, l, v, incv, tau, c, ldc, work)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side
           integer(ilp) :: incv, l, ldc, m, n
           real(dp) :: tau
           ! .. array arguments ..
           real(dp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. executable statements ..
           if (stdlib_lsame(side, 'l')) then
              ! form  h * c
              if (tau /= zero) then
                 ! w( 1:n ) = c( 1, 1:n )
                 call stdlib_dcopy(n, c, ldc, work, 1)
                 ! w( 1:n ) = w( 1:n ) + c( m-l+1:m, 1:n )**t * v( 1:l )
                 call stdlib_dgemv('transpose', l, n, one, c(m - l + 1, 1), ldc, v, incv, one, work, &
                            1)
                 ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n )
                 call stdlib_daxpy(n, -tau, work, 1, c, ldc)
                 ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ...
                                     ! tau * v( 1:l ) * w( 1:n )**t
                 call stdlib_dger(l, n, -tau, v, incv, work, 1, c(m - l + 1, 1), ldc)
              end if
           else
              ! form  c * h
              if (tau /= zero) then
                 ! w( 1:m ) = c( 1:m, 1 )
                 call stdlib_dcopy(m, c, 1, work, 1)
                 ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l )
                 call stdlib_dgemv('no transpose', m, l, one, c(1, n - l + 1), ldc, v, incv, one, &
                           work, 1)
                 ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m )
                 call stdlib_daxpy(m, -tau, work, 1, c, 1)
                 ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ...
                                     ! tau * w( 1:m ) * v( 1:l )**t
                 call stdlib_dger(m, l, -tau, work, 1, v, incv, c(1, n - l + 1), ldc)
              end if
           end if
           return
           ! end of stdlib_dlarz
     end subroutine stdlib_dlarz

     ! DLARZB applies a real block reflector H or its transpose H**T to
     ! a real distributed M-by-N  C from the left or the right.
     ! Currently, only STOREV = 'R' and DIRECT = 'B' are supported.

     subroutine stdlib_dlarzb(side, trans, direct, storev, m, n, k, l, v, ldv, t, ldt, c, ldc, &
               work, ldwork)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, side, storev, trans
           integer(ilp) :: k, l, ldc, ldt, ldv, ldwork, m, n
           ! .. array arguments ..
           real(dp) :: c(ldc, *), t(ldt, *), v(ldv, *), work(ldwork, *)
        ! =====================================================================
           
           ! .. local scalars ..
           character :: transt
           integer(ilp) :: i, info, j
     
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) return
           ! check for currently supported options
           info = 0
           if (.not. stdlib_lsame(direct, 'b')) then
              info = -3
           else if (.not. stdlib_lsame(storev, 'r')) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlarzb', -info)
              return
           end if
           if (stdlib_lsame(trans, 'n')) then
              transt = 't'
           else
              transt = 'n'
           end if
           if (stdlib_lsame(side, 'l')) then
              ! form  h * c  or  h**t * c
              ! w( 1:n, 1:k ) = c( 1:k, 1:n )**t
              do j = 1, k
                 call stdlib_dcopy(n, c(j, 1), ldc, work(1, j), 1)
              end do
              ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ...
                              ! c( m-l+1:m, 1:n )**t * v( 1:k, 1:l )**t
              if (l > 0) call stdlib_dgemm('transpose', 'transpose', n, k, l, one, c(m - l + 1, 1), &
                        ldc, v, ldv, one, work, ldwork)
              ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t  or  w( 1:m, 1:k ) * t
              call stdlib_dtrmm('right', 'lower', transt, 'non-unit', n, k, one, t, ldt, work, &
                        ldwork)
              ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**t
              do j = 1, n
                 do i = 1, k
                    c(i, j) = c(i, j) - work(j, i)
                 end do
              end do
              ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ...
                                  ! v( 1:k, 1:l )**t * w( 1:n, 1:k )**t
              if (l > 0) call stdlib_dgemm('transpose', 'transpose', l, n, k, -one, v, ldv, work, &
                        ldwork, one, c(m - l + 1, 1), ldc)
           else if (stdlib_lsame(side, 'r')) then
              ! form  c * h  or  c * h**t
              ! w( 1:m, 1:k ) = c( 1:m, 1:k )
              do j = 1, k
                 call stdlib_dcopy(m, c(1, j), 1, work(1, j), 1)
              end do
              ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ...
                              ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**t
              if (l > 0) call stdlib_dgemm('no transpose', 'transpose', m, k, l, one, c(1, n - l + 1), &
                         ldc, v, ldv, one, work, ldwork)
              ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * t  or  w( 1:m, 1:k ) * t**t
              call stdlib_dtrmm('right', 'lower', trans, 'non-unit', m, k, one, t, ldt, work, &
                        ldwork)
              ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k )
              do j = 1, k
                 do i = 1, m
                    c(i, j) = c(i, j) - work(i, j)
                 end do
              end do
              ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ...
                                  ! w( 1:m, 1:k ) * v( 1:k, 1:l )
              if (l > 0) call stdlib_dgemm('no transpose', 'no transpose', m, l, k, -one, work, &
                        ldwork, v, ldv, one, c(1, n - l + 1), ldc)
           end if
           return
           ! end of stdlib_dlarzb
     end subroutine stdlib_dlarzb

     ! DLARZT forms the triangular factor T of a real block reflector
     ! H of order > n, which is defined as a product of k elementary
     ! reflectors.
     ! If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
     ! If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
     ! If STOREV = 'C', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th column of the array V, and
     ! H  =  I - V * T * V**T
     ! If STOREV = 'R', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th row of the array V, and
     ! H  =  I - V**T * T * V
     ! Currently, only STOREV = 'R' and DIRECT = 'B' are supported.

     subroutine stdlib_dlarzt(direct, storev, n, k, v, ldv, tau, t, ldt)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, storev
           integer(ilp) :: k, ldt, ldv, n
           ! .. array arguments ..
           real(dp) :: t(ldt, *), tau(*), v(ldv, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, j
     
           ! .. executable statements ..
           ! check for currently supported options
           info = 0
           if (.not. stdlib_lsame(direct, 'b')) then
              info = -1
           else if (.not. stdlib_lsame(storev, 'r')) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlarzt', -info)
              return
           end if
           do i = k, 1, -1
              if (tau(i) == zero) then
                 ! h(i)  =  i
                 do j = i, k
                    t(j, i) = zero
                 end do
              else
                 ! general case
                 if (i < k) then
                    ! t(i+1:k,i) = - tau(i) * v(i+1:k,1:n) * v(i,1:n)**t
                    call stdlib_dgemv('no transpose', k - i, n, -tau(i), v(i + 1, 1), ldv, v(i, &
                              1), ldv, zero, t(i + 1, i), 1)
                    ! t(i+1:k,i) = t(i+1:k,i+1:k) * t(i+1:k,i)
                    call stdlib_dtrmv('lower', 'no transpose', 'non-unit', k - i, t(i + 1, i + 1), &
                              ldt, t(i + 1, i), 1)
                 end if
                 t(i, i) = tau(i)
              end if
           end do
           return
           ! end of stdlib_dlarzt
     end subroutine stdlib_dlarzt

     ! DLAS2  computes the singular values of the 2-by-2 matrix
     ! [  F   G  ]
     ! [  0   H  ].
     ! On return, SSMIN is the smaller singular value and SSMAX is the
     ! larger singular value.

     subroutine stdlib_dlas2(f, g, h, ssmin, ssmax)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: f, g, h, ssmax, ssmin
        ! ====================================================================
           
           ! .. local scalars ..
           real(dp) :: as, at, au, c, fa, fhmn, fhmx, ga, ha
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, sqrt
           ! .. executable statements ..
           fa = abs(f)
           ga = abs(g)
           ha = abs(h)
           fhmn = min(fa, ha)
           fhmx = max(fa, ha)
           if (fhmn == zero) then
              ssmin = zero
              if (fhmx == zero) then
                 ssmax = ga
              else
                 ssmax = max(fhmx, ga)*sqrt(one + (min(fhmx, ga)/max(fhmx, ga))**2)
                           
              end if
           else
              if (ga < fhmx) then
                 as = one + fhmn/fhmx
                 at = (fhmx - fhmn)/fhmx
                 au = (ga/fhmx)**2
                 c = two/(sqrt(as*as + au) + sqrt(at*at + au))
                 ssmin = fhmn*c
                 ssmax = fhmx/c
              else
                 au = fhmx/ga
                 if (au == zero) then
                    ! avoid possible harmful underflow if exponent range
                    ! asymmetric (true ssmin may not underflow even if
                    ! au underflows)
                    ssmin = (fhmn*fhmx)/ga
                    ssmax = ga
                 else
                    as = one + fhmn/fhmx
                    at = (fhmx - fhmn)/fhmx
                    c = one/(sqrt(one + (as*au)**2) + sqrt(one + (at*au)**2))
                    ssmin = (fhmn*c)*au
                    ssmin = ssmin + ssmin
                    ssmax = ga/(c + c)
                 end if
              end if
           end if
           return
           ! end of stdlib_dlas2
     end subroutine stdlib_dlas2

     ! This subroutine computes the square root of the I-th eigenvalue
     ! of a positive symmetric rank-one modification of a 2-by-2 diagonal
     ! matrix
     ! diag( D ) * diag( D ) +  RHO * Z * transpose(Z) .
     ! The diagonal entries in the array D are assumed to satisfy
     ! 0 <= D(i) < D(j)  for  i < j .
     ! We also assume RHO > 0 and that the Euclidean norm of the vector
     ! Z is one.

     subroutine stdlib_dlasd5(i, d, z, delta, rho, dsigma, work)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: i
           real(dp) :: dsigma, rho
           ! .. array arguments ..
           real(dp) :: d(2), delta(2), work(2), z(2)
        ! =====================================================================
           
           ! .. local scalars ..
           real(dp) :: b, c, del, delsq, tau, w
           ! .. intrinsic functions ..
           intrinsic :: abs, sqrt
           ! .. executable statements ..
           del = d(2) - d(1)
           delsq = del*(d(2) + d(1))
           if (i == 1) then
              w = one + four*rho*(z(2)*z(2)/(d(1) + three*d(2)) - z(1)*z(1)/( &
                        three*d(1) + d(2)))/del
              if (w > zero) then
                 b = delsq + rho*(z(1)*z(1) + z(2)*z(2))
                 c = rho*z(1)*z(1)*delsq
                 ! b > zero, always
                 ! the following tau is dsigma * dsigma - d( 1 ) * d( 1 )
                 tau = two*c/(b + sqrt(abs(b*b - four*c)))
                 ! the following tau is dsigma - d( 1 )
                 tau = tau/(d(1) + sqrt(d(1)*d(1) + tau))
                 dsigma = d(1) + tau
                 delta(1) = -tau
                 delta(2) = del - tau
                 work(1) = two*d(1) + tau
                 work(2) = (d(1) + tau) + d(2)
                 ! delta( 1 ) = -z( 1 ) / tau
                 ! delta( 2 ) = z( 2 ) / ( del-tau )
              else
                 b = -delsq + rho*(z(1)*z(1) + z(2)*z(2))
                 c = rho*z(2)*z(2)*delsq
                 ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
                 if (b > zero) then
                    tau = -two*c/(b + sqrt(b*b + four*c))
                 else
                    tau = (b - sqrt(b*b + four*c))/two
                 end if
                 ! the following tau is dsigma - d( 2 )
                 tau = tau/(d(2) + sqrt(abs(d(2)*d(2) + tau)))
                 dsigma = d(2) + tau
                 delta(1) = -(del + tau)
                 delta(2) = -tau
                 work(1) = d(1) + tau + d(2)
                 work(2) = two*d(2) + tau
                 ! delta( 1 ) = -z( 1 ) / ( del+tau )
                 ! delta( 2 ) = -z( 2 ) / tau
              end if
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           else
              ! now i=2
              b = -delsq + rho*(z(1)*z(1) + z(2)*z(2))
              c = rho*z(2)*z(2)*delsq
              ! the following tau is dsigma * dsigma - d( 2 ) * d( 2 )
              if (b > zero) then
                 tau = (b + sqrt(b*b + four*c))/two
              else
                 tau = two*c/(-b + sqrt(b*b + four*c))
              end if
              ! the following tau is dsigma - d( 2 )
              tau = tau/(d(2) + sqrt(d(2)*d(2) + tau))
              dsigma = d(2) + tau
              delta(1) = -(del + tau)
              delta(2) = -tau
              work(1) = d(1) + tau + d(2)
              work(2) = two*d(2) + tau
              ! delta( 1 ) = -z( 1 ) / ( del+tau )
              ! delta( 2 ) = -z( 2 ) / tau
              ! temp = sqrt( delta( 1 )*delta( 1 )+delta( 2 )*delta( 2 ) )
              ! delta( 1 ) = delta( 1 ) / temp
              ! delta( 2 ) = delta( 2 ) / temp
           end if
           return
           ! end of stdlib_dlasd5
     end subroutine stdlib_dlasd5

     ! DLASDT creates a tree of subproblems for bidiagonal divide and
     ! conquer.

     subroutine stdlib_dlasdt(n, lvl, nd, inode, ndiml, ndimr, msub)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: lvl, msub, n, nd
           ! .. array arguments ..
           integer(ilp) :: inode(*), ndiml(*), ndimr(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, il, ir, llst, maxn, ncrnt, nlvl
           real(dp) :: temp
           ! .. intrinsic functions ..
           intrinsic :: dble, int, log, max
           ! .. executable statements ..
           ! find the number of levels on the tree.
           maxn = max(1, n)
           temp = log(real(maxn, KIND=dp)/real(msub + 1, KIND=dp))/log(two)
           lvl = int(temp, KIND=ilp) + 1
           i = n/2
           inode(1) = i + 1
           ndiml(1) = i
           ndimr(1) = n - i - 1
           il = 0
           ir = 1
           llst = 1
           do nlvl = 1, lvl - 1
              ! constructing the tree at (nlvl+1)-st level. the number of
              ! nodes created on this level is llst * 2.
              do i = 0, llst - 1
                 il = il + 2
                 ir = ir + 2
                 ncrnt = llst + i
                 ndiml(il) = ndiml(ncrnt)/2
                 ndimr(il) = ndiml(ncrnt) - ndiml(il) - 1
                 inode(il) = inode(ncrnt) - ndimr(il) - 1
                 ndiml(ir) = ndimr(ncrnt)/2
                 ndimr(ir) = ndimr(ncrnt) - ndiml(ir) - 1
                 inode(ir) = inode(ncrnt) + ndiml(ir) + 1
              end do
              llst = llst*2
           end do
           nd = llst*2 - 1
           return
           ! end of stdlib_dlasdt
     end subroutine stdlib_dlasdt

     ! DLASET initializes an m-by-n matrix A to BETA on the diagonal and
     ! ALPHA on the offdiagonals.

     subroutine stdlib_dlaset(uplo, m, n, alpha, beta, a, lda)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: lda, m, n
           real(dp) :: alpha, beta
           ! .. array arguments ..
           real(dp) :: a(lda, *)
       ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: min
           ! .. executable statements ..
           if (stdlib_lsame(uplo, 'u')) then
              ! set the strictly upper triangular or trapezoidal part of the
              ! array to alpha.
              do j = 2, n
                 do i = 1, min(j - 1, m)
                    a(i, j) = alpha
                 end do
              end do
           else if (stdlib_lsame(uplo, 'l')) then
              ! set the strictly lower triangular or trapezoidal part of the
              ! array to alpha.
              do j = 1, min(m, n)
                 do i = j + 1, m
                    a(i, j) = alpha
                 end do
              end do
           else
              ! set the leading m-by-n submatrix to alpha.
              do j = 1, n
                 do i = 1, m
                    a(i, j) = alpha
                 end do
              end do
           end if
           ! set the first min(m,n) diagonal elements to beta.
           do i = 1, min(m, n)
              a(i, i) = beta
           end do
           return
           ! end of stdlib_dlaset
     end subroutine stdlib_dlaset

     ! DLASQ4 computes an approximation TAU to the smallest eigenvalue
     ! using values of d from the previous transform.

     subroutine stdlib_dlasq4(i0, n0, z, pp, n0in, dmin, dmin1, dmin2, dn, dn1, dn2, tau, ttype, &
               g)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: i0, n0, n0in, pp, ttype
           real(dp) :: dmin, dmin1, dmin2, dn, dn1, dn2, g, tau
           ! .. array arguments ..
           real(dp) :: z(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: cnst1 = 0.5630d0
           real(dp), parameter :: cnst2 = 1.010d0
           real(dp), parameter :: cnst3 = 1.050d0
           real(dp), parameter :: qurtr = 0.250d0
           real(dp), parameter :: third = 0.3330d0
           real(dp), parameter :: hundrd = 100.0d0
           
           ! .. local scalars ..
           integer(ilp) :: i4, nn, np
           real(dp) :: a2, b1, b2, gam, gap1, gap2, s
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt
           ! .. executable statements ..
           ! a negative dmin forces the shift to take that absolute value
           ! ttype records the type of shift.
           if (dmin <= zero) then
              tau = -dmin
              ttype = -1
              return
           end if
           nn = 4*n0 + pp
           if (n0in == n0) then
              ! no eigenvalues deflated.
              if (dmin == dn .or. dmin == dn1) then
                 b1 = sqrt(z(nn - 3))*sqrt(z(nn - 5))
                 b2 = sqrt(z(nn - 7))*sqrt(z(nn - 9))
                 a2 = z(nn - 7) + z(nn - 5)
                 ! cases 2 and 3.
                 if (dmin == dn .and. dmin1 == dn1) then
                    gap2 = dmin2 - a2 - dmin2*qurtr
                    if (gap2 > zero .and. gap2 > b2) then
                       gap1 = a2 - dn - (b2/gap2)*b2
                    else
                       gap1 = a2 - dn - (b1 + b2)
                    end if
                    if (gap1 > zero .and. gap1 > b1) then
                       s = max(dn - (b1/gap1)*b1, half*dmin)
                       ttype = -2
                    else
                       s = zero
                       if (dn > b1) s = dn - b1
                       if (a2 > (b1 + b2)) s = min(s, a2 - (b1 + b2))
                       s = max(s, third*dmin)
                       ttype = -3
                    end if
                 else
                    ! case 4.
                    ttype = -4
                    s = qurtr*dmin
                    if (dmin == dn) then
                       gam = dn
                       a2 = zero
                       if (z(nn - 5) > z(nn - 7)) return
                       b2 = z(nn - 5)/z(nn - 7)
                       np = nn - 9
                    else
                       np = nn - 2*pp
                       gam = dn1
                       if (z(np - 4) > z(np - 2)) return
                       a2 = z(np - 4)/z(np - 2)
                       if (z(nn - 9) > z(nn - 11)) return
                       b2 = z(nn - 9)/z(nn - 11)
                       np = nn - 13
                    end if
                    ! approximate contribution to norm squared from i < nn-1.
                    a2 = a2 + b2
                    do i4 = np, 4*i0 - 1 + pp, -4
                       if (b2 == zero) go to 20
                       b1 = b2
                       if (z(i4) > z(i4 - 2)) return
                       b2 = b2*(z(i4)/z(i4 - 2))
                       a2 = a2 + b2
                       if (hundrd*max(b2, b1) < a2 .or. cnst1 < a2) go to 20
                    end do
20      continue
                    a2 = cnst3*a2
                    ! rayleigh quotient residual bound.
                    if (a2 < cnst1) s = gam*(one - sqrt(a2))/(one + a2)
                 end if
              else if (dmin == dn2) then
                 ! case 5.
                 ttype = -5
                 s = qurtr*dmin
                 ! compute contribution to norm squared from i > nn-2.
                 np = nn - 2*pp
                 b1 = z(np - 2)
                 b2 = z(np - 6)
                 gam = dn2
                 if (z(np - 8) > b2 .or. z(np - 4) > b1) return
                 a2 = (z(np - 8)/b2)*(one + z(np - 4)/b1)
                 ! approximate contribution to norm squared from i < nn-2.
                 if (n0 - i0 > 2) then
                    b2 = z(nn - 13)/z(nn - 15)
                    a2 = a2 + b2
                    do i4 = nn - 17, 4*i0 - 1 + pp, -4
                       if (b2 == zero) go to 40
                       b1 = b2
                       if (z(i4) > z(i4 - 2)) return
                       b2 = b2*(z(i4)/z(i4 - 2))
                       a2 = a2 + b2
                       if (hundrd*max(b2, b1) < a2 .or. cnst1 < a2) go to 40
                    end do
40      continue
                    a2 = cnst3*a2
                 end if
                 if (a2 < cnst1) s = gam*(one - sqrt(a2))/(one + a2)
              else
                 ! case 6, no information to guide us.
                 if (ttype == -6) then
                    g = g + third*(one - g)
                 else if (ttype == -18) then
                    g = qurtr*third
                 else
                    g = qurtr
                 end if
                 s = g*dmin
                 ttype = -6
              end if
           else if (n0in == (n0 + 1)) then
              ! one eigenvalue just deflated. use dmin1, dn1 for dmin and dn.
              if (dmin1 == dn1 .and. dmin2 == dn2) then
                 ! cases 7 and 8.
                 ttype = -7
                 s = third*dmin1
                 if (z(nn - 5) > z(nn - 7)) return
                 b1 = z(nn - 5)/z(nn - 7)
                 b2 = b1
                 if (b2 == zero) go to 60
                 do i4 = 4*n0 - 9 + pp, 4*i0 - 1 + pp, -4
                    a2 = b1
                    if (z(i4) > z(i4 - 2)) return
                    b1 = b1*(z(i4)/z(i4 - 2))
                    b2 = b2 + b1
                    if (hundrd*max(b1, a2) < b2) go to 60
                 end do
60      continue
                 b2 = sqrt(cnst3*b2)
                 a2 = dmin1/(one + b2**2)
                 gap2 = half*dmin2 - a2
                 if (gap2 > zero .and. gap2 > b2*a2) then
                    s = max(s, a2*(one - cnst2*a2*(b2/gap2)*b2))
                 else
                    s = max(s, a2*(one - cnst2*b2))
                    ttype = -8
                 end if
              else
                 ! case 9.
                 s = qurtr*dmin1
                 if (dmin1 == dn1) s = half*dmin1
                 ttype = -9
              end if
           else if (n0in == (n0 + 2)) then
              ! two eigenvalues deflated. use dmin2, dn2 for dmin and dn.
              ! cases 10 and 11.
              if (dmin2 == dn2 .and. two*z(nn - 5) < z(nn - 7)) then
                 ttype = -10
                 s = third*dmin2
                 if (z(nn - 5) > z(nn - 7)) return
                 b1 = z(nn - 5)/z(nn - 7)
                 b2 = b1
                 if (b2 == zero) go to 80
                 do i4 = 4*n0 - 9 + pp, 4*i0 - 1 + pp, -4
                    if (z(i4) > z(i4 - 2)) return
                    b1 = b1*(z(i4)/z(i4 - 2))
                    b2 = b2 + b1
                    if (hundrd*b1 < b2) go to 80
                 end do
80      continue
                 b2 = sqrt(cnst3*b2)
                 a2 = dmin2/(one + b2**2)
                 gap2 = z(nn - 7) + z(nn - 9) - sqrt(z(nn - 11))*sqrt(z(nn - 9)) - a2
                 if (gap2 > zero .and. gap2 > b2*a2) then
                    s = max(s, a2*(one - cnst2*a2*(b2/gap2)*b2))
                 else
                    s = max(s, a2*(one - cnst2*b2))
                 end if
              else
                 s = qurtr*dmin2
                 ttype = -11
              end if
           else if (n0in > (n0 + 2)) then
              ! case 12, more than two eigenvalues deflated. no information.
              s = zero
              ttype = -12
           end if
           tau = s
           return
           ! end of stdlib_dlasq4
     end subroutine stdlib_dlasq4

     ! DLASQ5 computes one dqds transform in ping-pong form, one
     ! version for IEEE machines another for non IEEE machines.

     subroutine stdlib_dlasq5(i0, n0, z, pp, tau, sigma, dmin, dmin1, dmin2, dn, dnm1, dnm2, ieee, &
                eps)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           logical(lk) :: ieee
           integer(ilp) :: i0, n0, pp
           real(dp) :: dmin, dmin1, dmin2, dn, dnm1, dnm2, tau, sigma, eps
           ! .. array arguments ..
           real(dp) :: z(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: j4, j4p2
           real(dp) :: d, emin, temp, dthresh
           ! .. intrinsic functions ..
           intrinsic :: min
           ! .. executable statements ..
           if ((n0 - i0 - 1) <= 0) return
           dthresh = eps*(sigma + tau)
           if (tau < dthresh*half) tau = zero
           if (tau /= zero) then
           j4 = 4*i0 + pp - 3
           emin = z(j4 + 4)
           d = z(j4) - tau
           dmin = d
           dmin1 = -z(j4)
           if (ieee) then
              ! code for ieee arithmetic.
              if (pp == 0) then
                 do j4 = 4*i0, 4*(n0 - 3), 4
                    z(j4 - 2) = d + z(j4 - 1)
                    temp = z(j4 + 1)/z(j4 - 2)
                    d = d*temp - tau
                    dmin = min(dmin, d)
                    z(j4) = z(j4 - 1)*temp
                    emin = min(z(j4), emin)
                 end do
              else
                 do j4 = 4*i0, 4*(n0 - 3), 4
                    z(j4 - 3) = d + z(j4)
                    temp = z(j4 + 2)/z(j4 - 3)
                    d = d*temp - tau
                    dmin = min(dmin, d)
                    z(j4 - 1) = z(j4)*temp
                    emin = min(z(j4 - 1), emin)
                 end do
              end if
              ! unroll last two steps.
              dnm2 = d
              dmin2 = dmin
              j4 = 4*(n0 - 2) - pp
              j4p2 = j4 + 2*pp - 1
              z(j4 - 2) = dnm2 + z(j4p2)
              z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
              dnm1 = z(j4p2 + 2)*(dnm2/z(j4 - 2)) - tau
              dmin = min(dmin, dnm1)
              dmin1 = dmin
              j4 = j4 + 4
              j4p2 = j4 + 2*pp - 1
              z(j4 - 2) = dnm1 + z(j4p2)
              z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
              dn = z(j4p2 + 2)*(dnm1/z(j4 - 2)) - tau
              dmin = min(dmin, dn)
           else
              ! code for non ieee arithmetic.
              if (pp == 0) then
                 do j4 = 4*i0, 4*(n0 - 3), 4
                    z(j4 - 2) = d + z(j4 - 1)
                    if (d < zero) then
                       return
                    else
                       z(j4) = z(j4 + 1)*(z(j4 - 1)/z(j4 - 2))
                       d = z(j4 + 1)*(d/z(j4 - 2)) - tau
                    end if
                    dmin = min(dmin, d)
                    emin = min(emin, z(j4))
                 end do
              else
                 do j4 = 4*i0, 4*(n0 - 3), 4
                    z(j4 - 3) = d + z(j4)
                    if (d < zero) then
                       return
                    else
                       z(j4 - 1) = z(j4 + 2)*(z(j4)/z(j4 - 3))
                       d = z(j4 + 2)*(d/z(j4 - 3)) - tau
                    end if
                    dmin = min(dmin, d)
                    emin = min(emin, z(j4 - 1))
                 end do
              end if
              ! unroll last two steps.
              dnm2 = d
              dmin2 = dmin
              j4 = 4*(n0 - 2) - pp
              j4p2 = j4 + 2*pp - 1
              z(j4 - 2) = dnm2 + z(j4p2)
              if (dnm2 < zero) then
                 return
              else
                 z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
                 dnm1 = z(j4p2 + 2)*(dnm2/z(j4 - 2)) - tau
              end if
              dmin = min(dmin, dnm1)
              dmin1 = dmin
              j4 = j4 + 4
              j4p2 = j4 + 2*pp - 1
              z(j4 - 2) = dnm1 + z(j4p2)
              if (dnm1 < zero) then
                 return
              else
                 z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
                 dn = z(j4p2 + 2)*(dnm1/z(j4 - 2)) - tau
              end if
              dmin = min(dmin, dn)
           end if
           else
           ! this is the version that sets d's to zero if they are small enough
              j4 = 4*i0 + pp - 3
              emin = z(j4 + 4)
              d = z(j4) - tau
              dmin = d
              dmin1 = -z(j4)
              if (ieee) then
           ! code for ieee arithmetic.
                 if (pp == 0) then
                    do j4 = 4*i0, 4*(n0 - 3), 4
                       z(j4 - 2) = d + z(j4 - 1)
                       temp = z(j4 + 1)/z(j4 - 2)
                       d = d*temp - tau
                       if (d < dthresh) d = zero
                       dmin = min(dmin, d)
                       z(j4) = z(j4 - 1)*temp
                       emin = min(z(j4), emin)
                    end do
                 else
                    do j4 = 4*i0, 4*(n0 - 3), 4
                       z(j4 - 3) = d + z(j4)
                       temp = z(j4 + 2)/z(j4 - 3)
                       d = d*temp - tau
                       if (d < dthresh) d = zero
                       dmin = min(dmin, d)
                       z(j4 - 1) = z(j4)*temp
                       emin = min(z(j4 - 1), emin)
                    end do
                 end if
           ! unroll last two steps.
                 dnm2 = d
                 dmin2 = dmin
                 j4 = 4*(n0 - 2) - pp
                 j4p2 = j4 + 2*pp - 1
                 z(j4 - 2) = dnm2 + z(j4p2)
                 z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
                 dnm1 = z(j4p2 + 2)*(dnm2/z(j4 - 2)) - tau
                 dmin = min(dmin, dnm1)
                 dmin1 = dmin
                 j4 = j4 + 4
                 j4p2 = j4 + 2*pp - 1
                 z(j4 - 2) = dnm1 + z(j4p2)
                 z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
                 dn = z(j4p2 + 2)*(dnm1/z(j4 - 2)) - tau
                 dmin = min(dmin, dn)
              else
           ! code for non ieee arithmetic.
                 if (pp == 0) then
                    do j4 = 4*i0, 4*(n0 - 3), 4
                       z(j4 - 2) = d + z(j4 - 1)
                       if (d < zero) then
                          return
                       else
                          z(j4) = z(j4 + 1)*(z(j4 - 1)/z(j4 - 2))
                          d = z(j4 + 1)*(d/z(j4 - 2)) - tau
                       end if
                       if (d < dthresh) d = zero
                       dmin = min(dmin, d)
                       emin = min(emin, z(j4))
                    end do
                 else
                    do j4 = 4*i0, 4*(n0 - 3), 4
                       z(j4 - 3) = d + z(j4)
                       if (d < zero) then
                          return
                       else
                          z(j4 - 1) = z(j4 + 2)*(z(j4)/z(j4 - 3))
                          d = z(j4 + 2)*(d/z(j4 - 3)) - tau
                       end if
                       if (d < dthresh) d = zero
                       dmin = min(dmin, d)
                       emin = min(emin, z(j4 - 1))
                    end do
                 end if
           ! unroll last two steps.
                 dnm2 = d
                 dmin2 = dmin
                 j4 = 4*(n0 - 2) - pp
                 j4p2 = j4 + 2*pp - 1
                 z(j4 - 2) = dnm2 + z(j4p2)
                 if (dnm2 < zero) then
                    return
                 else
                    z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
                    dnm1 = z(j4p2 + 2)*(dnm2/z(j4 - 2)) - tau
                 end if
                 dmin = min(dmin, dnm1)
                 dmin1 = dmin
                 j4 = j4 + 4
                 j4p2 = j4 + 2*pp - 1
                 z(j4 - 2) = dnm1 + z(j4p2)
                 if (dnm1 < zero) then
                    return
                 else
                    z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
                    dn = z(j4p2 + 2)*(dnm1/z(j4 - 2)) - tau
                 end if
                 dmin = min(dmin, dn)
              end if
           end if
           z(j4 + 2) = dn
           z(4*n0 - pp) = emin
           return
           ! end of stdlib_dlasq5
     end subroutine stdlib_dlasq5

     ! DLASQ6 computes one dqd (shift equal to zero) transform in
     ! ping-pong form, with protection against underflow and overflow.

     subroutine stdlib_dlasq6(i0, n0, z, pp, dmin, dmin1, dmin2, dn, dnm1, dnm2)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: i0, n0, pp
           real(dp) :: dmin, dmin1, dmin2, dn, dnm1, dnm2
           ! .. array arguments ..
           real(dp) :: z(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: j4, j4p2
           real(dp) :: d, emin, safmin, temp
     
           ! .. intrinsic functions ..
           intrinsic :: min
           ! .. executable statements ..
           if ((n0 - i0 - 1) <= 0) return
           safmin = stdlib_dlamch('safe minimum')
           j4 = 4*i0 + pp - 3
           emin = z(j4 + 4)
           d = z(j4)
           dmin = d
           if (pp == 0) then
              do j4 = 4*i0, 4*(n0 - 3), 4
                 z(j4 - 2) = d + z(j4 - 1)
                 if (z(j4 - 2) == zero) then
                    z(j4) = zero
                    d = z(j4 + 1)
                    dmin = d
                    emin = zero
                 else if (safmin*z(j4 + 1) < z(j4 - 2) .and. safmin*z(j4 - 2) < z(j4 + 1)) &
                           then
                    temp = z(j4 + 1)/z(j4 - 2)
                    z(j4) = z(j4 - 1)*temp
                    d = d*temp
                 else
                    z(j4) = z(j4 + 1)*(z(j4 - 1)/z(j4 - 2))
                    d = z(j4 + 1)*(d/z(j4 - 2))
                 end if
                 dmin = min(dmin, d)
                 emin = min(emin, z(j4))
              end do
           else
              do j4 = 4*i0, 4*(n0 - 3), 4
                 z(j4 - 3) = d + z(j4)
                 if (z(j4 - 3) == zero) then
                    z(j4 - 1) = zero
                    d = z(j4 + 2)
                    dmin = d
                    emin = zero
                 else if (safmin*z(j4 + 2) < z(j4 - 3) .and. safmin*z(j4 - 3) < z(j4 + 2)) &
                           then
                    temp = z(j4 + 2)/z(j4 - 3)
                    z(j4 - 1) = z(j4)*temp
                    d = d*temp
                 else
                    z(j4 - 1) = z(j4 + 2)*(z(j4)/z(j4 - 3))
                    d = z(j4 + 2)*(d/z(j4 - 3))
                 end if
                 dmin = min(dmin, d)
                 emin = min(emin, z(j4 - 1))
              end do
           end if
           ! unroll last two steps.
           dnm2 = d
           dmin2 = dmin
           j4 = 4*(n0 - 2) - pp
           j4p2 = j4 + 2*pp - 1
           z(j4 - 2) = dnm2 + z(j4p2)
           if (z(j4 - 2) == zero) then
              z(j4) = zero
              dnm1 = z(j4p2 + 2)
              dmin = dnm1
              emin = zero
           else if (safmin*z(j4p2 + 2) < z(j4 - 2) .and. safmin*z(j4 - 2) < z(j4p2 + 2)) then
              temp = z(j4p2 + 2)/z(j4 - 2)
              z(j4) = z(j4p2)*temp
              dnm1 = dnm2*temp
           else
              z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
              dnm1 = z(j4p2 + 2)*(dnm2/z(j4 - 2))
           end if
           dmin = min(dmin, dnm1)
           dmin1 = dmin
           j4 = j4 + 4
           j4p2 = j4 + 2*pp - 1
           z(j4 - 2) = dnm1 + z(j4p2)
           if (z(j4 - 2) == zero) then
              z(j4) = zero
              dn = z(j4p2 + 2)
              dmin = dn
              emin = zero
           else if (safmin*z(j4p2 + 2) < z(j4 - 2) .and. safmin*z(j4 - 2) < z(j4p2 + 2)) then
              temp = z(j4p2 + 2)/z(j4 - 2)
              z(j4) = z(j4p2)*temp
              dn = dnm1*temp
           else
              z(j4) = z(j4p2 + 2)*(z(j4p2)/z(j4 - 2))
              dn = z(j4p2 + 2)*(dnm1/z(j4 - 2))
           end if
           dmin = min(dmin, dn)
           z(j4 + 2) = dn
           z(4*n0 - pp) = emin
           return
           ! end of stdlib_dlasq6
     end subroutine stdlib_dlasq6

     ! DLASR applies a sequence of plane rotations to a real matrix A,
     ! from either the left or the right.
     ! When SIDE = 'L', the transformation takes the form
     ! A := P*A
     ! and when SIDE = 'R', the transformation takes the form
     ! A := A*P**T
     ! where P is an orthogonal matrix consisting of a sequence of z plane
     ! rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R',
     ! and P**T is the transpose of P.
     ! When DIRECT = 'F' (Forward sequence), then
     ! P = P(z-1) * ... * P(2) * P(1)
     ! and when DIRECT = 'B' (Backward sequence), then
     ! P = P(1) * P(2) * ... * P(z-1)
     ! where P(k) is a plane rotation matrix defined by the 2-by-2 rotation
     ! R(k) = (  c(k)  s(k) )
     ! = ( -s(k)  c(k) ).
     ! When PIVOT = 'V' (Variable pivot), the rotation is performed
     ! for the plane (k,k+1), i.e., P(k) has the form
     ! P(k) = (  1                                            )
     ! (       ...                                     )
     ! (              1                                )
     ! (                   c(k)  s(k)                  )
     ! (                  -s(k)  c(k)                  )
     ! (                                1              )
     ! (                                     ...       )
     ! (                                            1  )
     ! where R(k) appears as a rank-2 modification to the identity matrix in
     ! rows and columns k and k+1.
     ! When PIVOT = 'T' (Top pivot), the rotation is performed for the
     ! plane (1,k+1), so P(k) has the form
     ! P(k) = (  c(k)                    s(k)                 )
     ! (         1                                     )
     ! (              ...                              )
     ! (                     1                         )
     ! ( -s(k)                    c(k)                 )
     ! (                                 1             )
     ! (                                      ...      )
     ! (                                             1 )
     ! where R(k) appears in rows and columns 1 and k+1.
     ! Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is
     ! performed for the plane (k,z), giving P(k) the form
     ! P(k) = ( 1                                             )
     ! (      ...                                      )
     ! (             1                                 )
     ! (                  c(k)                    s(k) )
     ! (                         1                     )
     ! (                              ...              )
     ! (                                     1         )
     ! (                 -s(k)                    c(k) )
     ! where R(k) appears in rows and columns k and z.  The rotations are
     ! performed without ever forming P(k) explicitly.

     subroutine stdlib_dlasr(side, pivot, direct, m, n, c, s, a, lda)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, pivot, side
           integer(ilp) :: lda, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(*), s(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, j
           real(dp) :: ctemp, stemp, temp
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           if (.not. (stdlib_lsame(side, 'l') .or. stdlib_lsame(side, 'r'))) then
              info = 1
           else if (.not. (stdlib_lsame(pivot, 'v') .or. stdlib_lsame(pivot, 't') .or. &
                     stdlib_lsame(pivot, 'b'))) then
              info = 2
           else if (.not. (stdlib_lsame(direct, 'f') .or. stdlib_lsame(direct, 'b'))) &
                     then
              info = 3
           else if (m < 0) then
              info = 4
           else if (n < 0) then
              info = 5
           else if (lda < max(1, m)) then
              info = 9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlasr ', info)
              return
           end if
           ! quick return if possible
           if ((m == 0) .or. (n == 0)) return
           if (stdlib_lsame(side, 'l')) then
              ! form  p * a
              if (stdlib_lsame(pivot, 'v')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, m - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j + 1, i)
                             a(j + 1, i) = ctemp*temp - stemp*a(j, i)
                             a(j, i) = stemp*temp + ctemp*a(j, i)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = m - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j + 1, i)
                             a(j + 1, i) = ctemp*temp - stemp*a(j, i)
                             a(j, i) = stemp*temp + ctemp*a(j, i)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 't')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 2, m
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = ctemp*temp - stemp*a(1, i)
                             a(1, i) = stemp*temp + ctemp*a(1, i)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = m, 2, -1
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = ctemp*temp - stemp*a(1, i)
                             a(1, i) = stemp*temp + ctemp*a(1, i)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 'b')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, m - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = stemp*a(m, i) + ctemp*temp
                             a(m, i) = ctemp*a(m, i) - stemp*temp
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = m - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = stemp*a(m, i) + ctemp*temp
                             a(m, i) = ctemp*a(m, i) - stemp*temp
                          end do
                       end if
                    end do
                 end if
              end if
           else if (stdlib_lsame(side, 'r')) then
              ! form a * p**t
              if (stdlib_lsame(pivot, 'v')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, n - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j + 1)
                             a(i, j + 1) = ctemp*temp - stemp*a(i, j)
                             a(i, j) = stemp*temp + ctemp*a(i, j)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = n - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j + 1)
                             a(i, j + 1) = ctemp*temp - stemp*a(i, j)
                             a(i, j) = stemp*temp + ctemp*a(i, j)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 't')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 2, n
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = ctemp*temp - stemp*a(i, 1)
                             a(i, 1) = stemp*temp + ctemp*a(i, 1)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = n, 2, -1
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = ctemp*temp - stemp*a(i, 1)
                             a(i, 1) = stemp*temp + ctemp*a(i, 1)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 'b')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, n - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = stemp*a(i, n) + ctemp*temp
                             a(i, n) = ctemp*a(i, n) - stemp*temp
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = n - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = stemp*a(i, n) + ctemp*temp
                             a(i, n) = ctemp*a(i, n) - stemp*temp
                          end do
                       end if
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_dlasr
     end subroutine stdlib_dlasr

     ! Sort the numbers in D in increasing order (if ID = 'I') or
     ! in decreasing order (if ID = 'D' ).
     ! Use Quick Sort, reverting to Insertion sort on arrays of
     ! size <= 20. Dimension of STACK limits N to about 2**32.

     subroutine stdlib_dlasrt(id, n, d, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: id
           integer(ilp) :: info, n
           ! .. array arguments ..
           real(dp) :: d(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: select = 20
           
           ! .. local scalars ..
           integer(ilp) :: dir, endd, i, j, start, stkpnt
           real(dp) :: d1, d2, d3, dmnmx, tmp
           ! .. local arrays ..
           integer(ilp) :: stack(2, 32)
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           dir = -1
           if (stdlib_lsame(id, 'd')) then
              dir = 0
           else if (stdlib_lsame(id, 'i')) then
              dir = 1
           end if
           if (dir == -1) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlasrt', -info)
              return
           end if
           ! quick return if possible
           if (n <= 1) return
           stkpnt = 1
           stack(1, 1) = 1
           stack(2, 1) = n
10      continue
           start = stack(1, stkpnt)
           endd = stack(2, stkpnt)
           stkpnt = stkpnt - 1
           if (endd - start <= select .and. endd - start > 0) then
              ! do insertion sort on d( start:endd )
              if (dir == 0) then
                 ! sort into decreasing order
                 loop_30: do i = start + 1, endd
                    do j = i, start + 1, -1
                       if (d(j) > d(j - 1)) then
                          dmnmx = d(j)
                          d(j) = d(j - 1)
                          d(j - 1) = dmnmx
                       else
                          cycle loop_30
                       end if
                    end do
                 end do loop_30
              else
                 ! sort into increasing order
                 loop_50: do i = start + 1, endd
                    do j = i, start + 1, -1
                       if (d(j) < d(j - 1)) then
                          dmnmx = d(j)
                          d(j) = d(j - 1)
                          d(j - 1) = dmnmx
                       else
                          cycle loop_50
                       end if
                    end do
                 end do loop_50
              end if
           else if (endd - start > select) then
              ! partition d( start:endd ) and stack parts, largest one first
              ! choose partition entry as median of 3
              d1 = d(start)
              d2 = d(endd)
              i = (start + endd)/2
              d3 = d(i)
              if (d1 < d2) then
                 if (d3 < d1) then
                    dmnmx = d1
                 else if (d3 < d2) then
                    dmnmx = d3
                 else
                    dmnmx = d2
                 end if
              else
                 if (d3 < d2) then
                    dmnmx = d2
                 else if (d3 < d1) then
                    dmnmx = d3
                 else
                    dmnmx = d1
                 end if
              end if
              if (dir == 0) then
                 ! sort into decreasing order
                 i = start - 1
                 j = endd + 1
60      continue
70      continue
                 j = j - 1
                 if (d(j) < dmnmx) go to 70
80      continue
                 i = i + 1
                 if (d(i) > dmnmx) go to 80
                 if (i < j) then
                    tmp = d(i)
                    d(i) = d(j)
                    d(j) = tmp
                    go to 60
                 end if
                 if (j - start > endd - j - 1) then
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = start
                    stack(2, stkpnt) = j
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = j + 1
                    stack(2, stkpnt) = endd
                 else
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = j + 1
                    stack(2, stkpnt) = endd
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = start
                    stack(2, stkpnt) = j
                 end if
              else
                 ! sort into increasing order
                 i = start - 1
                 j = endd + 1
90      continue
100    continue
                 j = j - 1
                 if (d(j) > dmnmx) go to 100
110    continue
                 i = i + 1
                 if (d(i) < dmnmx) go to 110
                 if (i < j) then
                    tmp = d(i)
                    d(i) = d(j)
                    d(j) = tmp
                    go to 90
                 end if
                 if (j - start > endd - j - 1) then
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = start
                    stack(2, stkpnt) = j
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = j + 1
                    stack(2, stkpnt) = endd
                 else
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = j + 1
                    stack(2, stkpnt) = endd
                    stkpnt = stkpnt + 1
                    stack(1, stkpnt) = start
                    stack(2, stkpnt) = j
                 end if
              end if
           end if
           if (stkpnt > 0) go to 10
           return
           ! end of stdlib_dlasrt
     end subroutine stdlib_dlasrt

     ! !
     ! DLASSQ  returns the values  scl  and  smsq  such that
     ! ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
     ! where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is
     ! assumed to be non-negative.
     ! scale and sumsq must be supplied in SCALE and SUMSQ and
     ! scl and smsq are overwritten on SCALE and SUMSQ respectively.
     ! If scale * sqrt( sumsq ) > tbig then
     ! we require:   scale >= sqrt( TINY*EPS ) / sbig   on entry,
     ! and if 0 < scale * sqrt( sumsq ) < tsml then
     ! we require:   scale <= sqrt( HUGE ) / ssml       on entry,
     ! where
     ! tbig -- upper threshold for values whose square is representable;
     ! sbig -- scaling constant for big numbers; \see la_constants.f90
     ! tsml -- lower threshold for values whose square is representable;
     ! ssml -- scaling constant for small numbers; \see la_constants.f90
     ! and
     ! TINY*EPS -- tiniest representable number;
     ! HUGE     -- biggest representable number.

     subroutine stdlib_dlassq(n, x, incx, scl, sumsq)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
        ! .. scalar arguments ..
     integer(ilp) :: incx, n
        real(dp) :: scl, sumsq
        ! .. array arguments ..
        real(dp) :: x(*)
        ! .. local scalars ..
     integer(ilp) :: i, ix
     logical(lk) :: notbig
        real(dp) :: abig, amed, asml, ax, ymax, ymin
        ! quick return if possible
        if (ieee_is_nan(scl) .or. ieee_is_nan(sumsq)) return
        if (sumsq == zero) scl = one
        if (scl == zero) then
           scl = one
           sumsq = zero
        end if
        if (n <= 0) then
           return
        end if
        ! compute the sum of squares in 3 accumulators:
           ! abig -- sums of squares scaled down to avoid overflow
           ! asml -- sums of squares scaled up to avoid underflow
           ! amed -- sums of squares that do not require scaling
        ! the thresholds and multipliers are
           ! tbig -- values bigger than this are scaled down by sbig
           ! tsml -- values smaller than this are scaled up by ssml
        notbig = .true.
        asml = zero
        amed = zero
        abig = zero
        ix = 1
        if (incx < 0) ix = 1 - (n - 1)*incx
        do i = 1, n
           ax = abs(x(ix))
           if (ax > tbig) then
              abig = abig + (ax*sbig)**2
              notbig = .false.
           else if (ax < tsml) then
              if (notbig) asml = asml + (ax*ssml)**2
           else
              amed = amed + ax**2
           end if
           ix = ix + incx
        end do
        ! put the existing sum of squares into one of the accumulators
        if (sumsq > zero) then
           ax = scl*sqrt(sumsq)
           if (ax > tbig) then
              ! we assume scl >= sqrt( tiny*eps ) / sbig
              abig = abig + (scl*sbig)**2*sumsq
           else if (ax < tsml) then
              ! we assume scl <= sqrt( huge ) / ssml
              if (notbig) asml = asml + (scl*ssml)**2*sumsq
           else
              amed = amed + scl**2*sumsq
           end if
        end if
        ! combine abig and amed or amed and asml if more than one
        ! accumulator was used.
        if (abig > zero) then
           ! combine abig and amed if abig > 0.
           if (amed > zero .or. ieee_is_nan(amed)) then
              abig = abig + (amed*sbig)*sbig
           end if
           scl = one/sbig
           sumsq = abig
        else if (asml > zero) then
           ! combine amed and asml if asml > 0.
           if (amed > zero .or. ieee_is_nan(amed)) then
              amed = sqrt(amed)
              asml = sqrt(asml)/ssml
              if (asml > amed) then
                 ymin = amed
                 ymax = asml
              else
                 ymin = asml
                 ymax = amed
              end if
              scl = one
              sumsq = ymax**2*(one + (ymin/ymax)**2)
           else
              scl = one/ssml
              sumsq = asml
           end if
        else
           ! otherwise all values are mid-range or zero
           scl = one
           sumsq = amed
        end if
        return
     end subroutine stdlib_dlassq

     ! DLASV2 computes the singular value decomposition of a 2-by-2
     ! triangular matrix
     ! [  F   G  ]
     ! [  0   H  ].
     ! On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the
     ! smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and
     ! right singular vectors for abs(SSMAX), giving the decomposition
     ! [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ]
     ! [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ].

     subroutine stdlib_dlasv2(f, g, h, ssmin, ssmax, snr, csr, snl, csl)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: csl, csr, f, g, h, snl, snr, ssmax, ssmin
       ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: gasmal, swap
           integer(ilp) :: pmax
           real(dp) :: a, clt, crt, d, fa, ft, ga, gt, ha, ht, l, m, mm, r, s, slt, srt, t, temp, &
                     tsign, tt
           ! .. intrinsic functions ..
           intrinsic :: abs, sign, sqrt
     
           ! .. executable statements ..
           ft = f
           fa = abs(ft)
           ht = h
           ha = abs(h)
           ! pmax points to the maximum absolute element of matrix
             ! pmax = 1 if f largest in absolute values
             ! pmax = 2 if g largest in absolute values
             ! pmax = 3 if h largest in absolute values
           pmax = 1
           swap = (ha > fa)
           if (swap) then
              pmax = 3
              temp = ft
              ft = ht
              ht = temp
              temp = fa
              fa = ha
              ha = temp
              ! now fa .ge. ha
           end if
           gt = g
           ga = abs(gt)
           if (ga == zero) then
              ! diagonal matrix
              ssmin = ha
              ssmax = fa
              clt = one
              crt = one
              slt = zero
              srt = zero
           else
              gasmal = .true.
              if (ga > fa) then
                 pmax = 2
                 if ((fa/ga) < stdlib_dlamch('eps')) then
                    ! case of very large ga
                    gasmal = .false.
                    ssmax = ga
                    if (ha > one) then
                       ssmin = fa/(ga/ha)
                    else
                       ssmin = (fa/ga)*ha
                    end if
                    clt = one
                    slt = ht/gt
                    srt = one
                    crt = ft/gt
                 end if
              end if
              if (gasmal) then
                 ! normal case
                 d = fa - ha
                 if (d == fa) then
                    ! copes with infinite f or h
                    l = one
                 else
                    l = d/fa
                 end if
                 ! note that 0 .le. l .le. 1
                 m = gt/ft
                 ! note that abs(m) .le. 1/macheps
                 t = two - l
                 ! note that t .ge. 1
                 mm = m*m
                 tt = t*t
                 s = sqrt(tt + mm)
                 ! note that 1 .le. s .le. 1 + 1/macheps
                 if (l == zero) then
                    r = abs(m)
                 else
                    r = sqrt(l*l + mm)
                 end if
                 ! note that 0 .le. r .le. 1 + 1/macheps
                 a = half*(s + r)
                 ! note that 1 .le. a .le. 1 + abs(m)
                 ssmin = ha/a
                 ssmax = fa*a
                 if (mm == zero) then
                    ! note that m is very tiny
                    if (l == zero) then
                       t = sign(two, ft)*sign(one, gt)
                    else
                       t = gt/sign(d, ft) + m/t
                    end if
                 else
                    t = (m/(s + t) + m/(r + l))*(one + a)
                 end if
                 l = sqrt(t*t + four)
                 crt = two/l
                 srt = t/l
                 clt = (crt + srt*m)/a
                 slt = (ht/ft)*srt/a
              end if
           end if
           if (swap) then
              csl = srt
              snl = crt
              csr = slt
              snr = clt
           else
              csl = clt
              snl = slt
              csr = crt
              snr = srt
           end if
           ! correct signs of ssmax and ssmin
           if (pmax == 1) tsign = sign(one, csr)*sign(one, csl)*sign(one, f)
           if (pmax == 2) tsign = sign(one, snr)*sign(one, csl)*sign(one, g)
           if (pmax == 3) tsign = sign(one, snr)*sign(one, snl)*sign(one, h)
           ssmax = sign(ssmax, tsign)
           ssmin = sign(ssmin, tsign*sign(one, f)*sign(one, h))
           return
           ! end of stdlib_dlasv2
     end subroutine stdlib_dlasv2

     ! DLASWP performs a series of row interchanges on the matrix A.
     ! One row interchange is initiated for each of rows K1 through K2 of A.

     subroutine stdlib_dlaswp(n, a, lda, k1, k2, ipiv, incx)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, k1, k2, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *)
       ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, i1, i2, inc, ip, ix, ix0, j, k, n32
           real(dp) :: temp
           ! .. executable statements ..
           ! interchange row i with row ipiv(k1+(i-k1)*abs(incx)) for each of rows
           ! k1 through k2.
           if (incx > 0) then
              ix0 = k1
              i1 = k1
              i2 = k2
              inc = 1
           else if (incx < 0) then
              ix0 = k1 + (k1 - k2)*incx
              i1 = k2
              i2 = k1
              inc = -1
           else
              return
           end if
           n32 = (n/32)*32
           if (n32 /= 0) then
              do j = 1, n32, 32
                 ix = ix0
                 do i = i1, i2, inc
                    ip = ipiv(ix)
                    if (ip /= i) then
                       do k = j, j + 31
                          temp = a(i, k)
                          a(i, k) = a(ip, k)
                          a(ip, k) = temp
                       end do
                    end if
                    ix = ix + incx
                 end do
              end do
           end if
           if (n32 /= n) then
              n32 = n32 + 1
              ix = ix0
              do i = i1, i2, inc
                 ip = ipiv(ix)
                 if (ip /= i) then
                    do k = n32, n
                       temp = a(i, k)
                       a(i, k) = a(ip, k)
                       a(ip, k) = temp
                    end do
                 end if
                 ix = ix + incx
              end do
           end if
           return
           ! end of stdlib_dlaswp
     end subroutine stdlib_dlaswp

     ! DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in
     ! op(TL)*X + ISGN*X*op(TR) = SCALE*B,
     ! where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or
     ! -1.  op(T) = T or T**T, where T**T denotes the transpose of T.

     subroutine stdlib_dlasy2(ltranl, ltranr, isgn, n1, n2, tl, ldtl, tr, ldtr, b, ldb, scale, x, &
               ldx, xnorm, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           logical(lk) :: ltranl, ltranr
           integer(ilp) :: info, isgn, ldb, ldtl, ldtr, ldx, n1, n2
           real(dp) :: scale, xnorm
           ! .. array arguments ..
           real(dp) :: b(ldb, *), tl(ldtl, *), tr(ldtr, *), x(ldx, *)
       ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: bswap, xswap
           integer(ilp) :: i, ip, ipiv, ipsv, j, jp, jpsv, k
           real(dp) :: bet, eps, gam, l21, sgn, smin, smlnum, tau1, temp, u11, u12, u22, &
                     xmax
           ! .. local arrays ..
           logical(lk) :: bswpiv(4), xswpiv(4)
           integer(ilp) :: jpiv(4), locl21(4), locu12(4), locu22(4)
           real(dp) :: btmp(4), t16(4, 4), tmp(4), x2(2)
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. data statements ..
           data locu12/3, 4, 1, 2/, locl21/2, 1, 4, 3/, locu22/4, 3, 2, 1 &
                     /
           data xswpiv/.false., .false., .true., .true./
           data bswpiv/.false., .true., .false., .true./
           ! .. executable statements ..
           ! do not check the input parameters for errors
           info = 0
           ! quick return if possible
           if (n1 == 0 .or. n2 == 0) return
           ! set constants to control overflow
           eps = stdlib_dlamch('p')
           smlnum = stdlib_dlamch('s')/eps
           sgn = isgn
           k = n1 + n1 + n2 - 2
           go to(10, 20, 30, 50) k
           ! 1 by 1: tl11*x + sgn*x*tr11 = b11
10      continue
           tau1 = tl(1, 1) + sgn*tr(1, 1)
           bet = abs(tau1)
           if (bet <= smlnum) then
              tau1 = smlnum
              bet = smlnum
              info = 1
           end if
           scale = one
           gam = abs(b(1, 1))
           if (smlnum*gam > bet) scale = one/gam
           x(1, 1) = (b(1, 1)*scale)/tau1
           xnorm = abs(x(1, 1))
           return
           ! 1 by 2:
           ! tl11*[x11 x12] + isgn*[x11 x12]*op[tr11 tr12]  = [b11 b12]
                                             ! [tr21 tr22]
20      continue
           smin = max(eps*max(abs(tl(1, 1)), abs(tr(1, 1)), abs(tr(1, 2)), abs(tr( &
                     2, 1)), abs(tr(2, 2))), smlnum)
           tmp(1) = tl(1, 1) + sgn*tr(1, 1)
           tmp(4) = tl(1, 1) + sgn*tr(2, 2)
           if (ltranr) then
              tmp(2) = sgn*tr(2, 1)
              tmp(3) = sgn*tr(1, 2)
           else
              tmp(2) = sgn*tr(1, 2)
              tmp(3) = sgn*tr(2, 1)
           end if
           btmp(1) = b(1, 1)
           btmp(2) = b(1, 2)
           go to 40
           ! 2 by 1:
                ! op[tl11 tl12]*[x11] + isgn* [x11]*tr11  = [b11]
                  ! [tl21 tl22] [x21]         [x21]         [b21]
30      continue
           smin = max(eps*max(abs(tr(1, 1)), abs(tl(1, 1)), abs(tl(1, 2)), abs(tl( &
                     2, 1)), abs(tl(2, 2))), smlnum)
           tmp(1) = tl(1, 1) + sgn*tr(1, 1)
           tmp(4) = tl(2, 2) + sgn*tr(1, 1)
           if (ltranl) then
              tmp(2) = tl(1, 2)
              tmp(3) = tl(2, 1)
           else
              tmp(2) = tl(2, 1)
              tmp(3) = tl(1, 2)
           end if
           btmp(1) = b(1, 1)
           btmp(2) = b(2, 1)
40      continue
           ! solve 2 by 2 system using complete pivoting.
           ! set pivots less than smin to smin.
           ipiv = stdlib_idamax(4, tmp, 1)
           u11 = tmp(ipiv)
           if (abs(u11) <= smin) then
              info = 1
              u11 = smin
           end if
           u12 = tmp(locu12(ipiv))
           l21 = tmp(locl21(ipiv))/u11
           u22 = tmp(locu22(ipiv)) - u12*l21
           xswap = xswpiv(ipiv)
           bswap = bswpiv(ipiv)
           if (abs(u22) <= smin) then
              info = 1
              u22 = smin
           end if
           if (bswap) then
              temp = btmp(2)
              btmp(2) = btmp(1) - l21*temp
              btmp(1) = temp
           else
              btmp(2) = btmp(2) - l21*btmp(1)
           end if
           scale = one
           if ((two*smlnum)*abs(btmp(2)) > abs(u22) .or. (two*smlnum)*abs(btmp(1)) > abs( &
                      u11)) then
              scale = half/max(abs(btmp(1)), abs(btmp(2)))
              btmp(1) = btmp(1)*scale
              btmp(2) = btmp(2)*scale
           end if
           x2(2) = btmp(2)/u22
           x2(1) = btmp(1)/u11 - (u12/u11)*x2(2)
           if (xswap) then
              temp = x2(2)
              x2(2) = x2(1)
              x2(1) = temp
           end if
           x(1, 1) = x2(1)
           if (n1 == 1) then
              x(1, 2) = x2(2)
              xnorm = abs(x(1, 1)) + abs(x(1, 2))
           else
              x(2, 1) = x2(2)
              xnorm = max(abs(x(1, 1)), abs(x(2, 1)))
           end if
           return
           ! 2 by 2:
           ! op[tl11 tl12]*[x11 x12] +isgn* [x11 x12]*op[tr11 tr12] = [b11 b12]
             ! [tl21 tl22] [x21 x22]        [x21 x22]   [tr21 tr22]   [b21 b22]
           ! solve equivalent 4 by 4 system using complete pivoting.
           ! set pivots less than smin to smin.
50      continue
           smin = max(abs(tr(1, 1)), abs(tr(1, 2)), abs(tr(2, 1)), abs(tr(2, 2)))
                     
           smin = max(smin, abs(tl(1, 1)), abs(tl(1, 2)), abs(tl(2, 1)), abs(tl(2, &
                     2)))
           smin = max(eps*smin, smlnum)
           btmp(1) = zero
           call stdlib_dcopy(16, btmp, 0, t16, 1)
           t16(1, 1) = tl(1, 1) + sgn*tr(1, 1)
           t16(2, 2) = tl(2, 2) + sgn*tr(1, 1)
           t16(3, 3) = tl(1, 1) + sgn*tr(2, 2)
           t16(4, 4) = tl(2, 2) + sgn*tr(2, 2)
           if (ltranl) then
              t16(1, 2) = tl(2, 1)
              t16(2, 1) = tl(1, 2)
              t16(3, 4) = tl(2, 1)
              t16(4, 3) = tl(1, 2)
           else
              t16(1, 2) = tl(1, 2)
              t16(2, 1) = tl(2, 1)
              t16(3, 4) = tl(1, 2)
              t16(4, 3) = tl(2, 1)
           end if
           if (ltranr) then
              t16(1, 3) = sgn*tr(1, 2)
              t16(2, 4) = sgn*tr(1, 2)
              t16(3, 1) = sgn*tr(2, 1)
              t16(4, 2) = sgn*tr(2, 1)
           else
              t16(1, 3) = sgn*tr(2, 1)
              t16(2, 4) = sgn*tr(2, 1)
              t16(3, 1) = sgn*tr(1, 2)
              t16(4, 2) = sgn*tr(1, 2)
           end if
           btmp(1) = b(1, 1)
           btmp(2) = b(2, 1)
           btmp(3) = b(1, 2)
           btmp(4) = b(2, 2)
           ! perform elimination
           loop_100: do i = 1, 3
              xmax = zero
              do ip = i, 4
                 do jp = i, 4
                    if (abs(t16(ip, jp)) >= xmax) then
                       xmax = abs(t16(ip, jp))
                       ipsv = ip
                       jpsv = jp
                    end if
                 end do
              end do
              if (ipsv /= i) then
                 call stdlib_dswap(4, t16(ipsv, 1), 4, t16(i, 1), 4)
                 temp = btmp(i)
                 btmp(i) = btmp(ipsv)
                 btmp(ipsv) = temp
              end if
              if (jpsv /= i) call stdlib_dswap(4, t16(1, jpsv), 1, t16(1, i), 1)
              jpiv(i) = jpsv
              if (abs(t16(i, i)) < smin) then
                 info = 1
                 t16(i, i) = smin
              end if
              do j = i + 1, 4
                 t16(j, i) = t16(j, i)/t16(i, i)
                 btmp(j) = btmp(j) - t16(j, i)*btmp(i)
                 do k = i + 1, 4
                    t16(j, k) = t16(j, k) - t16(j, i)*t16(i, k)
                 end do
              end do
           end do loop_100
           if (abs(t16(4, 4)) < smin) then
              info = 1
              t16(4, 4) = smin
           end if
           scale = one
           if ((eight*smlnum)*abs(btmp(1)) > abs(t16(1, 1)) .or. (eight*smlnum)*abs( &
           btmp(2)) > abs(t16(2, 2)) .or. (eight*smlnum)*abs(btmp(3)) > abs(t16(3, 3)) &
                      .or. (eight*smlnum)*abs(btmp(4)) > abs(t16(4, 4))) then
              scale = (one/eight)/max(abs(btmp(1)), abs(btmp(2)), abs(btmp(3)), &
                        abs(btmp(4)))
              btmp(1) = btmp(1)*scale
              btmp(2) = btmp(2)*scale
              btmp(3) = btmp(3)*scale
              btmp(4) = btmp(4)*scale
           end if
           do i = 1, 4
              k = 5 - i
              temp = one/t16(k, k)
              tmp(k) = btmp(k)*temp
              do j = k + 1, 4
                 tmp(k) = tmp(k) - (temp*t16(k, j))*tmp(j)
              end do
           end do
           do i = 1, 3
              if (jpiv(4 - i) /= 4 - i) then
                 temp = tmp(4 - i)
                 tmp(4 - i) = tmp(jpiv(4 - i))
                 tmp(jpiv(4 - i)) = temp
              end if
           end do
           x(1, 1) = tmp(1)
           x(2, 1) = tmp(2)
           x(1, 2) = tmp(3)
           x(2, 2) = tmp(4)
           xnorm = max(abs(tmp(1)) + abs(tmp(3)), abs(tmp(2)) + abs(tmp(4)))
           return
           ! end of stdlib_dlasy2
     end subroutine stdlib_dlasy2

     ! DLASYF computes a partial factorization of a real symmetric matrix A
     ! using the Bunch-Kaufman diagonal pivoting method. The partial
     ! factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I       0    )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**T U22**T )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**T L21**T )  if UPLO = 'L'
     ! ( L21  I ) (  0  A22 ) (  0       I    )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code
     ! (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
     ! A22 (if UPLO = 'L').

     subroutine stdlib_dlasyf(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: sevten = 17.0_dp
           
           ! .. local scalars ..
           integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw
           real(dp) :: absakk, alpha, colmax, d11, d21, d22, r1, rowmax, t
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, sqrt
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              ! kw is the column of w which corresponds to column k of a
              k = n
10      continue
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              ! copy column k of a to column kw of w and update it
              call stdlib_dcopy(k, a(1, k), 1, w(1, kw), 1)
              if (k < n) call stdlib_dgemv('no transpose', k, n - k, -one, a(1, k + 1), lda, w(k, kw + &
                        1), ldw, one, w(1, kw), 1)
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(w(k, kw))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_idamax(k - 1, w(1, kw), 1)
                 colmax = abs(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! copy column imax to column kw-1 of w and update it
                    call stdlib_dcopy(imax, a(1, imax), 1, w(1, kw - 1), 1)
                    call stdlib_dcopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                              
                    if (k < n) call stdlib_dgemv('no transpose', k, n - k, -one, a(1, k + 1), lda, w( &
                              imax, kw + 1), ldw, one, w(1, kw - 1), 1)
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = imax + stdlib_idamax(k - imax, w(imax + 1, kw - 1), 1)
                    rowmax = abs(w(jmax, kw - 1))
                    if (imax > 1) then
                       jmax = stdlib_idamax(imax - 1, w(1, kw - 1), 1)
                       rowmax = max(rowmax, abs(w(jmax, kw - 1)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(w(imax, kw - 1)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                       ! copy column kw-1 of w to column kw of w
                       call stdlib_dcopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kkw of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k-1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = a(kk, kk)
                    call stdlib_dcopy(kk - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    if (kp > 1) call stdlib_dcopy(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last k+1 to n columns of a
                    ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in last kkw to nb columns of w.
                    if (k < n) call stdlib_dswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                    call stdlib_dswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(kw) = u(k)*d(k),
                    ! where u(k) is the k-th column of u
                    ! store subdiag. elements of column u(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! note: diagonal element u(k,k) is a unit element
                    ! and not stored.
                       ! a(k,k) := d(k,k) = w(k,kw)
                       ! a(1:k-1,k) := u(1:k-1,k) = w(1:k-1,kw)/d(k,k)
                    call stdlib_dcopy(k, w(1, kw), 1, a(1, k), 1)
                    r1 = one/a(k, k)
                    call stdlib_dscal(k - 1, r1, a(1, k), 1)
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold
                    ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2
                    ! block d(k-1:k,k-1:k) in columns k-1 and k of a.
                    ! note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit
                    ! block and not stored.
                       ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw)
                       ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) =
                       ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) )
                    if (k > 2) then
                       ! compose the columns of the inverse of 2-by-2 pivot
                       ! block d in the following way to reduce the number
                       ! of flops when we myltiply panel ( w(kw-1) w(kw) ) by
                       ! this inverse
                       ! d**(-1) = ( d11 d21 )**(-1) =
                                 ! ( d21 d22 )
                       ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) =
                                              ! ( (-d21 ) ( d11 ) )
                       ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) *
                         ! * ( ( d22/d21 ) (      -1 ) ) =
                           ! ( (      -1 ) ( d11/d21 ) )
                       ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) (  -1 ) ) =
                                                 ! ( ( -1  ) ( d22 ) )
                       ! = 1/d21 * t * ( ( d11 ) (  -1 ) )
                                     ! ( (  -1 ) ( d22 ) )
                       ! = d21 * ( ( d11 ) (  -1 ) )
                               ! ( (  -1 ) ( d22 ) )
                       d21 = w(k - 1, kw)
                       d11 = w(k, kw)/d21
                       d22 = w(k - 1, kw - 1)/d21
                       t = one/(d11*d22 - one)
                       d21 = t/d21
                       ! update elements in columns a(k-1) and a(k) as
                       ! dot products of rows of ( w(kw-1) w(kw) ) and columns
                       ! of d**(-1)
                       do j = 1, k - 2
                          a(j, k - 1) = d21*(d11*w(j, kw - 1) - w(j, kw))
                          a(j, k) = d21*(d22*w(j, kw) - w(j, kw - 1))
                       end do
                    end if
                    ! copy d(k) to a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = w(k - 1, kw)
                    a(k, k) = w(k, kw)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t
              ! computing blocks of nb columns at a time
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_dgemv('no transpose', jj - j + 1, n - k, -one, a(j, k + 1), lda, w(jj, &
                              kw + 1), ldw, one, a(j, jj), 1)
                 end do
                 ! update the rectangular superdiagonal block
                 call stdlib_dgemm('no transpose', 'transpose', j - 1, jb, n - k, -one, a(1, k + 1), &
                           lda, w(j, kw + 1), ldw, one, a(1, j), lda)
              end do
              ! put u12 in standard form by partially undoing the interchanges
              ! in columns k+1:n looping backwards from k+1 to n
              j = k + 1
60      continue
                 ! undo the interchanges (if any) of rows jj and jp at each
                 ! step j
                 ! (here, j is a diagonal index)
                 jj = j
                 jp = ipiv(j)
                 if (jp < 0) then
                    jp = -jp
                    ! (here, j is a diagonal index)
                    j = j + 1
                 end if
                 ! (note: here, j is used to determine row length. length n-j+1
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j + 1
                 if (jp /= jj .and. j <= n) call stdlib_dswap(n - j + 1, a(jp, j), lda, a(jj, j), &
                           lda)
              if (j < n) go to 60
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              ! copy column k of a to column k of w and update it
              call stdlib_dcopy(n - k + 1, a(k, k), 1, w(k, k), 1)
              call stdlib_dgemv('no transpose', n - k + 1, k - 1, -one, a(k, 1), lda, w(k, 1), ldw, &
                        one, w(k, k), 1)
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(w(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_idamax(n - k, w(k + 1, k), 1)
                 colmax = abs(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! copy column imax to column k+1 of w and update it
                    call stdlib_dcopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                    call stdlib_dcopy(n - imax + 1, a(imax, imax), 1, w(imax, k + 1), 1)
                    call stdlib_dgemv('no transpose', n - k + 1, k - 1, -one, a(k, 1), lda, w(imax, &
                              1), ldw, one, w(k, k + 1), 1)
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = k - 1 + stdlib_idamax(imax - k, w(k, k + 1), 1)
                    rowmax = abs(w(jmax, k + 1))
                    if (imax < n) then
                       jmax = imax + stdlib_idamax(n - imax, w(imax + 1, k + 1), 1)
                       rowmax = max(rowmax, abs(w(jmax, k + 1)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(w(imax, k + 1)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                       ! copy column k+1 of w to column k of w
                       call stdlib_dcopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k + kstep - 1
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kk of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k+1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = a(kk, kk)
                    call stdlib_dcopy(kp - kk - 1, a(kk + 1, kk), 1, a(kp, kk + 1), lda)
                    if (kp < n) call stdlib_dcopy(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    ! interchange rows kk and kp in first k-1 columns of a
                    ! (columns k (or k and k+1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in first kk columns of w.
                    if (k > 1) call stdlib_dswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_dswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k),
                    ! where l(k) is the k-th column of l
                    ! store subdiag. elements of column l(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element l(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,k)
                       ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k)
                    call stdlib_dcopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       r1 = one/a(k, k)
                       call stdlib_dscal(n - k, r1, a(k + 1, k), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2
                    ! block d(k:k+1,k:k+1) in columns k and k+1 of a.
                    ! (note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit
                    ! block and not stored)
                       ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1)
                       ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) =
                       ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) )
                    if (k < n - 1) then
                       ! compose the columns of the inverse of 2-by-2 pivot
                       ! block d in the following way to reduce the number
                       ! of flops when we myltiply panel ( w(k) w(k+1) ) by
                       ! this inverse
                       ! d**(-1) = ( d11 d21 )**(-1) =
                                 ! ( d21 d22 )
                       ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) =
                                              ! ( (-d21 ) ( d11 ) )
                       ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) *
                         ! * ( ( d22/d21 ) (      -1 ) ) =
                           ! ( (      -1 ) ( d11/d21 ) )
                       ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) (  -1 ) ) =
                                                 ! ( ( -1  ) ( d22 ) )
                       ! = 1/d21 * t * ( ( d11 ) (  -1 ) )
                                     ! ( (  -1 ) ( d22 ) )
                       ! = d21 * ( ( d11 ) (  -1 ) )
                               ! ( (  -1 ) ( d22 ) )
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/d21
                       t = one/(d11*d22 - one)
                       d21 = t/d21
                       ! update elements in columns a(k) and a(k+1) as
                       ! dot products of rows of ( w(k) w(k+1) ) and columns
                       ! of d**(-1)
                       do j = k + 2, n
                          a(j, k) = d21*(d11*w(j, k) - w(j, k + 1))
                          a(j, k + 1) = d21*(d22*w(j, k + 1) - w(j, k))
                       end do
                    end if
                    ! copy d(k) to a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = w(k + 1, k)
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**t = a22 - l21*w**t
              ! computing blocks of nb columns at a time
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_dgemv('no transpose', j + jb - jj, k - 1, -one, a(jj, 1), lda, w(jj, &
                              1), ldw, one, a(jj, jj), 1)
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_dgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           one, a(j + jb, 1), lda, w(j, 1), ldw, one, a(j + jb, j), lda)
              end do
              ! put l21 in standard form by partially undoing the interchanges
              ! of rows in columns 1:k-1 looping backwards from k-1 to 1
              j = k - 1
120    continue
                 ! undo the interchanges (if any) of rows jj and jp at each
                 ! step j
                 ! (here, j is a diagonal index)
                 jj = j
                 jp = ipiv(j)
                 if (jp < 0) then
                    jp = -jp
                    ! (here, j is a diagonal index)
                    j = j - 1
                 end if
                 ! (note: here, j is used to determine row length. length j
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j - 1
                 if (jp /= jj .and. j >= 1) call stdlib_dswap(j, a(jp, 1), lda, a(jj, 1), lda)
                           
              if (j > 1) go to 120
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_dlasyf
     end subroutine stdlib_dlasyf

     ! DLASYF_RK computes a partial factorization of a real symmetric
     ! matrix A using the bounded Bunch-Kaufman (rook) diagonal
     ! pivoting method. The partial factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I       0    )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**T U22**T )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**T L21**T )  if UPLO = 'L',
     ! ( L21  I ) (  0  A22 ) (  0       I    )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! DLASYF_RK is an auxiliary routine called by DSYTRF_RK. It uses
     ! blocked code (calling Level 3 BLAS) to update the submatrix
     ! A11 (if UPLO = 'U') or A22 (if UPLO = 'L').

     subroutine stdlib_dlasyf_rk(uplo, n, nb, kb, a, lda, e, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), e(*), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: sevten = 17.0_dp
           
           ! .. local scalars ..
           logical(lk) :: done
           integer(ilp) :: imax, itemp, j, jb, jj, jmax, k, kk, kw, kkw, kp, kstep, p, ii
           real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, dtemp, r1, rowmax, t, &
                     sfmin
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, sqrt
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_dlamch('s')
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11
              ! initialize the first entry of array e, where superdiagonal
              ! elements of d are stored
              e(1) = zero
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              k = n
10      continue
              ! kw is the column of w which corresponds to column k of a
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              kstep = 1
              p = k
              ! copy column k of a to column kw of w and update it
              call stdlib_dcopy(k, a(1, k), 1, w(1, kw), 1)
              if (k < n) call stdlib_dgemv('no transpose', k, n - k, -one, a(1, k + 1), lda, w(k, kw + &
                        1), ldw, one, w(1, kw), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(w(k, kw))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_idamax(k - 1, w(1, kw), 1)
                 colmax = abs(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_dcopy(k, w(1, kw), 1, a(1, k), 1)
                 ! set e( k ) to zero
                 if (k > 1) e(k) = zero
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! copy column imax to column kw-1 of w and update it
                       call stdlib_dcopy(imax, a(1, imax), 1, w(1, kw - 1), 1)
                       call stdlib_dcopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                                 
                       if (k < n) call stdlib_dgemv('no transpose', k, n - k, -one, a(1, k + 1), lda, &
                                 w(imax, kw + 1), ldw, one, w(1, kw - 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_idamax(k - imax, w(imax + 1, kw - 1), 1)
                          rowmax = abs(w(jmax, kw - 1))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_idamax(imax - 1, w(1, kw - 1), 1)
                          dtemp = abs(w(itemp, kw - 1))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! abs( w( imax, kw-1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(w(imax, kw - 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column kw-1 of w to column kw of w
                          call stdlib_dcopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_dcopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! ============================================================
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_dcopy(k - p, a(p + 1, k), 1, a(p, p + 1), lda)
                    call stdlib_dcopy(p, a(1, k), 1, a(1, p), 1)
                    ! interchange rows k and p in last n-k+1 columns of a
                    ! and last n-k+2 columns of w
                    call stdlib_dswap(n - k + 1, a(k, k), lda, a(p, k), lda)
                    call stdlib_dswap(n - kk + 1, w(k, kkw), ldw, w(p, kkw), ldw)
                 end if
                 ! updated column kp is already stored in column kkw of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_dcopy(k - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    call stdlib_dcopy(kp, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last n-kk+1 columns
                    ! of a and w
                    call stdlib_dswap(n - kk + 1, a(kk, kk), lda, a(kp, kk), lda)
                    call stdlib_dswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! store u(k) in column k of a
                    call stdlib_dcopy(k, w(1, kw), 1, a(1, k), 1)
                    if (k > 1) then
                       if (abs(a(k, k)) >= sfmin) then
                          r1 = one/a(k, k)
                          call stdlib_dscal(k - 1, r1, a(1, k), 1)
                       else if (a(k, k) /= zero) then
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                       ! store the superdiagonal element of d in array e
                       e(k) = zero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now
                    ! hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    if (k > 2) then
                       ! store u(k) and u(k-1) in columns k and k-1 of a
                       d12 = w(k - 1, kw)
                       d11 = w(k, kw)/d12
                       d22 = w(k - 1, kw - 1)/d12
                       t = one/(d11*d22 - one)
                       do j = 1, k - 2
                          a(j, k - 1) = t*((d11*w(j, kw - 1) - w(j, kw))/d12)
                          a(j, k) = t*((d22*w(j, kw) - w(j, kw - 1))/d12)
                       end do
                    end if
                    ! copy diagonal elements of d(k) to a,
                    ! copy superdiagonal element of d(k) to e(k) and
                    ! zero out superdiagonal entry of a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = zero
                    a(k, k) = w(k, kw)
                    e(k) = w(k - 1, kw)
                    e(k - 1) = zero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t
              ! computing blocks of nb columns at a time
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_dgemv('no transpose', jj - j + 1, n - k, -one, a(j, k + 1), lda, w(jj, &
                              kw + 1), ldw, one, a(j, jj), 1)
                 end do
                 ! update the rectangular superdiagonal block
                 if (j >= 2) call stdlib_dgemm('no transpose', 'transpose', j - 1, jb, n - k, -one, a( &
                           1, k + 1), lda, w(j, kw + 1), ldw, one, a(1, j), lda)
              end do
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22
              ! initialize the unused last entry of the subdiagonal array e.
              e(n) = zero
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              kstep = 1
              p = k
              ! copy column k of a to column k of w and update it
              call stdlib_dcopy(n - k + 1, a(k, k), 1, w(k, k), 1)
              if (k > 1) call stdlib_dgemv('no transpose', n - k + 1, k - 1, -one, a(k, 1), lda, w(k, &
                        1), ldw, one, w(k, k), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(w(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_idamax(n - k, w(k + 1, k), 1)
                 colmax = abs(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_dcopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                 ! set e( k ) to zero
                 if (k < n) e(k) = zero
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
72      continue
                       ! begin pivot search loop body
                       ! copy column imax to column k+1 of w and update it
                       call stdlib_dcopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                       call stdlib_dcopy(n - imax + 1, a(imax, imax), 1, w(imax, k + 1), 1)
                       if (k > 1) call stdlib_dgemv('no transpose', n - k + 1, k - 1, -one, a(k, 1), &
                                 lda, w(imax, 1), ldw, one, w(k, k + 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_idamax(imax - k, w(k, k + 1), 1)
                          rowmax = abs(w(jmax, k + 1))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_idamax(n - imax, w(imax + 1, k + 1), 1)
                          dtemp = abs(w(itemp, k + 1))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! abs( w( imax, k+1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(w(imax, k + 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column k+1 of w to column k of w
                          call stdlib_dcopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_dcopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 72
                 end if
                 ! ============================================================
                 kk = k + kstep - 1
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_dcopy(p - k, a(k, k), 1, a(p, k), lda)
                    call stdlib_dcopy(n - p + 1, a(p, k), 1, a(p, p), 1)
                    ! interchange rows k and p in first k columns of a
                    ! and first k+1 columns of w
                    call stdlib_dswap(k, a(k, 1), lda, a(p, 1), lda)
                    call stdlib_dswap(kk, w(k, 1), ldw, w(p, 1), ldw)
                 end if
                 ! updated column kp is already stored in column kk of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_dcopy(kp - k - 1, a(k + 1, kk), 1, a(kp, k + 1), lda)
                    call stdlib_dcopy(n - kp + 1, a(kp, kk), 1, a(kp, kp), 1)
                    ! interchange rows kk and kp in first kk columns of a and w
                    call stdlib_dswap(kk, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_dswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    ! store l(k) in column k of a
                    call stdlib_dcopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       if (abs(a(k, k)) >= sfmin) then
                          r1 = one/a(k, k)
                          call stdlib_dscal(n - k, r1, a(k + 1, k), 1)
                       else if (a(k, k) /= zero) then
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                       ! store the subdiagonal element of d in array e
                       e(k) = zero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    if (k < n - 1) then
                       ! store l(k) and l(k+1) in columns k and k+1 of a
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/d21
                       t = one/(d11*d22 - one)
                       do j = k + 2, n
                          a(j, k) = t*((d11*w(j, k) - w(j, k + 1))/d21)
                          a(j, k + 1) = t*((d22*w(j, k + 1) - w(j, k))/d21)
                       end do
                    end if
                    ! copy diagonal elements of d(k) to a,
                    ! copy subdiagonal element of d(k) to e(k) and
                    ! zero out subdiagonal entry of a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = zero
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                    e(k) = w(k + 1, k)
                    e(k + 1) = zero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**t = a22 - l21*w**t
              ! computing blocks of nb columns at a time
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_dgemv('no transpose', j + jb - jj, k - 1, -one, a(jj, 1), lda, w(jj, &
                              1), ldw, one, a(jj, jj), 1)
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_dgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           one, a(j + jb, 1), lda, w(j, 1), ldw, one, a(j + jb, j), lda)
              end do
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_dlasyf_rk
     end subroutine stdlib_dlasyf_rk

     ! DLASYF_ROOK computes a partial factorization of a real symmetric
     ! matrix A using the bounded Bunch-Kaufman ("rook") diagonal
     ! pivoting method. The partial factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I       0    )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**T U22**T )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**T L21**T )  if UPLO = 'L'
     ! ( L21  I ) (  0  A22 ) (  0       I    )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! DLASYF_ROOK is an auxiliary routine called by DSYTRF_ROOK. It uses
     ! blocked code (calling Level 3 BLAS) to update the submatrix
     ! A11 (if UPLO = 'U') or A22 (if UPLO = 'L').

     subroutine stdlib_dlasyf_rook(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: sevten = 17.0_dp
           
           ! .. local scalars ..
           logical(lk) :: done
           integer(ilp) :: imax, itemp, j, jb, jj, jmax, jp1, jp2, k, kk, kw, kkw, kp, kstep, p, &
                     ii
           real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, dtemp, r1, rowmax, t, &
                     sfmin
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, sqrt
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_dlamch('s')
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              k = n
10      continue
              ! kw is the column of w which corresponds to column k of a
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              kstep = 1
              p = k
              ! copy column k of a to column kw of w and update it
              call stdlib_dcopy(k, a(1, k), 1, w(1, kw), 1)
              if (k < n) call stdlib_dgemv('no transpose', k, n - k, -one, a(1, k + 1), lda, w(k, kw + &
                        1), ldw, one, w(1, kw), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(w(k, kw))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_idamax(k - 1, w(1, kw), 1)
                 colmax = abs(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_dcopy(k, w(1, kw), 1, a(1, k), 1)
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! copy column imax to column kw-1 of w and update it
                       call stdlib_dcopy(imax, a(1, imax), 1, w(1, kw - 1), 1)
                       call stdlib_dcopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                                 
                       if (k < n) call stdlib_dgemv('no transpose', k, n - k, -one, a(1, k + 1), lda, &
                                 w(imax, kw + 1), ldw, one, w(1, kw - 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_idamax(k - imax, w(imax + 1, kw - 1), 1)
                          rowmax = abs(w(jmax, kw - 1))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_idamax(imax - 1, w(1, kw - 1), 1)
                          dtemp = abs(w(itemp, kw - 1))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! abs( w( imax, kw-1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(w(imax, kw - 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column kw-1 of w to column kw of w
                          call stdlib_dcopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_dcopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! ============================================================
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_dcopy(k - p, a(p + 1, k), 1, a(p, p + 1), lda)
                    call stdlib_dcopy(p, a(1, k), 1, a(1, p), 1)
                    ! interchange rows k and p in last n-k+1 columns of a
                    ! and last n-k+2 columns of w
                    call stdlib_dswap(n - k + 1, a(k, k), lda, a(p, k), lda)
                    call stdlib_dswap(n - kk + 1, w(k, kkw), ldw, w(p, kkw), ldw)
                 end if
                 ! updated column kp is already stored in column kkw of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_dcopy(k - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    call stdlib_dcopy(kp, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last n-kk+1 columns
                    ! of a and w
                    call stdlib_dswap(n - kk + 1, a(kk, kk), lda, a(kp, kk), lda)
                    call stdlib_dswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! store u(k) in column k of a
                    call stdlib_dcopy(k, w(1, kw), 1, a(1, k), 1)
                    if (k > 1) then
                       if (abs(a(k, k)) >= sfmin) then
                          r1 = one/a(k, k)
                          call stdlib_dscal(k - 1, r1, a(1, k), 1)
                       else if (a(k, k) /= zero) then
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now
                    ! hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    if (k > 2) then
                       ! store u(k) and u(k-1) in columns k and k-1 of a
                       d12 = w(k - 1, kw)
                       d11 = w(k, kw)/d12
                       d22 = w(k - 1, kw - 1)/d12
                       t = one/(d11*d22 - one)
                       do j = 1, k - 2
                          a(j, k - 1) = t*((d11*w(j, kw - 1) - w(j, kw))/d12)
                          a(j, k) = t*((d22*w(j, kw) - w(j, kw - 1))/d12)
                       end do
                    end if
                    ! copy d(k) to a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = w(k - 1, kw)
                    a(k, k) = w(k, kw)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t
              ! computing blocks of nb columns at a time
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_dgemv('no transpose', jj - j + 1, n - k, -one, a(j, k + 1), lda, w(jj, &
                              kw + 1), ldw, one, a(j, jj), 1)
                 end do
                 ! update the rectangular superdiagonal block
                 if (j >= 2) call stdlib_dgemm('no transpose', 'transpose', j - 1, jb, n - k, -one, a( &
                           1, k + 1), lda, w(j, kw + 1), ldw, one, a(1, j), lda)
              end do
              ! put u12 in standard form by partially undoing the interchanges
              ! in columns k+1:n
              j = k + 1
60      continue
                 kstep = 1
                 jp1 = 1
                 jj = j
                 jp2 = ipiv(j)
                 if (jp2 < 0) then
                    jp2 = -jp2
                    j = j + 1
                    jp1 = -ipiv(j)
                    kstep = 2
                 end if
                 j = j + 1
                 if (jp2 /= jj .and. j <= n) call stdlib_dswap(n - j + 1, a(jp2, j), lda, a(jj, j), &
                           lda)
                 jj = j - 1
                 if (jp1 /= jj .and. kstep == 2) call stdlib_dswap(n - j + 1, a(jp1, j), lda, a(jj, j &
                           ), lda)
              if (j <= n) go to 60
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              kstep = 1
              p = k
              ! copy column k of a to column k of w and update it
              call stdlib_dcopy(n - k + 1, a(k, k), 1, w(k, k), 1)
              if (k > 1) call stdlib_dgemv('no transpose', n - k + 1, k - 1, -one, a(k, 1), lda, w(k, &
                        1), ldw, one, w(k, k), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(w(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_idamax(n - k, w(k + 1, k), 1)
                 colmax = abs(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_dcopy(n - k + 1, w(k, k), 1, a(k, k), 1)
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
72      continue
                       ! begin pivot search loop body
                       ! copy column imax to column k+1 of w and update it
                       call stdlib_dcopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                       call stdlib_dcopy(n - imax + 1, a(imax, imax), 1, w(imax, k + 1), 1)
                       if (k > 1) call stdlib_dgemv('no transpose', n - k + 1, k - 1, -one, a(k, 1), &
                                 lda, w(imax, 1), ldw, one, w(k, k + 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_idamax(imax - k, w(k, k + 1), 1)
                          rowmax = abs(w(jmax, k + 1))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_idamax(n - imax, w(imax + 1, k + 1), 1)
                          dtemp = abs(w(itemp, k + 1))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! abs( w( imax, k+1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(w(imax, k + 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column k+1 of w to column k of w
                          call stdlib_dcopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_dcopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 72
                 end if
                 ! ============================================================
                 kk = k + kstep - 1
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_dcopy(p - k, a(k, k), 1, a(p, k), lda)
                    call stdlib_dcopy(n - p + 1, a(p, k), 1, a(p, p), 1)
                    ! interchange rows k and p in first k columns of a
                    ! and first k+1 columns of w
                    call stdlib_dswap(k, a(k, 1), lda, a(p, 1), lda)
                    call stdlib_dswap(kk, w(k, 1), ldw, w(p, 1), ldw)
                 end if
                 ! updated column kp is already stored in column kk of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_dcopy(kp - k - 1, a(k + 1, kk), 1, a(kp, k + 1), lda)
                    call stdlib_dcopy(n - kp + 1, a(kp, kk), 1, a(kp, kp), 1)
                    ! interchange rows kk and kp in first kk columns of a and w
                    call stdlib_dswap(kk, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_dswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    ! store l(k) in column k of a
                    call stdlib_dcopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       if (abs(a(k, k)) >= sfmin) then
                          r1 = one/a(k, k)
                          call stdlib_dscal(n - k, r1, a(k + 1, k), 1)
                       else if (a(k, k) /= zero) then
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    if (k < n - 1) then
                       ! store l(k) and l(k+1) in columns k and k+1 of a
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/d21
                       t = one/(d11*d22 - one)
                       do j = k + 2, n
                          a(j, k) = t*((d11*w(j, k) - w(j, k + 1))/d21)
                          a(j, k + 1) = t*((d22*w(j, k + 1) - w(j, k))/d21)
                       end do
                    end if
                    ! copy d(k) to a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = w(k + 1, k)
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**t = a22 - l21*w**t
              ! computing blocks of nb columns at a time
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_dgemv('no transpose', j + jb - jj, k - 1, -one, a(jj, 1), lda, w(jj, &
                              1), ldw, one, a(jj, jj), 1)
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_dgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           one, a(j + jb, 1), lda, w(j, 1), ldw, one, a(j + jb, j), lda)
              end do
              ! put l21 in standard form by partially undoing the interchanges
              ! in columns 1:k-1
              j = k - 1
120    continue
                 kstep = 1
                 jp1 = 1
                 jj = j
                 jp2 = ipiv(j)
                 if (jp2 < 0) then
                    jp2 = -jp2
                    j = j - 1
                    jp1 = -ipiv(j)
                    kstep = 2
                 end if
                 j = j - 1
                 if (jp2 /= jj .and. j >= 1) call stdlib_dswap(j, a(jp2, 1), lda, a(jj, 1), lda)
                           
                 jj = j + 1
                 if (jp1 /= jj .and. kstep == 2) call stdlib_dswap(j, a(jp1, 1), lda, a(jj, 1), &
                           lda)
              if (j >= 1) go to 120
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_dlasyf_rook
     end subroutine stdlib_dlasyf_rook

     ! DLAT2S converts a DOUBLE PRECISION triangular matrix, SA, to a SINGLE
     ! PRECISION triangular matrix, A.
     ! RMAX is the overflow for the SINGLE PRECISION arithmetic
     ! DLAS2S checks that all the entries of A are between -RMAX and
     ! RMAX. If not the conversion is aborted and a flag is raised.
     ! This is an auxiliary routine so there is no argument checking.

     subroutine stdlib_dlat2s(uplo, n, a, lda, sa, ldsa, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldsa, n
           ! .. array arguments ..
           real(sp) :: sa(ldsa, *)
           real(dp) :: a(lda, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(dp) :: rmax
           logical(lk) :: upper
     
           ! .. executable statements ..
           rmax = stdlib_slamch('o')
           upper = stdlib_lsame(uplo, 'u')
           if (upper) then
              do j = 1, n
                 do i = 1, j
                    if ((a(i, j) < -rmax) .or. (a(i, j) > rmax)) then
                       info = 1
                       go to 50
                    end if
                    sa(i, j) = a(i, j)
                 end do
              end do
           else
              do j = 1, n
                 do i = j, n
                    if ((a(i, j) < -rmax) .or. (a(i, j) > rmax)) then
                       info = 1
                       go to 50
                    end if
                    sa(i, j) = a(i, j)
                 end do
              end do
           end if
50      continue
           return
           ! end of stdlib_dlat2s
     end subroutine stdlib_dlat2s

     ! DLATBS solves one of the triangular systems
     ! A *x = s*b  or  A**T*x = s*b
     ! with scaling to prevent overflow, where A is an upper or lower
     ! triangular band matrix.  Here A**T denotes the transpose of A, x and b
     ! are n-element vectors, and s is a scaling factor, usually less than
     ! or equal to 1, chosen so that the components of x will be less than
     ! the overflow threshold.  If the unscaled problem will not cause
     ! overflow, the Level 2 BLAS routine DTBSV is called.  If the matrix A
     ! is singular (A(j,j) = 0 for some j), then s is set to 0 and a
     ! non-trivial solution to A*x = 0 is returned.

     subroutine stdlib_dlatbs(uplo, trans, diag, normin, n, kd, ab, ldab, x, scale, cnorm, info)
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, normin, trans, uplo
           integer(ilp) :: info, kd, ldab, n
           real(dp) :: scale
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), cnorm(*), x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           integer(ilp) :: i, imax, j, jfirst, jinc, jlast, jlen, maind
           real(dp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, &
                     xmax
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           ! test the input parameters.
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (.not. stdlib_lsame(normin, 'y') .and. .not. stdlib_lsame(normin, 'n')) &
                     then
              info = -4
           else if (n < 0) then
              info = -5
           else if (kd < 0) then
              info = -6
           else if (ldab < kd + 1) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlatbs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine machine dependent parameters to control overflow.
           smlnum = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           bignum = one/smlnum
           scale = one
           if (stdlib_lsame(normin, 'n')) then
              ! compute the 1-norm of each column, not including the diagonal.
              if (upper) then
                 ! a is upper triangular.
                 do j = 1, n
                    jlen = min(kd, j - 1)
                    cnorm(j) = stdlib_dasum(jlen, ab(kd + 1 - jlen, j), 1)
                 end do
              else
                 ! a is lower triangular.
                 do j = 1, n
                    jlen = min(kd, n - j)
                    if (jlen > 0) then
                       cnorm(j) = stdlib_dasum(jlen, ab(2, j), 1)
                    else
                       cnorm(j) = zero
                    end if
                 end do
              end if
           end if
           ! scale the column norms by tscal if the maximum element in cnorm is
           ! greater than bignum.
           imax = stdlib_idamax(n, cnorm, 1)
           tmax = cnorm(imax)
           if (tmax <= bignum) then
              tscal = one
           else
              tscal = one/(smlnum*tmax)
              call stdlib_dscal(n, tscal, cnorm, 1)
           end if
           ! compute a bound on the computed solution vector to see if the
           ! level 2 blas routine stdlib_dtbsv can be used.
           j = stdlib_idamax(n, x, 1)
           xmax = abs(x(j))
           xbnd = xmax
           if (notran) then
              ! compute the growth in a * x = b.
              if (upper) then
                 jfirst = n
                 jlast = 1
                 jinc = -1
                 maind = kd + 1
              else
                 jfirst = 1
                 jlast = n
                 jinc = 1
                 maind = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 50
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, g(0) = max{x(i), i=1,...,n}.
                 grow = one/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 50
                    ! m(j) = g(j-1) / abs(a(j,j))
                    tjj = abs(ab(maind, j))
                    xbnd = min(xbnd, min(one, tjj)*grow)
                    if (tjj + cnorm(j) >= smlnum) then
                       ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) )
                       grow = grow*(tjj/(tjj + cnorm(j)))
                    else
                       ! g(j) could overflow, set grow to 0.
                       grow = zero
                    end if
                 end do
                 grow = xbnd
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, one/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 50
                    ! g(j) = g(j-1)*( 1 + cnorm(j) )
                    grow = grow*(one/(one + cnorm(j)))
                 end do
              end if
50      continue
           else
              ! compute the growth in a**t * x = b.
              if (upper) then
                 jfirst = 1
                 jlast = n
                 jinc = 1
                 maind = kd + 1
              else
                 jfirst = n
                 jlast = 1
                 jinc = -1
                 maind = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 80
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, m(0) = max{x(i), i=1,...,n}.
                 grow = one/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 80
                    ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) )
                    xj = one + cnorm(j)
                    grow = min(grow, xbnd/xj)
                    ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j))
                    tjj = abs(ab(maind, j))
                    if (xj > tjj) xbnd = xbnd*(tjj/xj)
                 end do
                 grow = min(grow, xbnd)
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, one/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 80
                    ! g(j) = ( 1 + cnorm(j) )*g(j-1)
                    xj = one + cnorm(j)
                    grow = grow/xj
                 end do
              end if
80      continue
           end if
           if ((grow*tscal) > smlnum) then
              ! use the level 2 blas solve if the reciprocal of the bound on
              ! elements of x is not too small.
              call stdlib_dtbsv(uplo, trans, diag, n, kd, ab, ldab, x, 1)
           else
              ! use a level 1 blas solve, scaling intermediate results.
              if (xmax > bignum) then
                 ! scale x so that its components are less than or equal to
                 ! bignum in absolute value.
                 scale = bignum/xmax
                 call stdlib_dscal(n, scale, x, 1)
                 xmax = bignum
              end if
              if (notran) then
                 ! solve a * x = b
                 loop_110: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) / a(j,j), scaling x if necessary.
                    xj = abs(x(j))
                    if (nounit) then
                       tjjs = ab(maind, j)*tscal
                    else
                       tjjs = tscal
                       if (tscal == one) go to 100
                    end if
                    tjj = abs(tjjs)
                    if (tjj > smlnum) then
                          ! abs(a(j,j)) > smlnum:
                       if (tjj < one) then
                          if (xj > tjj*bignum) then
                                ! scale x by 1/b(j).
                             rec = one/xj
                             call stdlib_dscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x(j) = x(j)/tjjs
                       xj = abs(x(j))
                    else if (tjj > zero) then
                          ! 0 < abs(a(j,j)) <= smlnum:
                       if (xj > tjj*bignum) then
                             ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum
                             ! to avoid overflow when dividing by a(j,j).
                          rec = (tjj*bignum)/xj
                          if (cnorm(j) > one) then
                                ! scale by 1/cnorm(j) to avoid overflow when
                                ! multiplying x(j) times column j.
                             rec = rec/cnorm(j)
                          end if
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                       x(j) = x(j)/tjjs
                       xj = abs(x(j))
                    else
                          ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                          ! scale = 0, and compute a solution to a*x = 0.
                       do i = 1, n
                          x(i) = zero
                       end do
                       x(j) = one
                       xj = one
                       scale = zero
                       xmax = zero
                    end if
100    continue
                    ! scale x if necessary to avoid overflow when adding a
                    ! multiple of column j of a.
                    if (xj > one) then
                       rec = one/xj
                       if (cnorm(j) > (bignum - xmax)*rec) then
                          ! scale x by 1/(2*abs(x(j))).
                          rec = rec*half
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                       end if
                    else if (xj*cnorm(j) > (bignum - xmax)) then
                       ! scale x by 1/2.
                       call stdlib_dscal(n, half, x, 1)
                       scale = scale*half
                    end if
                    if (upper) then
                       if (j > 1) then
                          ! compute the update
                             ! x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) -
                                                   ! x(j)* a(max(1,j-kd):j-1,j)
                          jlen = min(kd, j - 1)
                          call stdlib_daxpy(jlen, -x(j)*tscal, ab(kd + 1 - jlen, j), 1, x(j - jlen &
                                    ), 1)
                          i = stdlib_idamax(j - 1, x, 1)
                          xmax = abs(x(i))
                       end if
                    else if (j < n) then
                       ! compute the update
                          ! x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) -
                                                ! x(j) * a(j+1:min(j+kd,n),j)
                       jlen = min(kd, n - j)
                       if (jlen > 0) call stdlib_daxpy(jlen, -x(j)*tscal, ab(2, j), 1, x(j + 1), &
                                  1)
                       i = j + stdlib_idamax(n - j, x(j + 1), 1)
                       xmax = abs(x(i))
                    end if
                 end do loop_110
              else
                 ! solve a**t * x = b
                 loop_160: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = abs(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = ab(maind, j)*tscal
                       else
                          tjjs = tscal
                       end if
                       tjj = abs(tjjs)
                       if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                          rec = min(one, rec*tjj)
                          uscal = uscal/tjjs
                       end if
                       if (rec < one) then
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    sumj = zero
                    if (uscal == one) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_ddot to perform the dot product.
                       if (upper) then
                          jlen = min(kd, j - 1)
                          sumj = stdlib_ddot(jlen, ab(kd + 1 - jlen, j), 1, x(j - jlen), 1)
                       else
                          jlen = min(kd, n - j)
                          if (jlen > 0) sumj = stdlib_ddot(jlen, ab(2, j), 1, x(j + 1), 1)
                                    
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          jlen = min(kd, j - 1)
                          do i = 1, jlen
                             sumj = sumj + (ab(kd + i - jlen, j)*uscal)*x(j - jlen - 1 + i)
                          end do
                       else
                          jlen = min(kd, n - j)
                          do i = 1, jlen
                             sumj = sumj + (ab(i + 1, j)*uscal)*x(j + i)
                          end do
                       end if
                    end if
                    if (uscal == tscal) then
                       ! compute x(j) := ( x(j) - sumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - sumj
                       xj = abs(x(j))
                       if (nounit) then
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjjs = ab(maind, j)*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 150
                       end if
                       tjj = abs(tjjs)
                       if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                          if (tjj < one) then
                             if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                rec = one/xj
                                call stdlib_dscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                          end if
                          x(j) = x(j)/tjjs
                       else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                          if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                             rec = (tjj*bignum)/xj
                             call stdlib_dscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                          x(j) = x(j)/tjjs
                       else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0, and compute a solution to a**t*x = 0.
                          do i = 1, n
                             x(i) = zero
                          end do
                          x(j) = one
                          scale = zero
                          xmax = zero
                       end if
150    continue
                    else
                       ! compute x(j) := x(j) / a(j,j) - sumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = x(j)/tjjs - sumj
                    end if
                    xmax = max(xmax, abs(x(j)))
                 end do loop_160
              end if
              scale = scale/tscal
           end if
           ! scale the column norms by 1/tscal for return.
           if (tscal /= one) then
              call stdlib_dscal(n, one/tscal, cnorm, 1)
           end if
           return
           ! end of stdlib_dlatbs
     end subroutine stdlib_dlatbs

     ! DLATPS solves one of the triangular systems
     ! A *x = s*b  or  A**T*x = s*b
     ! with scaling to prevent overflow, where A is an upper or lower
     ! triangular matrix stored in packed form.  Here A**T denotes the
     ! transpose of A, x and b are n-element vectors, and s is a scaling
     ! factor, usually less than or equal to 1, chosen so that the
     ! components of x will be less than the overflow threshold.  If the
     ! unscaled problem will not cause overflow, the Level 2 BLAS routine
     ! DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
     ! then s is set to 0 and a non-trivial solution to A*x = 0 is returned.

     subroutine stdlib_dlatps(uplo, trans, diag, normin, n, ap, x, scale, cnorm, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, normin, trans, uplo
           integer(ilp) :: info, n
           real(dp) :: scale
           ! .. array arguments ..
           real(dp) :: ap(*), cnorm(*), x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           integer(ilp) :: i, imax, ip, j, jfirst, jinc, jlast, jlen
           real(dp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, &
                     xmax
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           ! test the input parameters.
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (.not. stdlib_lsame(normin, 'y') .and. .not. stdlib_lsame(normin, 'n')) &
                     then
              info = -4
           else if (n < 0) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlatps', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine machine dependent parameters to control overflow.
           smlnum = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           bignum = one/smlnum
           scale = one
           if (stdlib_lsame(normin, 'n')) then
              ! compute the 1-norm of each column, not including the diagonal.
              if (upper) then
                 ! a is upper triangular.
                 ip = 1
                 do j = 1, n
                    cnorm(j) = stdlib_dasum(j - 1, ap(ip), 1)
                    ip = ip + j
                 end do
              else
                 ! a is lower triangular.
                 ip = 1
                 do j = 1, n - 1
                    cnorm(j) = stdlib_dasum(n - j, ap(ip + 1), 1)
                    ip = ip + n - j + 1
                 end do
                 cnorm(n) = zero
              end if
           end if
           ! scale the column norms by tscal if the maximum element in cnorm is
           ! greater than bignum.
           imax = stdlib_idamax(n, cnorm, 1)
           tmax = cnorm(imax)
           if (tmax <= bignum) then
              tscal = one
           else
              tscal = one/(smlnum*tmax)
              call stdlib_dscal(n, tscal, cnorm, 1)
           end if
           ! compute a bound on the computed solution vector to see if the
           ! level 2 blas routine stdlib_dtpsv can be used.
           j = stdlib_idamax(n, x, 1)
           xmax = abs(x(j))
           xbnd = xmax
           if (notran) then
              ! compute the growth in a * x = b.
              if (upper) then
                 jfirst = n
                 jlast = 1
                 jinc = -1
              else
                 jfirst = 1
                 jlast = n
                 jinc = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 50
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, g(0) = max{x(i), i=1,...,n}.
                 grow = one/max(xbnd, smlnum)
                 xbnd = grow
                 ip = jfirst*(jfirst + 1)/2
                 jlen = n
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 50
                    ! m(j) = g(j-1) / abs(a(j,j))
                    tjj = abs(ap(ip))
                    xbnd = min(xbnd, min(one, tjj)*grow)
                    if (tjj + cnorm(j) >= smlnum) then
                       ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) )
                       grow = grow*(tjj/(tjj + cnorm(j)))
                    else
                       ! g(j) could overflow, set grow to 0.
                       grow = zero
                    end if
                    ip = ip + jinc*jlen
                    jlen = jlen - 1
                 end do
                 grow = xbnd
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, one/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 50
                    ! g(j) = g(j-1)*( 1 + cnorm(j) )
                    grow = grow*(one/(one + cnorm(j)))
                 end do
              end if
50      continue
           else
              ! compute the growth in a**t * x = b.
              if (upper) then
                 jfirst = 1
                 jlast = n
                 jinc = 1
              else
                 jfirst = n
                 jlast = 1
                 jinc = -1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 80
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, m(0) = max{x(i), i=1,...,n}.
                 grow = one/max(xbnd, smlnum)
                 xbnd = grow
                 ip = jfirst*(jfirst + 1)/2
                 jlen = 1
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 80
                    ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) )
                    xj = one + cnorm(j)
                    grow = min(grow, xbnd/xj)
                    ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j))
                    tjj = abs(ap(ip))
                    if (xj > tjj) xbnd = xbnd*(tjj/xj)
                    jlen = jlen + 1
                    ip = ip + jinc*jlen
                 end do
                 grow = min(grow, xbnd)
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, one/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 80
                    ! g(j) = ( 1 + cnorm(j) )*g(j-1)
                    xj = one + cnorm(j)
                    grow = grow/xj
                 end do
              end if
80      continue
           end if
           if ((grow*tscal) > smlnum) then
              ! use the level 2 blas solve if the reciprocal of the bound on
              ! elements of x is not too small.
              call stdlib_dtpsv(uplo, trans, diag, n, ap, x, 1)
           else
              ! use a level 1 blas solve, scaling intermediate results.
              if (xmax > bignum) then
                 ! scale x so that its components are less than or equal to
                 ! bignum in absolute value.
                 scale = bignum/xmax
                 call stdlib_dscal(n, scale, x, 1)
                 xmax = bignum
              end if
              if (notran) then
                 ! solve a * x = b
                 ip = jfirst*(jfirst + 1)/2
                 loop_110: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) / a(j,j), scaling x if necessary.
                    xj = abs(x(j))
                    if (nounit) then
                       tjjs = ap(ip)*tscal
                    else
                       tjjs = tscal
                       if (tscal == one) go to 100
                    end if
                    tjj = abs(tjjs)
                    if (tjj > smlnum) then
                          ! abs(a(j,j)) > smlnum:
                       if (tjj < one) then
                          if (xj > tjj*bignum) then
                                ! scale x by 1/b(j).
                             rec = one/xj
                             call stdlib_dscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x(j) = x(j)/tjjs
                       xj = abs(x(j))
                    else if (tjj > zero) then
                          ! 0 < abs(a(j,j)) <= smlnum:
                       if (xj > tjj*bignum) then
                             ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum
                             ! to avoid overflow when dividing by a(j,j).
                          rec = (tjj*bignum)/xj
                          if (cnorm(j) > one) then
                                ! scale by 1/cnorm(j) to avoid overflow when
                                ! multiplying x(j) times column j.
                             rec = rec/cnorm(j)
                          end if
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                       x(j) = x(j)/tjjs
                       xj = abs(x(j))
                    else
                          ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                          ! scale = 0, and compute a solution to a*x = 0.
                       do i = 1, n
                          x(i) = zero
                       end do
                       x(j) = one
                       xj = one
                       scale = zero
                       xmax = zero
                    end if
100    continue
                    ! scale x if necessary to avoid overflow when adding a
                    ! multiple of column j of a.
                    if (xj > one) then
                       rec = one/xj
                       if (cnorm(j) > (bignum - xmax)*rec) then
                          ! scale x by 1/(2*abs(x(j))).
                          rec = rec*half
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                       end if
                    else if (xj*cnorm(j) > (bignum - xmax)) then
                       ! scale x by 1/2.
                       call stdlib_dscal(n, half, x, 1)
                       scale = scale*half
                    end if
                    if (upper) then
                       if (j > 1) then
                          ! compute the update
                             ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j)
                          call stdlib_daxpy(j - 1, -x(j)*tscal, ap(ip - j + 1), 1, x, 1)
                          i = stdlib_idamax(j - 1, x, 1)
                          xmax = abs(x(i))
                       end if
                       ip = ip - j
                    else
                       if (j < n) then
                          ! compute the update
                             ! x(j+1:n) := x(j+1:n) - x(j) * a(j+1:n,j)
                          call stdlib_daxpy(n - j, -x(j)*tscal, ap(ip + 1), 1, x(j + 1), 1)
                                    
                          i = j + stdlib_idamax(n - j, x(j + 1), 1)
                          xmax = abs(x(i))
                       end if
                       ip = ip + n - j + 1
                    end if
                 end do loop_110
              else
                 ! solve a**t * x = b
                 ip = jfirst*(jfirst + 1)/2
                 jlen = 1
                 loop_160: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = abs(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = ap(ip)*tscal
                       else
                          tjjs = tscal
                       end if
                       tjj = abs(tjjs)
                       if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                          rec = min(one, rec*tjj)
                          uscal = uscal/tjjs
                       end if
                       if (rec < one) then
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    sumj = zero
                    if (uscal == one) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_ddot to perform the dot product.
                       if (upper) then
                          sumj = stdlib_ddot(j - 1, ap(ip - j + 1), 1, x, 1)
                       else if (j < n) then
                          sumj = stdlib_ddot(n - j, ap(ip + 1), 1, x(j + 1), 1)
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          do i = 1, j - 1
                             sumj = sumj + (ap(ip - j + i)*uscal)*x(i)
                          end do
                       else if (j < n) then
                          do i = 1, n - j
                             sumj = sumj + (ap(ip + i)*uscal)*x(j + i)
                          end do
                       end if
                    end if
                    if (uscal == tscal) then
                       ! compute x(j) := ( x(j) - sumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - sumj
                       xj = abs(x(j))
                       if (nounit) then
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjjs = ap(ip)*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 150
                       end if
                       tjj = abs(tjjs)
                       if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                          if (tjj < one) then
                             if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                rec = one/xj
                                call stdlib_dscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                          end if
                          x(j) = x(j)/tjjs
                       else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                          if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                             rec = (tjj*bignum)/xj
                             call stdlib_dscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                          x(j) = x(j)/tjjs
                       else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0, and compute a solution to a**t*x = 0.
                          do i = 1, n
                             x(i) = zero
                          end do
                          x(j) = one
                          scale = zero
                          xmax = zero
                       end if
150    continue
                    else
                       ! compute x(j) := x(j) / a(j,j)  - sumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = x(j)/tjjs - sumj
                    end if
                    xmax = max(xmax, abs(x(j)))
                    jlen = jlen + 1
                    ip = ip + jinc*jlen
                 end do loop_160
              end if
              scale = scale/tscal
           end if
           ! scale the column norms by 1/tscal for return.
           if (tscal /= one) then
              call stdlib_dscal(n, one/tscal, cnorm, 1)
           end if
           return
           ! end of stdlib_dlatps
     end subroutine stdlib_dlatps

     ! DLATRS solves one of the triangular systems
     ! A *x = s*b  or  A**T *x = s*b
     ! with scaling to prevent overflow.  Here A is an upper or lower
     ! triangular matrix, A**T denotes the transpose of A, x and b are
     ! n-element vectors, and s is a scaling factor, usually less than
     ! or equal to 1, chosen so that the components of x will be less than
     ! the overflow threshold.  If the unscaled problem will not cause
     ! overflow, the Level 2 BLAS routine DTRSV is called.  If the matrix A
     ! is singular (A(j,j) = 0 for some j), then s is set to 0 and a
     ! non-trivial solution to A*x = 0 is returned.

     subroutine stdlib_dlatrs(uplo, trans, diag, normin, n, a, lda, x, scale, cnorm, info)
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, normin, trans, uplo
           integer(ilp) :: info, lda, n
           real(dp) :: scale
           ! .. array arguments ..
           real(dp) :: a(lda, *), cnorm(*), x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           integer(ilp) :: i, imax, j, jfirst, jinc, jlast
           real(dp) :: bignum, grow, rec, smlnum, sumj, tjj, tjjs, tmax, tscal, uscal, xbnd, xj, &
                     xmax
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           ! test the input parameters.
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (.not. stdlib_lsame(normin, 'y') .and. .not. stdlib_lsame(normin, 'n')) &
                     then
              info = -4
           else if (n < 0) then
              info = -5
           else if (lda < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlatrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine machine dependent parameters to control overflow.
           smlnum = stdlib_dlamch('safe minimum')/stdlib_dlamch('precision')
           bignum = one/smlnum
           scale = one
           if (stdlib_lsame(normin, 'n')) then
              ! compute the 1-norm of each column, not including the diagonal.
              if (upper) then
                 ! a is upper triangular.
                 do j = 1, n
                    cnorm(j) = stdlib_dasum(j - 1, a(1, j), 1)
                 end do
              else
                 ! a is lower triangular.
                 do j = 1, n - 1
                    cnorm(j) = stdlib_dasum(n - j, a(j + 1, j), 1)
                 end do
                 cnorm(n) = zero
              end if
           end if
           ! scale the column norms by tscal if the maximum element in cnorm is
           ! greater than bignum.
           imax = stdlib_idamax(n, cnorm, 1)
           tmax = cnorm(imax)
           if (tmax <= bignum) then
              tscal = one
           else
              tscal = one/(smlnum*tmax)
              call stdlib_dscal(n, tscal, cnorm, 1)
           end if
           ! compute a bound on the computed solution vector to see if the
           ! level 2 blas routine stdlib_dtrsv can be used.
           j = stdlib_idamax(n, x, 1)
           xmax = abs(x(j))
           xbnd = xmax
           if (notran) then
              ! compute the growth in a * x = b.
              if (upper) then
                 jfirst = n
                 jlast = 1
                 jinc = -1
              else
                 jfirst = 1
                 jlast = n
                 jinc = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 50
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, g(0) = max{x(i), i=1,...,n}.
                 grow = one/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 50
                    ! m(j) = g(j-1) / abs(a(j,j))
                    tjj = abs(a(j, j))
                    xbnd = min(xbnd, min(one, tjj)*grow)
                    if (tjj + cnorm(j) >= smlnum) then
                       ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) )
                       grow = grow*(tjj/(tjj + cnorm(j)))
                    else
                       ! g(j) could overflow, set grow to 0.
                       grow = zero
                    end if
                 end do
                 grow = xbnd
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, one/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 50
                    ! g(j) = g(j-1)*( 1 + cnorm(j) )
                    grow = grow*(one/(one + cnorm(j)))
                 end do
              end if
50      continue
           else
              ! compute the growth in a**t * x = b.
              if (upper) then
                 jfirst = 1
                 jlast = n
                 jinc = 1
              else
                 jfirst = n
                 jlast = 1
                 jinc = -1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 80
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, m(0) = max{x(i), i=1,...,n}.
                 grow = one/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 80
                    ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) )
                    xj = one + cnorm(j)
                    grow = min(grow, xbnd/xj)
                    ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j))
                    tjj = abs(a(j, j))
                    if (xj > tjj) xbnd = xbnd*(tjj/xj)
                 end do
                 grow = min(grow, xbnd)
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, one/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 80
                    ! g(j) = ( 1 + cnorm(j) )*g(j-1)
                    xj = one + cnorm(j)
                    grow = grow/xj
                 end do
              end if
80      continue
           end if
           if ((grow*tscal) > smlnum) then
              ! use the level 2 blas solve if the reciprocal of the bound on
              ! elements of x is not too small.
              call stdlib_dtrsv(uplo, trans, diag, n, a, lda, x, 1)
           else
              ! use a level 1 blas solve, scaling intermediate results.
              if (xmax > bignum) then
                 ! scale x so that its components are less than or equal to
                 ! bignum in absolute value.
                 scale = bignum/xmax
                 call stdlib_dscal(n, scale, x, 1)
                 xmax = bignum
              end if
              if (notran) then
                 ! solve a * x = b
                 loop_110: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) / a(j,j), scaling x if necessary.
                    xj = abs(x(j))
                    if (nounit) then
                       tjjs = a(j, j)*tscal
                    else
                       tjjs = tscal
                       if (tscal == one) go to 100
                    end if
                    tjj = abs(tjjs)
                    if (tjj > smlnum) then
                          ! abs(a(j,j)) > smlnum:
                       if (tjj < one) then
                          if (xj > tjj*bignum) then
                                ! scale x by 1/b(j).
                             rec = one/xj
                             call stdlib_dscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x(j) = x(j)/tjjs
                       xj = abs(x(j))
                    else if (tjj > zero) then
                          ! 0 < abs(a(j,j)) <= smlnum:
                       if (xj > tjj*bignum) then
                             ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum
                             ! to avoid overflow when dividing by a(j,j).
                          rec = (tjj*bignum)/xj
                          if (cnorm(j) > one) then
                                ! scale by 1/cnorm(j) to avoid overflow when
                                ! multiplying x(j) times column j.
                             rec = rec/cnorm(j)
                          end if
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                       x(j) = x(j)/tjjs
                       xj = abs(x(j))
                    else
                          ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                          ! scale = 0, and compute a solution to a*x = 0.
                       do i = 1, n
                          x(i) = zero
                       end do
                       x(j) = one
                       xj = one
                       scale = zero
                       xmax = zero
                    end if
100    continue
                    ! scale x if necessary to avoid overflow when adding a
                    ! multiple of column j of a.
                    if (xj > one) then
                       rec = one/xj
                       if (cnorm(j) > (bignum - xmax)*rec) then
                          ! scale x by 1/(2*abs(x(j))).
                          rec = rec*half
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                       end if
                    else if (xj*cnorm(j) > (bignum - xmax)) then
                       ! scale x by 1/2.
                       call stdlib_dscal(n, half, x, 1)
                       scale = scale*half
                    end if
                    if (upper) then
                       if (j > 1) then
                          ! compute the update
                             ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j)
                          call stdlib_daxpy(j - 1, -x(j)*tscal, a(1, j), 1, x, 1)
                          i = stdlib_idamax(j - 1, x, 1)
                          xmax = abs(x(i))
                       end if
                    else
                       if (j < n) then
                          ! compute the update
                             ! x(j+1:n) := x(j+1:n) - x(j) * a(j+1:n,j)
                          call stdlib_daxpy(n - j, -x(j)*tscal, a(j + 1, j), 1, x(j + 1), 1)
                                    
                          i = j + stdlib_idamax(n - j, x(j + 1), 1)
                          xmax = abs(x(i))
                       end if
                    end if
                 end do loop_110
              else
                 ! solve a**t * x = b
                 loop_160: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = abs(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = a(j, j)*tscal
                       else
                          tjjs = tscal
                       end if
                       tjj = abs(tjjs)
                       if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                          rec = min(one, rec*tjj)
                          uscal = uscal/tjjs
                       end if
                       if (rec < one) then
                          call stdlib_dscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    sumj = zero
                    if (uscal == one) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_ddot to perform the dot product.
                       if (upper) then
                          sumj = stdlib_ddot(j - 1, a(1, j), 1, x, 1)
                       else if (j < n) then
                          sumj = stdlib_ddot(n - j, a(j + 1, j), 1, x(j + 1), 1)
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          do i = 1, j - 1
                             sumj = sumj + (a(i, j)*uscal)*x(i)
                          end do
                       else if (j < n) then
                          do i = j + 1, n
                             sumj = sumj + (a(i, j)*uscal)*x(i)
                          end do
                       end if
                    end if
                    if (uscal == tscal) then
                       ! compute x(j) := ( x(j) - sumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - sumj
                       xj = abs(x(j))
                       if (nounit) then
                          tjjs = a(j, j)*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 150
                       end if
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                       tjj = abs(tjjs)
                       if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                          if (tjj < one) then
                             if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                rec = one/xj
                                call stdlib_dscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                          end if
                          x(j) = x(j)/tjjs
                       else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                          if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                             rec = (tjj*bignum)/xj
                             call stdlib_dscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                          x(j) = x(j)/tjjs
                       else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0, and compute a solution to a**t*x = 0.
                          do i = 1, n
                             x(i) = zero
                          end do
                          x(j) = one
                          scale = zero
                          xmax = zero
                       end if
150    continue
                    else
                       ! compute x(j) := x(j) / a(j,j)  - sumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = x(j)/tjjs - sumj
                    end if
                    xmax = max(xmax, abs(x(j)))
                 end do loop_160
              end if
              scale = scale/tscal
           end if
           ! scale the column norms by 1/tscal for return.
           if (tscal /= one) then
              call stdlib_dscal(n, one/tscal, cnorm, 1)
           end if
           return
           ! end of stdlib_dlatrs
     end subroutine stdlib_dlatrs

     ! DLAUU2 computes the product U * U**T or L**T * L, where the triangular
     ! factor U or L is stored in the upper or lower triangular part of
     ! the array A.
     ! If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
     ! overwriting the factor U in A.
     ! If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
     ! overwriting the factor L in A.
     ! This is the unblocked form of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_dlauu2(uplo, n, a, lda, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           real(dp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i
           real(dp) :: aii
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlauu2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! compute the product u * u**t.
              do i = 1, n
                 aii = a(i, i)
                 if (i < n) then
                    a(i, i) = stdlib_ddot(n - i + 1, a(i, i), lda, a(i, i), lda)
                    call stdlib_dgemv('no transpose', i - 1, n - i, one, a(1, i + 1), lda, a(i, i + 1) &
                              , lda, aii, a(1, i), 1)
                 else
                    call stdlib_dscal(i, aii, a(1, i), 1)
                 end if
              end do
           else
              ! compute the product l**t * l.
              do i = 1, n
                 aii = a(i, i)
                 if (i < n) then
                    a(i, i) = stdlib_ddot(n - i + 1, a(i, i), 1, a(i, i), 1)
                    call stdlib_dgemv('transpose', n - i, i - 1, one, a(i + 1, 1), lda, a(i + 1, i), &
                              1, aii, a(i, 1), lda)
                 else
                    call stdlib_dscal(i, aii, a(i, 1), lda)
                 end if
              end do
           end if
           return
           ! end of stdlib_dlauu2
     end subroutine stdlib_dlauu2

     ! DLAUUM computes the product U * U**T or L**T * L, where the triangular
     ! factor U or L is stored in the upper or lower triangular part of
     ! the array A.
     ! If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
     ! overwriting the factor U in A.
     ! If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
     ! overwriting the factor L in A.
     ! This is the blocked form of the algorithm, calling Level 3 BLAS.

     subroutine stdlib_dlauum(uplo, n, a, lda, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           real(dp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, ib, nb
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dlauum', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine the block size for this environment.
           nb = stdlib_ilaenv(1, 'stdlib_dlauum', uplo, n, -1, -1, -1)
           if (nb <= 1 .or. nb >= n) then
              ! use unblocked code
              call stdlib_dlauu2(uplo, n, a, lda, info)
           else
              ! use blocked code
              if (upper) then
                 ! compute the product u * u**t.
                 do i = 1, n, nb
                    ib = min(nb, n - i + 1)
                    call stdlib_dtrmm('right', 'upper', 'transpose', 'non-unit', i - 1, ib, one, a( &
                              i, i), lda, a(1, i), lda)
                    call stdlib_dlauu2('upper', ib, a(i, i), lda, info)
                    if (i + ib <= n) then
                       call stdlib_dgemm('no transpose', 'transpose', i - 1, ib, n - i - ib + 1, one, a( &
                                 1, i + ib), lda, a(i, i + ib), lda, one, a(1, i), lda)
                       call stdlib_dsyrk('upper', 'no transpose', ib, n - i - ib + 1, one, a(i, i + ib), &
                                  lda, one, a(i, i), lda)
                    end if
                 end do
              else
                 ! compute the product l**t * l.
                 do i = 1, n, nb
                    ib = min(nb, n - i + 1)
                    call stdlib_dtrmm('left', 'lower', 'transpose', 'non-unit', ib, i - 1, one, a( &
                              i, i), lda, a(i, 1), lda)
                    call stdlib_dlauu2('lower', ib, a(i, i), lda, info)
                    if (i + ib <= n) then
                       call stdlib_dgemm('transpose', 'no transpose', ib, i - 1, n - i - ib + 1, one, a( &
                                 i + ib, i), lda, a(i + ib, 1), lda, one, a(i, 1), lda)
                       call stdlib_dsyrk('lower', 'transpose', ib, n - i - ib + 1, one, a(i + ib, i), &
                                 lda, one, a(i, i), lda)
                    end if
                 end do
              end if
           end if
           return
           ! end of stdlib_dlauum
     end subroutine stdlib_dlauum

     ! DORBDB6 orthogonalizes the column vector
     ! X = [ X1 ]
     ! [ X2 ]
     ! with respect to the columns of
     ! Q = [ Q1 ] .
     ! [ Q2 ]
     ! The columns of Q must be orthonormal.
     ! If the projection is zero according to Kahan's "twice is enough"
     ! criterion, then the zero vector is returned.

     subroutine stdlib_dorbdb6(m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2, ldq2, work, lwork, &
               info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx1, incx2, info, ldq1, ldq2, lwork, m1, m2, n
           ! .. array arguments ..
           real(dp) :: q1(ldq1, *), q2(ldq2, *), work(*), x1(*), x2(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: alphasq = 0.01d0
           real(dp), parameter :: realone = 1.0d0
           real(dp), parameter :: realzero = 0.0d0
           real(dp), parameter :: negone = -1.0d0
           
           ! .. local scalars ..
           integer(ilp) :: i
           real(dp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2
     
           ! .. intrinsic function ..
           intrinsic :: max
           ! .. executable statements ..
           ! test input arguments
           info = 0
           if (m1 < 0) then
              info = -1
           else if (m2 < 0) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (incx1 < 1) then
              info = -5
           else if (incx2 < 1) then
              info = -7
           else if (ldq1 < max(1, m1)) then
              info = -9
           else if (ldq2 < max(1, m2)) then
              info = -11
           else if (lwork < n) then
              info = -13
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorbdb6', -info)
              return
           end if
           ! first, project x onto the orthogonal complement of q's column
           ! space
           scl1 = realzero
           ssq1 = realone
           call stdlib_dlassq(m1, x1, incx1, scl1, ssq1)
           scl2 = realzero
           ssq2 = realone
           call stdlib_dlassq(m2, x2, incx2, scl2, ssq2)
           normsq1 = scl1**2*ssq1 + scl2**2*ssq2
           if (m1 == 0) then
              do i = 1, n
                 work(i) = zero
              end do
           else
              call stdlib_dgemv('c', m1, n, one, q1, ldq1, x1, incx1, zero, work, 1)
           end if
           call stdlib_dgemv('c', m2, n, one, q2, ldq2, x2, incx2, one, work, 1)
           call stdlib_dgemv('n', m1, n, negone, q1, ldq1, work, 1, one, x1, incx1)
           call stdlib_dgemv('n', m2, n, negone, q2, ldq2, work, 1, one, x2, incx2)
           scl1 = realzero
           ssq1 = realone
           call stdlib_dlassq(m1, x1, incx1, scl1, ssq1)
           scl2 = realzero
           ssq2 = realone
           call stdlib_dlassq(m2, x2, incx2, scl2, ssq2)
           normsq2 = scl1**2*ssq1 + scl2**2*ssq2
           ! if projection is sufficiently large in norm, then stop.
           ! if projection is zero, then stop.
           ! otherwise, project again.
           if (normsq2 >= alphasq*normsq1) then
              return
           end if
           if (normsq2 == zero) then
              return
           end if
           normsq1 = normsq2
           do i = 1, n
              work(i) = zero
           end do
           if (m1 == 0) then
              do i = 1, n
                 work(i) = zero
              end do
           else
              call stdlib_dgemv('c', m1, n, one, q1, ldq1, x1, incx1, zero, work, 1)
           end if
           call stdlib_dgemv('c', m2, n, one, q2, ldq2, x2, incx2, one, work, 1)
           call stdlib_dgemv('n', m1, n, negone, q1, ldq1, work, 1, one, x1, incx1)
           call stdlib_dgemv('n', m2, n, negone, q2, ldq2, work, 1, one, x2, incx2)
           scl1 = realzero
           ssq1 = realone
           call stdlib_dlassq(m1, x1, incx1, scl1, ssq1)
           scl2 = realzero
           ssq2 = realone
           call stdlib_dlassq(m1, x1, incx1, scl1, ssq1)
           normsq2 = scl1**2*ssq1 + scl2**2*ssq2
           ! if second projection is sufficiently large in norm, then do
           ! nothing more. alternatively, if it shrunk significantly, then
           ! truncate it to zero.
           if (normsq2 < alphasq*normsq1) then
              do i = 1, m1
                 x1(i) = zero
              end do
              do i = 1, m2
                 x2(i) = zero
              end do
           end if
           return
           ! end of stdlib_dorbdb6
     end subroutine stdlib_dorbdb6

     ! DORG2L generates an m by n real matrix Q with orthonormal columns,
     ! which is defined as the last n columns of a product of k elementary
     ! reflectors of order m
     ! Q  =  H(k) . . . H(2) H(1)
     ! as returned by DGEQLF.

     subroutine stdlib_dorg2l(m, n, k, a, lda, tau, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, ii, j, l
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0 .or. n > m) then
              info = -2
           else if (k < 0 .or. k > n) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorg2l', -info)
              return
           end if
           ! quick return if possible
           if (n <= 0) return
           ! initialise columns 1:n-k to columns of the unit matrix
           do j = 1, n - k
              do l = 1, m
                 a(l, j) = zero
              end do
              a(m - n + j, j) = one
           end do
           do i = 1, k
              ii = n - k + i
              ! apply h(i) to a(1:m-k+i,1:n-k+i) from the left
              a(m - n + ii, ii) = one
              call stdlib_dlarf('left', m - n + ii, ii - 1, a(1, ii), 1, tau(i), a, lda, work)
                        
              call stdlib_dscal(m - n + ii - 1, -tau(i), a(1, ii), 1)
              a(m - n + ii, ii) = one - tau(i)
              ! set a(m-k+i+1:m,n-k+i) to zero
              do l = m - n + ii + 1, m
                 a(l, ii) = zero
              end do
           end do
           return
           ! end of stdlib_dorg2l
     end subroutine stdlib_dorg2l

     ! DORG2R generates an m by n real matrix Q with orthonormal columns,
     ! which is defined as the first n columns of a product of k elementary
     ! reflectors of order m
     ! Q  =  H(1) H(2) . . . H(k)
     ! as returned by DGEQRF.

     subroutine stdlib_dorg2r(m, n, k, a, lda, tau, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, l
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0 .or. n > m) then
              info = -2
           else if (k < 0 .or. k > n) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorg2r', -info)
              return
           end if
           ! quick return if possible
           if (n <= 0) return
           ! initialise columns k+1:n to columns of the unit matrix
           do j = k + 1, n
              do l = 1, m
                 a(l, j) = zero
              end do
              a(j, j) = one
           end do
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the left
              if (i < n) then
                 a(i, i) = one
                 call stdlib_dlarf('left', m - i + 1, n - i, a(i, i), 1, tau(i), a(i, i + 1), lda, &
                           work)
              end if
              if (i < m) call stdlib_dscal(m - i, -tau(i), a(i + 1, i), 1)
              a(i, i) = one - tau(i)
              ! set a(1:i-1,i) to zero
              do l = 1, i - 1
                 a(l, i) = zero
              end do
           end do
           return
           ! end of stdlib_dorg2r
     end subroutine stdlib_dorg2r

     ! DORGL2 generates an m by n real matrix Q with orthonormal rows,
     ! which is defined as the first m rows of a product of k elementary
     ! reflectors of order n
     ! Q  =  H(k) . . . H(2) H(1)
     ! as returned by DGELQF.

     subroutine stdlib_dorgl2(m, n, k, a, lda, tau, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, l
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           if (m < 0) then
              info = -1
           else if (n < m) then
              info = -2
           else if (k < 0 .or. k > m) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorgl2', -info)
              return
           end if
           ! quick return if possible
           if (m <= 0) return
           if (k < m) then
              ! initialise rows k+1:m to rows of the unit matrix
              do j = 1, n
                 do l = k + 1, m
                    a(l, j) = zero
                 end do
                 if (j > k .and. j <= m) a(j, j) = one
              end do
           end if
           do i = k, 1, -1
              ! apply h(i) to a(i:m,i:n) from the right
              if (i < n) then
                 if (i < m) then
                    a(i, i) = one
                    call stdlib_dlarf('right', m - i, n - i + 1, a(i, i), lda, tau(i), a(i + 1, i), &
                              lda, work)
                 end if
                 call stdlib_dscal(n - i, -tau(i), a(i, i + 1), lda)
              end if
              a(i, i) = one - tau(i)
              ! set a(i,1:i-1) to zero
              do l = 1, i - 1
                 a(i, l) = zero
              end do
           end do
           return
           ! end of stdlib_dorgl2
     end subroutine stdlib_dorgl2

     ! DORGLQ generates an M-by-N real matrix Q with orthonormal rows,
     ! which is defined as the first M rows of a product of K elementary
     ! reflectors of order N
     ! Q  =  H(k) . . . H(2) H(1)
     ! as returned by DGELQF.

     subroutine stdlib_dorglq(m, n, k, a, lda, tau, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lquery
           integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
     
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           nb = stdlib_ilaenv(1, 'stdlib_dorglq', ' ', m, n, k, -1)
           lwkopt = max(1, m)*nb
           work(1) = lwkopt
           lquery = (lwork == -1)
           if (m < 0) then
              info = -1
           else if (n < m) then
              info = -2
           else if (k < 0 .or. k > m) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           else if (lwork < max(1, m) .and. .not. lquery) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorglq', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m <= 0) then
              work(1) = 1
              return
           end if
           nbmin = 2
           nx = 0
           iws = m
           if (nb > 1 .and. nb < k) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max(0, stdlib_ilaenv(3, 'stdlib_dorglq', ' ', m, n, k, -1))
              if (nx < k) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if (lwork < iws) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork/ldwork
                    nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dorglq', ' ', m, n, k, -1))
                              
                 end if
              end if
           end if
           if (nb >= nbmin .and. nb < k .and. nx < k) then
              ! use blocked code after the last block.
              ! the first kk rows are handled by the block method.
              ki = ((k - nx - 1)/nb)*nb
              kk = min(k, ki + nb)
              ! set a(kk+1:m,1:kk) to zero.
              do j = 1, kk
                 do i = kk + 1, m
                    a(i, j) = zero
                 end do
              end do
           else
              kk = 0
           end if
           ! use unblocked code for the last or only block.
           if (kk < m) call stdlib_dorgl2(m - kk, n - kk, k - kk, a(kk + 1, kk + 1), lda, tau(kk + 1), work, &
                      iinfo)
           if (kk > 0) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min(nb, k - i + 1)
                 if (i + ib <= m) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib_dlarft('forward', 'rowwise', n - i + 1, ib, a(i, i), lda, tau(i), &
                              work, ldwork)
                    ! apply h**t to a(i+ib:m,i:n) from the right
                    call stdlib_dlarfb('right', 'transpose', 'forward', 'rowwise', m - i - ib + 1, n - i + &
                    1, ib, a(i, i), lda, work, ldwork, a(i + ib, i), lda, work(ib + 1), ldwork)
                              
                 end if
                 ! apply h**t to columns i:n of current block
                 call stdlib_dorgl2(ib, n - i + 1, ib, a(i, i), lda, tau(i), work, iinfo)
                 ! set columns 1:i-1 of current block to zero
                 do j = 1, i - 1
                    do l = i, i + ib - 1
                       a(l, j) = zero
                    end do
                 end do
              end do
           end if
           work(1) = iws
           return
           ! end of stdlib_dorglq
     end subroutine stdlib_dorglq

     ! DORGQL generates an M-by-N real matrix Q with orthonormal columns,
     ! which is defined as the last N columns of a product of K elementary
     ! reflectors of order M
     ! Q  =  H(k) . . . H(2) H(1)
     ! as returned by DGEQLF.

     subroutine stdlib_dorgql(m, n, k, a, lda, tau, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lquery
           integer(ilp) :: i, ib, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
     
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           lquery = (lwork == -1)
           if (m < 0) then
              info = -1
           else if (n < 0 .or. n > m) then
              info = -2
           else if (k < 0 .or. k > n) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           end if
           if (info == 0) then
              if (n == 0) then
                 lwkopt = 1
              else
                 nb = stdlib_ilaenv(1, 'stdlib_dorgql', ' ', m, n, k, -1)
                 lwkopt = n*nb
              end if
              work(1) = lwkopt
              if (lwork < max(1, n) .and. .not. lquery) then
                 info = -8
              end if
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorgql', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (n <= 0) then
              return
           end if
           nbmin = 2
           nx = 0
           iws = n
           if (nb > 1 .and. nb < k) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max(0, stdlib_ilaenv(3, 'stdlib_dorgql', ' ', m, n, k, -1))
              if (nx < k) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if (lwork < iws) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork/ldwork
                    nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dorgql', ' ', m, n, k, -1))
                              
                 end if
              end if
           end if
           if (nb >= nbmin .and. nb < k .and. nx < k) then
              ! use blocked code after the first block.
              ! the last kk columns are handled by the block method.
              kk = min(k, ((k - nx + nb - 1)/nb)*nb)
              ! set a(m-kk+1:m,1:n-kk) to zero.
              do j = 1, n - kk
                 do i = m - kk + 1, m
                    a(i, j) = zero
                 end do
              end do
           else
              kk = 0
           end if
           ! use unblocked code for the first or only block.
           call stdlib_dorg2l(m - kk, n - kk, k - kk, a, lda, tau, work, iinfo)
           if (kk > 0) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min(nb, k - i + 1)
                 if (n - k + i > 1) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib_dlarft('backward', 'columnwise', m - k + i + ib - 1, ib, a(1, n - k + i), &
                              lda, tau(i), work, ldwork)
                    ! apply h to a(1:m-k+i+ib-1,1:n-k+i-1) from the left
                    call stdlib_dlarfb('left', 'no transpose', 'backward', 'columnwise', m - k + i + ib - &
                    1, n - k + i - 1, ib, a(1, n - k + i), lda, work, ldwork, a, lda, work(ib + 1), ldwork)
                              
                 end if
                 ! apply h to rows 1:m-k+i+ib-1 of current block
                 call stdlib_dorg2l(m - k + i + ib - 1, ib, ib, a(1, n - k + i), lda, tau(i), work, iinfo &
                           )
                 ! set rows m-k+i+ib:m of current block to zero
                 do j = n - k + i, n - k + i + ib - 1
                    do l = m - k + i + ib, m
                       a(l, j) = zero
                    end do
                 end do
              end do
           end if
           work(1) = iws
           return
           ! end of stdlib_dorgql
     end subroutine stdlib_dorgql

     ! DORGQR generates an M-by-N real matrix Q with orthonormal columns,
     ! which is defined as the first N columns of a product of K elementary
     ! reflectors of order M
     ! Q  =  H(1) H(2) . . . H(k)
     ! as returned by DGEQRF.

     subroutine stdlib_dorgqr(m, n, k, a, lda, tau, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lquery
           integer(ilp) :: i, ib, iinfo, iws, j, ki, kk, l, ldwork, lwkopt, nb, nbmin, nx
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
     
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           nb = stdlib_ilaenv(1, 'stdlib_dorgqr', ' ', m, n, k, -1)
           lwkopt = max(1, n)*nb
           work(1) = lwkopt
           lquery = (lwork == -1)
           if (m < 0) then
              info = -1
           else if (n < 0 .or. n > m) then
              info = -2
           else if (k < 0 .or. k > n) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           else if (lwork < max(1, n) .and. .not. lquery) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorgqr', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (n <= 0) then
              work(1) = 1
              return
           end if
           nbmin = 2
           nx = 0
           iws = n
           if (nb > 1 .and. nb < k) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max(0, stdlib_ilaenv(3, 'stdlib_dorgqr', ' ', m, n, k, -1))
              if (nx < k) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = n
                 iws = ldwork*nb
                 if (lwork < iws) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork/ldwork
                    nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dorgqr', ' ', m, n, k, -1))
                              
                 end if
              end if
           end if
           if (nb >= nbmin .and. nb < k .and. nx < k) then
              ! use blocked code after the last block.
              ! the first kk columns are handled by the block method.
              ki = ((k - nx - 1)/nb)*nb
              kk = min(k, ki + nb)
              ! set a(1:kk,kk+1:n) to zero.
              do j = kk + 1, n
                 do i = 1, kk
                    a(i, j) = zero
                 end do
              end do
           else
              kk = 0
           end if
           ! use unblocked code for the last or only block.
           if (kk < n) call stdlib_dorg2r(m - kk, n - kk, k - kk, a(kk + 1, kk + 1), lda, tau(kk + 1), work, &
                      iinfo)
           if (kk > 0) then
              ! use blocked code
              do i = ki + 1, 1, -nb
                 ib = min(nb, k - i + 1)
                 if (i + ib <= n) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i) h(i+1) . . . h(i+ib-1)
                    call stdlib_dlarft('forward', 'columnwise', m - i + 1, ib, a(i, i), lda, tau(i &
                              ), work, ldwork)
                    ! apply h to a(i:m,i+ib:n) from the left
                    call stdlib_dlarfb('left', 'no transpose', 'forward', 'columnwise', m - i + 1, n - &
                    i - ib + 1, ib, a(i, i), lda, work, ldwork, a(i, i + ib), lda, work(ib + 1), &
                              ldwork)
                 end if
                 ! apply h to rows i:m of current block
                 call stdlib_dorg2r(m - i + 1, ib, ib, a(i, i), lda, tau(i), work, iinfo)
                 ! set rows 1:i-1 of current block to zero
                 do j = i, i + ib - 1
                    do l = 1, i - 1
                       a(l, j) = zero
                    end do
                 end do
              end do
           end if
           work(1) = iws
           return
           ! end of stdlib_dorgqr
     end subroutine stdlib_dorgqr

     ! DORGR2 generates an m by n real matrix Q with orthonormal rows,
     ! which is defined as the last m rows of a product of k elementary
     ! reflectors of order n
     ! Q  =  H(1) H(2) . . . H(k)
     ! as returned by DGERQF.

     subroutine stdlib_dorgr2(m, n, k, a, lda, tau, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, ii, j, l
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           if (m < 0) then
              info = -1
           else if (n < m) then
              info = -2
           else if (k < 0 .or. k > m) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorgr2', -info)
              return
           end if
           ! quick return if possible
           if (m <= 0) return
           if (k < m) then
              ! initialise rows 1:m-k to rows of the unit matrix
              do j = 1, n
                 do l = 1, m - k
                    a(l, j) = zero
                 end do
                 if (j > n - m .and. j <= n - k) a(m - n + j, j) = one
              end do
           end if
           do i = 1, k
              ii = m - k + i
              ! apply h(i) to a(1:m-k+i,1:n-k+i) from the right
              a(ii, n - m + ii) = one
              call stdlib_dlarf('right', ii - 1, n - m + ii, a(ii, 1), lda, tau(i), a, lda, work)
                        
              call stdlib_dscal(n - m + ii - 1, -tau(i), a(ii, 1), lda)
              a(ii, n - m + ii) = one - tau(i)
              ! set a(m-k+i,n-k+i+1:n) to zero
              do l = n - m + ii + 1, n
                 a(ii, l) = zero
              end do
           end do
           return
           ! end of stdlib_dorgr2
     end subroutine stdlib_dorgr2

     ! DORGRQ generates an M-by-N real matrix Q with orthonormal rows,
     ! which is defined as the last M rows of a product of K elementary
     ! reflectors of order N
     ! Q  =  H(1) H(2) . . . H(k)
     ! as returned by DGERQF.

     subroutine stdlib_dorgrq(m, n, k, a, lda, tau, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, k, lda, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lquery
           integer(ilp) :: i, ib, ii, iinfo, iws, j, kk, l, ldwork, lwkopt, nb, nbmin, nx
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
     
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           lquery = (lwork == -1)
           if (m < 0) then
              info = -1
           else if (n < m) then
              info = -2
           else if (k < 0 .or. k > m) then
              info = -3
           else if (lda < max(1, m)) then
              info = -5
           end if
           if (info == 0) then
              if (m <= 0) then
                 lwkopt = 1
              else
                 nb = stdlib_ilaenv(1, 'stdlib_dorgrq', ' ', m, n, k, -1)
                 lwkopt = m*nb
              end if
              work(1) = lwkopt
              if (lwork < max(1, m) .and. .not. lquery) then
                 info = -8
              end if
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorgrq', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m <= 0) then
              return
           end if
           nbmin = 2
           nx = 0
           iws = m
           if (nb > 1 .and. nb < k) then
              ! determine when to cross over from blocked to unblocked code.
              nx = max(0, stdlib_ilaenv(3, 'stdlib_dorgrq', ' ', m, n, k, -1))
              if (nx < k) then
                 ! determine if workspace is large enough for blocked code.
                 ldwork = m
                 iws = ldwork*nb
                 if (lwork < iws) then
                    ! not enough workspace to use optimal nb:  reduce nb and
                    ! determine the minimum value of nb.
                    nb = lwork/ldwork
                    nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dorgrq', ' ', m, n, k, -1))
                              
                 end if
              end if
           end if
           if (nb >= nbmin .and. nb < k .and. nx < k) then
              ! use blocked code after the first block.
              ! the last kk rows are handled by the block method.
              kk = min(k, ((k - nx + nb - 1)/nb)*nb)
              ! set a(1:m-kk,n-kk+1:n) to zero.
              do j = n - kk + 1, n
                 do i = 1, m - kk
                    a(i, j) = zero
                 end do
              end do
           else
              kk = 0
           end if
           ! use unblocked code for the first or only block.
           call stdlib_dorgr2(m - kk, n - kk, k - kk, a, lda, tau, work, iinfo)
           if (kk > 0) then
              ! use blocked code
              do i = k - kk + 1, k, nb
                 ib = min(nb, k - i + 1)
                 ii = m - k + i
                 if (ii > 1) then
                    ! form the triangular factor of the block reflector
                    ! h = h(i+ib-1) . . . h(i+1) h(i)
                    call stdlib_dlarft('backward', 'rowwise', n - k + i + ib - 1, ib, a(ii, 1), lda, &
                              tau(i), work, ldwork)
                    ! apply h**t to a(1:m-k+i-1,1:n-k+i+ib-1) from the right
                    call stdlib_dlarfb('right', 'transpose', 'backward', 'rowwise', ii - 1, n - k + i + &
                              ib - 1, ib, a(ii, 1), lda, work, ldwork, a, lda, work(ib + 1), ldwork)
                 end if
                 ! apply h**t to columns 1:n-k+i+ib-1 of current block
                 call stdlib_dorgr2(ib, n - k + i + ib - 1, ib, a(ii, 1), lda, tau(i), work, iinfo)
                           
                 ! set columns n-k+i+ib:n of current block to zero
                 do l = n - k + i + ib, n
                    do j = ii, ii + ib - 1
                       a(j, l) = zero
                    end do
                 end do
              end do
           end if
           work(1) = iws
           return
           ! end of stdlib_dorgrq
     end subroutine stdlib_dorgrq

     ! DORGTSQR_ROW generates an M-by-N real matrix Q_out with
     ! orthonormal columns from the output of DLATSQR. These N orthonormal
     ! columns are the first N columns of a product of complex unitary
     ! matrices Q(k)_in of order M, which are returned by DLATSQR in
     ! a special format.
     ! Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
     ! The input matrices Q(k)_in are stored in row and column blocks in A.
     ! See the documentation of DLATSQR for more details on the format of
     ! Q(k)_in, where each Q(k)_in is represented by block Householder
     ! transformations. This routine calls an auxiliary routine DLARFB_GETT,
     ! where the computation is performed on each individual block. The
     ! algorithm first sweeps NB-sized column blocks from the right to left
     ! starting in the bottom row block and continues to the top row block
     ! (hence _ROW in the routine name). This sweep is in reverse order of
     ! the order in which DLATSQR generates the output blocks.

     subroutine stdlib_dorgtsqr_row(m, n, mb, nb, a, lda, t, ldt, work, lwork, info)
     
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, ldt, lwork, m, n, mb, nb
           ! .. array arguments ..
           real(dp) :: a(lda, *), t(ldt, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lquery
           integer(ilp) :: nblocal, mb2, m_plus_one, itmp, ib_bottom, lworkopt, num_all_row_blocks, &
                      jb_t, ib, imb, kb, kb_last, knb, mb1
           ! .. local arrays ..
           real(dp) :: dummy(1, 1)
     
           ! .. intrinsic functions ..
           intrinsic :: dble, max, min
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           lquery = lwork == -1
           if (m < 0) then
              info = -1
           else if (n < 0 .or. m < n) then
              info = -2
           else if (mb <= n) then
              info = -3
           else if (nb < 1) then
              info = -4
           else if (lda < max(1, m)) then
              info = -6
           else if (ldt < max(1, min(nb, n))) then
              info = -8
           else if (lwork < 1 .and. .not. lquery) then
              info = -10
           end if
           nblocal = min(nb, n)
           ! determine the workspace size.
           if (info == 0) then
              lworkopt = nblocal*max(nblocal, (n - nblocal))
           end if
           ! handle error in the input parameters and handle the workspace query.
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorgtsqr_row', -info)
              return
           else if (lquery) then
              work(1) = real(lworkopt, KIND=dp)
              return
           end if
           ! quick return if possible
           if (min(m, n) == 0) then
              work(1) = real(lworkopt, KIND=dp)
              return
           end if
           ! (0) set the upper-triangular part of the matrix a to zero and
           ! its diagonal elements to one.
           call stdlib_dlaset('u', m, n, zero, one, a, lda)
           ! kb_last is the column index of the last column block reflector
           ! in the matrices t and v.
           kb_last = ((n - 1)/nblocal)*nblocal + 1
           ! (1) bottom-up loop over row blocks of a, except the top row block.
           ! note: if mb>=m, then the loop is never executed.
           if (mb < m) then
              ! mb2 is the row blocking size for the row blocks before the
              ! first top row block in the matrix a. ib is the row index for
              ! the row blocks in the matrix a before the first top row block.
              ! ib_bottom is the row index for the last bottom row block
              ! in the matrix a. jb_t is the column index of the corresponding
              ! column block in the matrix t.
              ! initialize variables.
              ! num_all_row_blocks is the number of row blocks in the matrix a
              ! including the first row block.
              mb2 = mb - n
              m_plus_one = m + 1
              itmp = (m - mb - 1)/mb2
              ib_bottom = itmp*mb2 + mb + 1
              num_all_row_blocks = itmp + 2
              jb_t = num_all_row_blocks*n + 1
              do ib = ib_bottom, mb + 1, -mb2
                 ! determine the block size imb for the current row block
                 ! in the matrix a.
                 imb = min(m_plus_one - ib, mb2)
                 ! determine the column index jb_t for the current column block
                 ! in the matrix t.
                 jb_t = jb_t - n
                 ! apply column blocks of h in the row block from right to left.
                 ! kb is the column index of the current column block reflector
                 ! in the matrices t and v.
                 do kb = kb_last, 1, -nblocal
                    ! determine the size of the current column block knb in
                    ! the matrices t and v.
                    knb = min(nblocal, n - kb + 1)
                    call stdlib_dlarfb_gett('i', imb, n - kb + 1, knb, t(1, jb_t + kb - 1), ldt, a(kb, &
                              kb), lda, a(ib, kb), lda, work, knb)
                 end do
              end do
           end if
           ! (2) top row block of a.
           ! note: if mb>=m, then we have only one row block of a of size m
           ! and we work on the entire matrix a.
           mb1 = min(mb, m)
           ! apply column blocks of h in the top row block from right to left.
           ! kb is the column index of the current block reflector in
           ! the matrices t and v.
           do kb = kb_last, 1, -nblocal
              ! determine the size of the current column block knb in
              ! the matrices t and v.
              knb = min(nblocal, n - kb + 1)
              if (mb1 - kb - knb + 1 == 0) then
                 ! in stdlib_slarfb_gett parameters, when m=0, then the matrix b
                 ! does not exist, hence we need to pass a dummy array
                 ! reference dummy(1,1) to b with lddummy=1.
                 call stdlib_dlarfb_gett('n', 0, n - kb + 1, knb, t(1, kb), ldt, a(kb, kb), lda, &
                           dummy(1, 1), 1, work, knb)
              else
                 call stdlib_dlarfb_gett('n', mb1 - kb - knb + 1, n - kb + 1, knb, t(1, kb), ldt, a(kb, &
                           kb), lda, a(kb + knb, kb), lda, work, knb)
              end if
           end do
           work(1) = real(lworkopt, KIND=dp)
           return
           ! end of stdlib_dorgtsqr_row
     end subroutine stdlib_dorgtsqr_row

     subroutine stdlib_dorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
     
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: m, n, n1, n2, ldq, ldc, lwork, info
           ! .. array arguments ..
           real(dp) :: q(ldq, *), c(ldc, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: left, lquery, notran
           integer(ilp) :: i, ldwork, len, lwkopt, nb, nq, nw
     
           ! .. intrinsic functions ..
           intrinsic :: dble, max, min
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           lquery = (lwork == -1)
           ! nq is the order of q;
           ! nw is the minimum dimension of work.
           if (left) then
              nq = m
           else
              nq = n
           end if
           nw = nq
           if (n1 == 0 .or. n2 == 0) nw = 1
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. stdlib_lsame(trans, 'n') .and. .not. stdlib_lsame(trans, 't')) &
                     then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (n1 < 0 .or. n1 + n2 /= nq) then
              info = -5
           else if (n2 < 0) then
              info = -6
           else if (ldq < max(1, nq)) then
              info = -8
           else if (ldc < max(1, m)) then
              info = -10
           else if (lwork < nw .and. .not. lquery) then
              info = -12
           end if
           if (info == 0) then
              lwkopt = m*n
              work(1) = real(lwkopt, KIND=dp)
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorm22', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) then
              work(1) = 1
              return
           end if
           ! degenerate cases (n1 = 0 or n2 = 0) are handled using stdlib_dtrmm.
           if (n1 == 0) then
              call stdlib_dtrmm(side, 'upper', trans, 'non-unit', m, n, one, q, ldq, c, ldc)
                        
              work(1) = one
              return
           else if (n2 == 0) then
              call stdlib_dtrmm(side, 'lower', trans, 'non-unit', m, n, one, q, ldq, c, ldc)
                        
              work(1) = one
              return
           end if
           ! compute the largest chunk size available from the workspace.
           nb = max(1, min(lwork, lwkopt)/nq)
           if (left) then
              if (notran) then
                 do i = 1, n, nb
                    len = min(nb, n - i + 1)
                    ldwork = m
                    ! multiply bottom part of c by q12.
                    call stdlib_dlacpy('all', n1, len, c(n2 + 1, i), ldc, work, ldwork)
                    call stdlib_dtrmm('left', 'lower', 'no transpose', 'non-unit', n1, len, one, &
                              q(1, n2 + 1), ldq, work, ldwork)
                    ! multiply top part of c by q11.
                    call stdlib_dgemm('no transpose', 'no transpose', n1, len, n2, one, q, ldq, c( &
                               1, i), ldc, one, work, ldwork)
                    ! multiply top part of c by q21.
                    call stdlib_dlacpy('all', n2, len, c(1, i), ldc, work(n1 + 1), ldwork)
                              
                    call stdlib_dtrmm('left', 'upper', 'no transpose', 'non-unit', n2, len, one, &
                              q(n1 + 1, 1), ldq, work(n1 + 1), ldwork)
                    ! multiply bottom part of c by q22.
                    call stdlib_dgemm('no transpose', 'no transpose', n2, len, n1, one, q(n1 + 1, &
                              n2 + 1), ldq, c(n2 + 1, i), ldc, one, work(n1 + 1), ldwork)
                    ! copy everything back.
                    call stdlib_dlacpy('all', m, len, work, ldwork, c(1, i), ldc)
                 end do
              else
                 do i = 1, n, nb
                    len = min(nb, n - i + 1)
                    ldwork = m
                    ! multiply bottom part of c by q21**t.
                    call stdlib_dlacpy('all', n2, len, c(n1 + 1, i), ldc, work, ldwork)
                    call stdlib_dtrmm('left', 'upper', 'transpose', 'non-unit', n2, len, one, q( &
                              n1 + 1, 1), ldq, work, ldwork)
                    ! multiply top part of c by q11**t.
                    call stdlib_dgemm('transpose', 'no transpose', n2, len, n1, one, q, ldq, c(1, &
                               i), ldc, one, work, ldwork)
                    ! multiply top part of c by q12**t.
                    call stdlib_dlacpy('all', n1, len, c(1, i), ldc, work(n2 + 1), ldwork)
                              
                    call stdlib_dtrmm('left', 'lower', 'transpose', 'non-unit', n1, len, one, q( &
                              1, n2 + 1), ldq, work(n2 + 1), ldwork)
                    ! multiply bottom part of c by q22**t.
                    call stdlib_dgemm('transpose', 'no transpose', n1, len, n2, one, q(n1 + 1, n2 + &
                              1), ldq, c(n1 + 1, i), ldc, one, work(n2 + 1), ldwork)
                    ! copy everything back.
                    call stdlib_dlacpy('all', m, len, work, ldwork, c(1, i), ldc)
                 end do
              end if
           else
              if (notran) then
                 do i = 1, m, nb
                    len = min(nb, m - i + 1)
                    ldwork = len
                    ! multiply right part of c by q21.
                    call stdlib_dlacpy('all', len, n2, c(i, n1 + 1), ldc, work, ldwork)
                    call stdlib_dtrmm('right', 'upper', 'no transpose', 'non-unit', len, n2, one, &
                              q(n1 + 1, 1), ldq, work, ldwork)
                    ! multiply left part of c by q11.
                    call stdlib_dgemm('no transpose', 'no transpose', len, n2, n1, one, c(i, 1), &
                               ldc, q, ldq, one, work, ldwork)
                    ! multiply left part of c by q12.
                    call stdlib_dlacpy('all', len, n1, c(i, 1), ldc, work(1 + n2*ldwork), &
                              ldwork)
                    call stdlib_dtrmm('right', 'lower', 'no transpose', 'non-unit', len, n1, one, &
                              q(1, n2 + 1), ldq, work(1 + n2*ldwork), ldwork)
                    ! multiply right part of c by q22.
                    call stdlib_dgemm('no transpose', 'no transpose', len, n1, n2, one, c(i, n1 + &
                              1), ldc, q(n1 + 1, n2 + 1), ldq, one, work(1 + n2*ldwork), ldwork)
                    ! copy everything back.
                    call stdlib_dlacpy('all', len, n, work, ldwork, c(i, 1), ldc)
                 end do
              else
                 do i = 1, m, nb
                    len = min(nb, m - i + 1)
                    ldwork = len
                    ! multiply right part of c by q12**t.
                    call stdlib_dlacpy('all', len, n1, c(i, n2 + 1), ldc, work, ldwork)
                    call stdlib_dtrmm('right', 'lower', 'transpose', 'non-unit', len, n1, one, q( &
                              1, n2 + 1), ldq, work, ldwork)
                    ! multiply left part of c by q11**t.
                    call stdlib_dgemm('no transpose', 'transpose', len, n1, n2, one, c(i, 1), &
                              ldc, q, ldq, one, work, ldwork)
                    ! multiply left part of c by q21**t.
                    call stdlib_dlacpy('all', len, n2, c(i, 1), ldc, work(1 + n1*ldwork), &
                              ldwork)
                    call stdlib_dtrmm('right', 'upper', 'transpose', 'non-unit', len, n2, one, q( &
                              n1 + 1, 1), ldq, work(1 + n1*ldwork), ldwork)
                    ! multiply right part of c by q22**t.
                    call stdlib_dgemm('no transpose', 'transpose', len, n2, n1, one, c(i, n2 + 1), &
                               ldc, q(n1 + 1, n2 + 1), ldq, one, work(1 + n1*ldwork), ldwork)
                    ! copy everything back.
                    call stdlib_dlacpy('all', len, n, work, ldwork, c(i, 1), ldc)
                 end do
              end if
           end if
           work(1) = real(lwkopt, KIND=dp)
           return
           ! end of stdlib_dorm22
     end subroutine stdlib_dorm22

     ! DORM2L overwrites the general real m by n matrix C with
     ! Q * C  if SIDE = 'L' and TRANS = 'N', or
     ! Q**T * C  if SIDE = 'L' and TRANS = 'T', or
     ! C * Q  if SIDE = 'R' and TRANS = 'N', or
     ! C * Q**T if SIDE = 'R' and TRANS = 'T',
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(k) . . . H(2) H(1)
     ! as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n
     ! if SIDE = 'R'.

     subroutine stdlib_dorm2l(side, trans, m, n, k, a, lda, tau, c, ldc, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: left, notran
           integer(ilp) :: i, i1, i2, i3, mi, ni, nq
           real(dp) :: aii
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           ! nq is the order of q
           if (left) then
              nq = m
           else
              nq = n
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, nq)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorm2l', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0 .or. k == 0) return
           if ((left .and. notran) .or. (.not. left .and. .not. notran)) then
              i1 = 1
              i2 = k
              i3 = 1
           else
              i1 = k
              i2 = 1
              i3 = -1
           end if
           if (left) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if (left) then
                 ! h(i) is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i)
              aii = a(nq - k + i, i)
              a(nq - k + i, i) = one
              call stdlib_dlarf(side, mi, ni, a(1, i), 1, tau(i), c, ldc, work)
              a(nq - k + i, i) = aii
           end do
           return
           ! end of stdlib_dorm2l
     end subroutine stdlib_dorm2l

     ! DORM2R overwrites the general real m by n matrix C with
     ! Q * C  if SIDE = 'L' and TRANS = 'N', or
     ! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     ! C * Q  if SIDE = 'R' and TRANS = 'N', or
     ! C * Q**T if SIDE = 'R' and TRANS = 'T',
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(1) H(2) . . . H(k)
     ! as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
     ! if SIDE = 'R'.

     subroutine stdlib_dorm2r(side, trans, m, n, k, a, lda, tau, c, ldc, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: left, notran
           integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(dp) :: aii
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           ! nq is the order of q
           if (left) then
              nq = m
           else
              nq = n
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, nq)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorm2r', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0 .or. k == 0) return
           if ((left .and. .not. notran) .or. (.not. left .and. notran)) then
              i1 = 1
              i2 = k
              i3 = 1
           else
              i1 = k
              i2 = 1
              i3 = -1
           end if
           if (left) then
              ni = n
              jc = 1
           else
              mi = m
              ic = 1
           end if
           do i = i1, i2, i3
              if (left) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1
                 jc = i
              end if
              ! apply h(i)
              aii = a(i, i)
              a(i, i) = one
              call stdlib_dlarf(side, mi, ni, a(i, i), 1, tau(i), c(ic, jc), ldc, work)
                        
              a(i, i) = aii
           end do
           return
           ! end of stdlib_dorm2r
     end subroutine stdlib_dorm2r

     ! DORML2 overwrites the general real m by n matrix C with
     ! Q * C  if SIDE = 'L' and TRANS = 'N', or
     ! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     ! C * Q  if SIDE = 'R' and TRANS = 'N', or
     ! C * Q**T if SIDE = 'R' and TRANS = 'T',
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(k) . . . H(2) H(1)
     ! as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n
     ! if SIDE = 'R'.

     subroutine stdlib_dorml2(side, trans, m, n, k, a, lda, tau, c, ldc, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: left, notran
           integer(ilp) :: i, i1, i2, i3, ic, jc, mi, ni, nq
           real(dp) :: aii
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           ! nq is the order of q
           if (left) then
              nq = m
           else
              nq = n
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, k)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dorml2', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0 .or. k == 0) return
           if ((left .and. notran) .or. (.not. left .and. .not. notran)) then
              i1 = 1
              i2 = k
              i3 = 1
           else
              i1 = k
              i2 = 1
              i3 = -1
           end if
           if (left) then
              ni = n
              jc = 1
           else
              mi = m
              ic = 1
           end if
           do i = i1, i2, i3
              if (left) then
                 ! h(i) is applied to c(i:m,1:n)
                 mi = m - i + 1
                 ic = i
              else
                 ! h(i) is applied to c(1:m,i:n)
                 ni = n - i + 1
                 jc = i
              end if
              ! apply h(i)
              aii = a(i, i)
              a(i, i) = one
              call stdlib_dlarf(side, mi, ni, a(i, i), lda, tau(i), c(ic, jc), ldc, work)
                        
              a(i, i) = aii
           end do
           return
           ! end of stdlib_dorml2
     end subroutine stdlib_dorml2

     ! DORMLQ overwrites the general real M-by-N matrix C with
     ! SIDE = 'L'     SIDE = 'R'
     ! TRANS = 'N':      Q * C          C * Q
     ! TRANS = 'T':      Q**T * C       C * Q**T
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(k) . . . H(2) H(1)
     ! as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N
     ! if SIDE = 'R'.

     subroutine stdlib_dormlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: nbmax = 64
           integer(ilp), parameter :: ldt = nbmax + 1
           integer(ilp), parameter :: tsize = ldt*nbmax
           
           ! .. local scalars ..
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           lquery = (lwork == -1)
           ! nq is the order of q and nw is the minimum dimension of work
           if (left) then
              nq = m
              nw = max(1, n)
           else
              nq = n
              nw = max(1, m)
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, k)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           else if (lwork < nw .and. .not. lquery) then
              info = -12
           end if
           if (info == 0) then
              ! compute the workspace requirements
              nb = min(nbmax, stdlib_ilaenv(1, 'stdlib_dormlq', side//trans, m, n, k, -1))
                        
              lwkopt = nw*nb + tsize
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dormlq', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0 .or. k == 0) then
              work(1) = 1
              return
           end if
           nbmin = 2
           ldwork = nw
           if (nb > 1 .and. nb < k) then
              if (lwork < lwkopt) then
                 nb = (lwork - tsize)/ldwork
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dormlq', side//trans, m, n, k, -1))
                           
              end if
           end if
           if (nb < nbmin .or. nb >= k) then
              ! use unblocked code
              call stdlib_dorml2(side, trans, m, n, k, a, lda, tau, c, ldc, work, iinfo)
           else
              ! use blocked code
              iwt = 1 + nw*nb
              if ((left .and. notran) .or. (.not. left .and. .not. notran)) then
                 i1 = 1
                 i2 = k
                 i3 = nb
              else
                 i1 = ((k - 1)/nb)*nb + 1
                 i2 = 1
                 i3 = -nb
              end if
              if (left) then
                 ni = n
                 jc = 1
              else
                 mi = m
                 ic = 1
              end if
              if (notran) then
                 transt = 't'
              else
                 transt = 'n'
              end if
              do i = i1, i2, i3
                 ib = min(nb, k - i + 1)
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib_dlarft('forward', 'rowwise', nq - i + 1, ib, a(i, i), lda, tau(i), &
                           work(iwt), ldt)
                 if (left) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib_dlarfb(side, transt, 'forward', 'rowwise', mi, ni, ib, a(i, i), &
                           lda, work(iwt), ldt, c(ic, jc), ldc, work, ldwork)
              end do
           end if
           work(1) = lwkopt
           return
           ! end of stdlib_dormlq
     end subroutine stdlib_dormlq

     ! DORMQL overwrites the general real M-by-N matrix C with
     ! SIDE = 'L'     SIDE = 'R'
     ! TRANS = 'N':      Q * C          C * Q
     ! TRANS = 'T':      Q**T * C       C * Q**T
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(k) . . . H(2) H(1)
     ! as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N
     ! if SIDE = 'R'.

     subroutine stdlib_dormql(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: nbmax = 64
           integer(ilp), parameter :: ldt = nbmax + 1
           integer(ilp), parameter :: tsize = ldt*nbmax
           
           ! .. local scalars ..
           logical(lk) :: left, lquery, notran
           integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           lquery = (lwork == -1)
           ! nq is the order of q and nw is the minimum dimension of work
           if (left) then
              nq = m
              nw = max(1, n)
           else
              nq = n
              nw = max(1, m)
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, nq)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           else if (lwork < nw .and. .not. lquery) then
              info = -12
           end if
           if (info == 0) then
              ! compute the workspace requirements
              if (m == 0 .or. n == 0) then
                 lwkopt = 1
              else
                 nb = min(nbmax, stdlib_ilaenv(1, 'stdlib_dormql', side//trans, m, n, k, -1))
                           
                 lwkopt = nw*nb + tsize
              end if
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dormql', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) then
              return
           end if
           nbmin = 2
           ldwork = nw
           if (nb > 1 .and. nb < k) then
              if (lwork < lwkopt) then
                 nb = (lwork - tsize)/ldwork
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dormql', side//trans, m, n, k, -1))
                           
              end if
           end if
           if (nb < nbmin .or. nb >= k) then
              ! use unblocked code
              call stdlib_dorm2l(side, trans, m, n, k, a, lda, tau, c, ldc, work, iinfo)
           else
              ! use blocked code
              iwt = 1 + nw*nb
              if ((left .and. notran) .or. (.not. left .and. .not. notran)) then
                 i1 = 1
                 i2 = k
                 i3 = nb
              else
                 i1 = ((k - 1)/nb)*nb + 1
                 i2 = 1
                 i3 = -nb
              end if
              if (left) then
                 ni = n
              else
                 mi = m
              end if
              do i = i1, i2, i3
                 ib = min(nb, k - i + 1)
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib_dlarft('backward', 'columnwise', nq - k + i + ib - 1, ib, a(1, i), lda, &
                           tau(i), work(iwt), ldt)
                 if (left) then
                    ! h or h**t is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1
                 else
                    ! h or h**t is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1
                 end if
                 ! apply h or h**t
                 call stdlib_dlarfb(side, trans, 'backward', 'columnwise', mi, ni, ib, a(1, i), &
                           lda, work(iwt), ldt, c, ldc, work, ldwork)
              end do
           end if
           work(1) = lwkopt
           return
           ! end of stdlib_dormql
     end subroutine stdlib_dormql

     ! DORMQR overwrites the general real M-by-N matrix C with
     ! SIDE = 'L'     SIDE = 'R'
     ! TRANS = 'N':      Q * C          C * Q
     ! TRANS = 'T':      Q**T * C       C * Q**T
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(1) H(2) . . . H(k)
     ! as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
     ! if SIDE = 'R'.

     subroutine stdlib_dormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: nbmax = 64
           integer(ilp), parameter :: ldt = nbmax + 1
           integer(ilp), parameter :: tsize = ldt*nbmax
           
           ! .. local scalars ..
           logical(lk) :: left, lquery, notran
           integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, jc, ldwork, lwkopt, mi, nb, nbmin, &
                     ni, nq, nw
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           lquery = (lwork == -1)
           ! nq is the order of q and nw is the minimum dimension of work
           if (left) then
              nq = m
              nw = max(1, n)
           else
              nq = n
              nw = max(1, m)
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, nq)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           else if (lwork < nw .and. .not. lquery) then
              info = -12
           end if
           if (info == 0) then
              ! compute the workspace requirements
              nb = min(nbmax, stdlib_ilaenv(1, 'stdlib_dormqr', side//trans, m, n, k, -1))
                        
              lwkopt = nw*nb + tsize
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dormqr', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0 .or. k == 0) then
              work(1) = 1
              return
           end if
           nbmin = 2
           ldwork = nw
           if (nb > 1 .and. nb < k) then
              if (lwork < lwkopt) then
                 nb = (lwork - tsize)/ldwork
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dormqr', side//trans, m, n, k, -1))
                           
              end if
           end if
           if (nb < nbmin .or. nb >= k) then
              ! use unblocked code
              call stdlib_dorm2r(side, trans, m, n, k, a, lda, tau, c, ldc, work, iinfo)
           else
              ! use blocked code
              iwt = 1 + nw*nb
              if ((left .and. .not. notran) .or. (.not. left .and. notran)) then
                 i1 = 1
                 i2 = k
                 i3 = nb
              else
                 i1 = ((k - 1)/nb)*nb + 1
                 i2 = 1
                 i3 = -nb
              end if
              if (left) then
                 ni = n
                 jc = 1
              else
                 mi = m
                 ic = 1
              end if
              do i = i1, i2, i3
                 ib = min(nb, k - i + 1)
                 ! form the triangular factor of the block reflector
                 ! h = h(i) h(i+1) . . . h(i+ib-1)
                 call stdlib_dlarft('forward', 'columnwise', nq - i + 1, ib, a(i, i), lda, tau(i), &
                            work(iwt), ldt)
                 if (left) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib_dlarfb(side, trans, 'forward', 'columnwise', mi, ni, ib, a(i, i), &
                           lda, work(iwt), ldt, c(ic, jc), ldc, work, ldwork)
              end do
           end if
           work(1) = lwkopt
           return
           ! end of stdlib_dormqr
     end subroutine stdlib_dormqr

     ! DORMR2 overwrites the general real m by n matrix C with
     ! Q * C  if SIDE = 'L' and TRANS = 'N', or
     ! Q**T* C  if SIDE = 'L' and TRANS = 'T', or
     ! C * Q  if SIDE = 'R' and TRANS = 'N', or
     ! C * Q**T if SIDE = 'R' and TRANS = 'T',
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(1) H(2) . . . H(k)
     ! as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n
     ! if SIDE = 'R'.

     subroutine stdlib_dormr2(side, trans, m, n, k, a, lda, tau, c, ldc, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: left, notran
           integer(ilp) :: i, i1, i2, i3, mi, ni, nq
           real(dp) :: aii
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           ! nq is the order of q
           if (left) then
              nq = m
           else
              nq = n
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, k)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dormr2', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0 .or. k == 0) return
           if ((left .and. .not. notran) .or. (.not. left .and. notran)) then
              i1 = 1
              i2 = k
              i3 = 1
           else
              i1 = k
              i2 = 1
              i3 = -1
           end if
           if (left) then
              ni = n
           else
              mi = m
           end if
           do i = i1, i2, i3
              if (left) then
                 ! h(i) is applied to c(1:m-k+i,1:n)
                 mi = m - k + i
              else
                 ! h(i) is applied to c(1:m,1:n-k+i)
                 ni = n - k + i
              end if
              ! apply h(i)
              aii = a(i, nq - k + i)
              a(i, nq - k + i) = one
              call stdlib_dlarf(side, mi, ni, a(i, 1), lda, tau(i), c, ldc, work)
              a(i, nq - k + i) = aii
           end do
           return
           ! end of stdlib_dormr2
     end subroutine stdlib_dormr2

     ! DORMR3 overwrites the general real m by n matrix C with
     ! Q * C  if SIDE = 'L' and TRANS = 'N', or
     ! Q**T* C  if SIDE = 'L' and TRANS = 'C', or
     ! C * Q  if SIDE = 'R' and TRANS = 'N', or
     ! C * Q**T if SIDE = 'R' and TRANS = 'C',
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(1) H(2) . . . H(k)
     ! as returned by DTZRZF. Q is of order m if SIDE = 'L' and of order n
     ! if SIDE = 'R'.

     subroutine stdlib_dormr3(side, trans, m, n, k, l, a, lda, tau, c, ldc, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, l, lda, ldc, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: left, notran
           integer(ilp) :: i, i1, i2, i3, ic, ja, jc, mi, ni, nq
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           ! nq is the order of q
           if (left) then
              nq = m
           else
              nq = n
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (l < 0 .or. (left .and. (l > m)) .or. (.not. left .and. (l > n))) then
              info = -6
           else if (lda < max(1, k)) then
              info = -8
           else if (ldc < max(1, m)) then
              info = -11
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dormr3', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0 .or. k == 0) return
           if ((left .and. .not. notran .or. .not. left .and. notran)) then
              i1 = 1
              i2 = k
              i3 = 1
           else
              i1 = k
              i2 = 1
              i3 = -1
           end if
           if (left) then
              ni = n
              ja = m - l + 1
              jc = 1
           else
              mi = m
              ja = n - l + 1
              ic = 1
           end if
           do i = i1, i2, i3
              if (left) then
                 ! h(i) or h(i)**t is applied to c(i:m,1:n)
                 mi = m - i + 1
                 ic = i
              else
                 ! h(i) or h(i)**t is applied to c(1:m,i:n)
                 ni = n - i + 1
                 jc = i
              end if
              ! apply h(i) or h(i)**t
              call stdlib_dlarz(side, mi, ni, l, a(i, ja), lda, tau(i), c(ic, jc), ldc, &
                        work)
           end do
           return
           ! end of stdlib_dormr3
     end subroutine stdlib_dormr3

     ! DORMRQ overwrites the general real M-by-N matrix C with
     ! SIDE = 'L'     SIDE = 'R'
     ! TRANS = 'N':      Q * C          C * Q
     ! TRANS = 'T':      Q**T * C       C * Q**T
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(1) H(2) . . . H(k)
     ! as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N
     ! if SIDE = 'R'.

     subroutine stdlib_dormrq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, lda, ldc, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: nbmax = 64
           integer(ilp), parameter :: ldt = nbmax + 1
           integer(ilp), parameter :: tsize = ldt*nbmax
           
           ! .. local scalars ..
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(ilp) :: i, i1, i2, i3, ib, iinfo, iwt, ldwork, lwkopt, mi, nb, nbmin, ni, nq, &
                     nw
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           lquery = (lwork == -1)
           ! nq is the order of q and nw is the minimum dimension of work
           if (left) then
              nq = m
              nw = max(1, n)
           else
              nq = n
              nw = max(1, m)
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (lda < max(1, k)) then
              info = -7
           else if (ldc < max(1, m)) then
              info = -10
           else if (lwork < nw .and. .not. lquery) then
              info = -12
           end if
           if (info == 0) then
              ! compute the workspace requirements
              if (m == 0 .or. n == 0) then
                 lwkopt = 1
              else
                 nb = min(nbmax, stdlib_ilaenv(1, 'stdlib_dormrq', side//trans, m, n, k, -1))
                           
                 lwkopt = nw*nb + tsize
              end if
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dormrq', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) then
              return
           end if
           nbmin = 2
           ldwork = nw
           if (nb > 1 .and. nb < k) then
              if (lwork < lwkopt) then
                 nb = (lwork - tsize)/ldwork
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dormrq', side//trans, m, n, k, -1))
                           
              end if
           end if
           if (nb < nbmin .or. nb >= k) then
              ! use unblocked code
              call stdlib_dormr2(side, trans, m, n, k, a, lda, tau, c, ldc, work, iinfo)
           else
              ! use blocked code
              iwt = 1 + nw*nb
              if ((left .and. .not. notran) .or. (.not. left .and. notran)) then
                 i1 = 1
                 i2 = k
                 i3 = nb
              else
                 i1 = ((k - 1)/nb)*nb + 1
                 i2 = 1
                 i3 = -nb
              end if
              if (left) then
                 ni = n
              else
                 mi = m
              end if
              if (notran) then
                 transt = 't'
              else
                 transt = 'n'
              end if
              do i = i1, i2, i3
                 ib = min(nb, k - i + 1)
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib_dlarft('backward', 'rowwise', nq - k + i + ib - 1, ib, a(i, 1), lda, tau( &
                           i), work(iwt), ldt)
                 if (left) then
                    ! h or h**t is applied to c(1:m-k+i+ib-1,1:n)
                    mi = m - k + i + ib - 1
                 else
                    ! h or h**t is applied to c(1:m,1:n-k+i+ib-1)
                    ni = n - k + i + ib - 1
                 end if
                 ! apply h or h**t
                 call stdlib_dlarfb(side, transt, 'backward', 'rowwise', mi, ni, ib, a(i, 1), &
                           lda, work(iwt), ldt, c, ldc, work, ldwork)
              end do
           end if
           work(1) = lwkopt
           return
           ! end of stdlib_dormrq
     end subroutine stdlib_dormrq

     ! DORMRZ overwrites the general real M-by-N matrix C with
     ! SIDE = 'L'     SIDE = 'R'
     ! TRANS = 'N':      Q * C          C * Q
     ! TRANS = 'T':      Q**T * C       C * Q**T
     ! where Q is a real orthogonal matrix defined as the product of k
     ! elementary reflectors
     ! Q = H(1) H(2) . . . H(k)
     ! as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N
     ! if SIDE = 'R'.

     subroutine stdlib_dormrz(side, trans, m, n, k, l, a, lda, tau, c, ldc, work, lwork, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side, trans
           integer(ilp) :: info, k, l, lda, ldc, lwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(ldc, *), tau(*), work(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: nbmax = 64
           integer(ilp), parameter :: ldt = nbmax + 1
           integer(ilp), parameter :: tsize = ldt*nbmax
           
           ! .. local scalars ..
           logical(lk) :: left, lquery, notran
           character :: transt
           integer(ilp) :: i, i1, i2, i3, ib, ic, iinfo, iwt, ja, jc, ldwork, lwkopt, mi, nb, &
                     nbmin, ni, nq, nw
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           left = stdlib_lsame(side, 'l')
           notran = stdlib_lsame(trans, 'n')
           lquery = (lwork == -1)
           ! nq is the order of q and nw is the minimum dimension of work
           if (left) then
              nq = m
              nw = max(1, n)
           else
              nq = n
              nw = max(1, m)
           end if
           if (.not. left .and. .not. stdlib_lsame(side, 'r')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't')) then
              info = -2
           else if (m < 0) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0 .or. k > nq) then
              info = -5
           else if (l < 0 .or. (left .and. (l > m)) .or. (.not. left .and. (l > n))) then
              info = -6
           else if (lda < max(1, k)) then
              info = -8
           else if (ldc < max(1, m)) then
              info = -11
           else if (lwork < nw .and. .not. lquery) then
              info = -13
           end if
           if (info == 0) then
              ! compute the workspace requirements
              if (m == 0 .or. n == 0) then
                 lwkopt = 1
              else
                 nb = min(nbmax, stdlib_ilaenv(1, 'stdlib_dormrq', side//trans, m, n, k, -1))
                           
                 lwkopt = nw*nb + tsize
              end if
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dormrz', -info)
              return
           else if (lquery) then
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) then
              work(1) = 1
              return
           end if
           nbmin = 2
           ldwork = nw
           if (nb > 1 .and. nb < k) then
              if (lwork < lwkopt) then
                 nb = (lwork - tsize)/ldwork
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dormrq', side//trans, m, n, k, -1))
                           
              end if
           end if
           if (nb < nbmin .or. nb >= k) then
              ! use unblocked code
              call stdlib_dormr3(side, trans, m, n, k, l, a, lda, tau, c, ldc, work, iinfo)
                        
           else
              ! use blocked code
              iwt = 1 + nw*nb
              if ((left .and. .not. notran) .or. (.not. left .and. notran)) then
                 i1 = 1
                 i2 = k
                 i3 = nb
              else
                 i1 = ((k - 1)/nb)*nb + 1
                 i2 = 1
                 i3 = -nb
              end if
              if (left) then
                 ni = n
                 jc = 1
                 ja = m - l + 1
              else
                 mi = m
                 ic = 1
                 ja = n - l + 1
              end if
              if (notran) then
                 transt = 't'
              else
                 transt = 'n'
              end if
              do i = i1, i2, i3
                 ib = min(nb, k - i + 1)
                 ! form the triangular factor of the block reflector
                 ! h = h(i+ib-1) . . . h(i+1) h(i)
                 call stdlib_dlarzt('backward', 'rowwise', l, ib, a(i, ja), lda, tau(i), work( &
                            iwt), ldt)
                 if (left) then
                    ! h or h**t is applied to c(i:m,1:n)
                    mi = m - i + 1
                    ic = i
                 else
                    ! h or h**t is applied to c(1:m,i:n)
                    ni = n - i + 1
                    jc = i
                 end if
                 ! apply h or h**t
                 call stdlib_dlarzb(side, transt, 'backward', 'rowwise', mi, ni, ib, l, a(i, ja) &
                           , lda, work(iwt), ldt, c(ic, jc), ldc, work, ldwork)
              end do
           end if
           work(1) = lwkopt
           return
           ! end of stdlib_dormrz
     end subroutine stdlib_dormrz

     ! DPBEQU computes row and column scalings intended to equilibrate a
     ! symmetric positive definite band matrix A and reduce its condition
     ! number (with respect to the two-norm).  S contains the scale factors,
     ! S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
     ! elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
     ! choice of S puts the condition number of B within a factor N of the
     ! smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_dpbequ(uplo, n, kd, ab, ldab, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, n
           real(dp) :: amax, scond
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), s(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, j
           real(dp) :: smin
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (ldab < kd + 1) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpbequ', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           if (upper) then
              j = kd + 1
           else
              j = 1
           end if
           ! initialize smin and amax.
           s(1) = ab(j, 1)
           smin = s(1)
           amax = s(1)
           ! find the minimum and maximum diagonal elements.
           do i = 2, n
              s(i) = ab(j, i)
              smin = min(smin, s(i))
              amax = max(amax, s(i))
           end do
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = one/sqrt(s(i))
              end do
              ! compute scond = min(s(i)) / max(s(i))
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_dpbequ
     end subroutine stdlib_dpbequ

     ! DPBSTF computes a split Cholesky factorization of a real
     ! symmetric positive definite band matrix A.
     ! This routine is designed to be used in conjunction with DSBGST.
     ! The factorization has the form  A = S**T*S  where S is a band matrix
     ! of the same bandwidth as A and the following structure:
     ! S = ( U    )
     ! ( M  L )
     ! where U is upper triangular of order m = (n+kd)/2, and L is lower
     ! triangular of order n-m.

     subroutine stdlib_dpbstf(uplo, n, kd, ab, ldab, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, n
           ! .. array arguments ..
           real(dp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, kld, km, m
           real(dp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (ldab < kd + 1) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpbstf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           kld = max(1, ldab - 1)
           ! set the splitting point m.
           m = (n + kd)/2
           if (upper) then
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab(kd + 1, j)
                 if (ajj <= zero) go to 50
                 ajj = sqrt(ajj)
                 ab(kd + 1, j) = ajj
                 km = min(j - 1, kd)
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib_dscal(km, one/ajj, ab(kd + 1 - km, j), 1)
                 call stdlib_dsyr('upper', km, -one, ab(kd + 1 - km, j), 1, ab(kd + 1, j - km), kld)
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab(kd + 1, j)
                 if (ajj <= zero) go to 50
                 ajj = sqrt(ajj)
                 ab(kd + 1, j) = ajj
                 km = min(kd, m - j)
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if (km > 0) then
                    call stdlib_dscal(km, one/ajj, ab(kd, j + 1), kld)
                    call stdlib_dsyr('upper', km, -one, ab(kd, j + 1), kld, ab(kd + 1, j + 1), kld)
                              
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**t*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab(1, j)
                 if (ajj <= zero) go to 50
                 ajj = sqrt(ajj)
                 ab(1, j) = ajj
                 km = min(j - 1, kd)
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib_dscal(km, one/ajj, ab(km + 1, j - km), kld)
                 call stdlib_dsyr('lower', km, -one, ab(km + 1, j - km), kld, ab(1, j - km), kld)
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**t*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = ab(1, j)
                 if (ajj <= zero) go to 50
                 ajj = sqrt(ajj)
                 ab(1, j) = ajj
                 km = min(kd, m - j)
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if (km > 0) then
                    call stdlib_dscal(km, one/ajj, ab(2, j), 1)
                    call stdlib_dsyr('lower', km, -one, ab(2, j), 1, ab(1, j + 1), kld)
                 end if
              end do
           end if
           return
50      continue
           info = j
           return
           ! end of stdlib_dpbstf
     end subroutine stdlib_dpbstf

     ! DPBTF2 computes the Cholesky factorization of a real symmetric
     ! positive definite band matrix A.
     ! The factorization has the form
     ! A = U**T * U ,  if UPLO = 'U', or
     ! A = L  * L**T,  if UPLO = 'L',
     ! where U is an upper triangular matrix, U**T is the transpose of U, and
     ! L is lower triangular.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_dpbtf2(uplo, n, kd, ab, ldab, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, n
           ! .. array arguments ..
           real(dp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, kld, kn
           real(dp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (ldab < kd + 1) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpbtf2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           kld = max(1, ldab - 1)
           if (upper) then
              ! compute the cholesky factorization a = u**t*u.
              do j = 1, n
                 ! compute u(j,j) and test for non-positive-definiteness.
                 ajj = ab(kd + 1, j)
                 if (ajj <= zero) go to 30
                 ajj = sqrt(ajj)
                 ab(kd + 1, j) = ajj
                 ! compute elements j+1:j+kn of row j and update the
                 ! trailing submatrix within the band.
                 kn = min(kd, n - j)
                 if (kn > 0) then
                    call stdlib_dscal(kn, one/ajj, ab(kd, j + 1), kld)
                    call stdlib_dsyr('upper', kn, -one, ab(kd, j + 1), kld, ab(kd + 1, j + 1), kld)
                              
                 end if
              end do
           else
              ! compute the cholesky factorization a = l*l**t.
              do j = 1, n
                 ! compute l(j,j) and test for non-positive-definiteness.
                 ajj = ab(1, j)
                 if (ajj <= zero) go to 30
                 ajj = sqrt(ajj)
                 ab(1, j) = ajj
                 ! compute elements j+1:j+kn of column j and update the
                 ! trailing submatrix within the band.
                 kn = min(kd, n - j)
                 if (kn > 0) then
                    call stdlib_dscal(kn, one/ajj, ab(2, j), 1)
                    call stdlib_dsyr('lower', kn, -one, ab(2, j), 1, ab(1, j + 1), kld)
                 end if
              end do
           end if
           return
30      continue
           info = j
           return
           ! end of stdlib_dpbtf2
     end subroutine stdlib_dpbtf2

     ! DPBTRS solves a system of linear equations A*X = B with a symmetric
     ! positive definite band matrix A using the Cholesky factorization
     ! A = U**T*U or A = L*L**T computed by DPBTRF.

     subroutine stdlib_dpbtrs(uplo, n, kd, nrhs, ab, ldab, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), b(ldb, *)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (nrhs < 0) then
              info = -4
           else if (ldab < kd + 1) then
              info = -6
           else if (ldb < max(1, n)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpbtrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b where a = u**t *u.
              do j = 1, nrhs
                 ! solve u**t *x = b, overwriting b with x.
                 call stdlib_dtbsv('upper', 'transpose', 'non-unit', n, kd, ab, ldab, b(1, j), &
                           1)
                 ! solve u*x = b, overwriting b with x.
                 call stdlib_dtbsv('upper', 'no transpose', 'non-unit', n, kd, ab, ldab, b(1, j) &
                           , 1)
              end do
           else
              ! solve a*x = b where a = l*l**t.
              do j = 1, nrhs
                 ! solve l*x = b, overwriting b with x.
                 call stdlib_dtbsv('lower', 'no transpose', 'non-unit', n, kd, ab, ldab, b(1, j) &
                           , 1)
                 ! solve l**t *x = b, overwriting b with x.
                 call stdlib_dtbsv('lower', 'transpose', 'non-unit', n, kd, ab, ldab, b(1, j), &
                           1)
              end do
           end if
           return
           ! end of stdlib_dpbtrs
     end subroutine stdlib_dpbtrs

     ! DPOEQU computes row and column scalings intended to equilibrate a
     ! symmetric positive definite matrix A and reduce its condition number
     ! (with respect to the two-norm).  S contains the scale factors,
     ! S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
     ! elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
     ! choice of S puts the condition number of B within a factor N of the
     ! smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_dpoequ(n, a, lda, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, n
           real(dp) :: amax, scond
           ! .. array arguments ..
           real(dp) :: a(lda, *), s(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           real(dp) :: smin
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (n < 0) then
              info = -1
           else if (lda < max(1, n)) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpoequ', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           ! find the minimum and maximum diagonal elements.
           s(1) = a(1, 1)
           smin = s(1)
           amax = s(1)
           do i = 2, n
              s(i) = a(i, i)
              smin = min(smin, s(i))
              amax = max(amax, s(i))
           end do
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = one/sqrt(s(i))
              end do
              ! compute scond = min(s(i)) / max(s(i))
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_dpoequ
     end subroutine stdlib_dpoequ

     ! DPOEQUB computes row and column scalings intended to equilibrate a
     ! symmetric positive definite matrix A and reduce its condition number
     ! (with respect to the two-norm).  S contains the scale factors,
     ! S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
     ! elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
     ! choice of S puts the condition number of B within a factor N of the
     ! smallest possible condition number over all possible diagonal
     ! scalings.
     ! This routine differs from DPOEQU by restricting the scaling factors
     ! to a power of the radix.  Barring over- and underflow, scaling by
     ! these factors introduces no additional rounding errors.  However, the
     ! scaled diagonal entries are no longer approximately 1 but lie
     ! between sqrt(radix) and 1/sqrt(radix).

     subroutine stdlib_dpoequb(n, a, lda, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, n
           real(dp) :: amax, scond
           ! .. array arguments ..
           real(dp) :: a(lda, *), s(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           real(dp) :: smin, base, tmp
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt, log, int
           ! .. executable statements ..
           ! test the input parameters.
           ! positive definite only performs 1 pass of equilibration.
           info = 0
           if (n < 0) then
              info = -1
           else if (lda < max(1, n)) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpoequb', -info)
              return
           end if
           ! quick return if possible.
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           base = stdlib_dlamch('b')
           tmp = -0.5_dp/log(base)
           ! find the minimum and maximum diagonal elements.
           s(1) = a(1, 1)
           smin = s(1)
           amax = s(1)
           do i = 2, n
              s(i) = a(i, i)
              smin = min(smin, s(i))
              amax = max(amax, s(i))
           end do
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = base**int(tmp*log(s(i)))
              end do
              ! compute scond = min(s(i)) / max(s(i)).
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_dpoequb
     end subroutine stdlib_dpoequb

     ! DPOTRS solves a system of linear equations A*X = B with a symmetric
     ! positive definite matrix A using the Cholesky factorization
     ! A = U**T*U or A = L*L**T computed by DPOTRF.

     subroutine stdlib_dpotrs(uplo, n, nrhs, a, lda, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpotrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b where a = u**t *u.
              ! solve u**t *x = b, overwriting b with x.
              call stdlib_dtrsm('left', 'upper', 'transpose', 'non-unit', n, nrhs, one, a, lda, b, &
                         ldb)
              ! solve u*x = b, overwriting b with x.
              call stdlib_dtrsm('left', 'upper', 'no transpose', 'non-unit', n, nrhs, one, a, lda, &
                         b, ldb)
           else
              ! solve a*x = b where a = l*l**t.
              ! solve l*x = b, overwriting b with x.
              call stdlib_dtrsm('left', 'lower', 'no transpose', 'non-unit', n, nrhs, one, a, lda, &
                         b, ldb)
              ! solve l**t *x = b, overwriting b with x.
              call stdlib_dtrsm('left', 'lower', 'transpose', 'non-unit', n, nrhs, one, a, lda, b, &
                         ldb)
           end if
           return
           ! end of stdlib_dpotrs
     end subroutine stdlib_dpotrs

     ! DPPEQU computes row and column scalings intended to equilibrate a
     ! symmetric positive definite matrix A in packed storage and reduce
     ! its condition number (with respect to the two-norm).  S contains the
     ! scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix
     ! B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal.
     ! This choice of S puts the condition number of B within a factor N of
     ! the smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_dppequ(uplo, n, ap, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           real(dp) :: amax, scond
           ! .. array arguments ..
           real(dp) :: ap(*), s(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, jj
           real(dp) :: smin
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dppequ', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           ! initialize smin and amax.
           s(1) = ap(1)
           smin = s(1)
           amax = s(1)
           if (upper) then
              ! uplo = 'u':  upper triangle of a is stored.
              ! find the minimum and maximum diagonal elements.
              jj = 1
              do i = 2, n
                 jj = jj + i
                 s(i) = ap(jj)
                 smin = min(smin, s(i))
                 amax = max(amax, s(i))
              end do
           else
              ! uplo = 'l':  lower triangle of a is stored.
              ! find the minimum and maximum diagonal elements.
              jj = 1
              do i = 2, n
                 jj = jj + n - i + 2
                 s(i) = ap(jj)
                 smin = min(smin, s(i))
                 amax = max(amax, s(i))
              end do
           end if
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = one/sqrt(s(i))
              end do
              ! compute scond = min(s(i)) / max(s(i))
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_dppequ
     end subroutine stdlib_dppequ

     ! DPPTRF computes the Cholesky factorization of a real symmetric
     ! positive definite matrix A stored in packed format.
     ! The factorization has the form
     ! A = U**T * U,  if UPLO = 'U', or
     ! A = L  * L**T,  if UPLO = 'L',
     ! where U is an upper triangular matrix and L is lower triangular.

     subroutine stdlib_dpptrf(uplo, n, ap, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           real(dp) :: ap(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, jc, jj
           real(dp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpptrf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! compute the cholesky factorization a = u**t*u.
              jj = 0
              do j = 1, n
                 jc = jj + 1
                 jj = jj + j
                 ! compute elements 1:j-1 of column j.
                 if (j > 1) call stdlib_dtpsv('upper', 'transpose', 'non-unit', j - 1, ap, ap(jc), &
                           1)
                 ! compute u(j,j) and test for non-positive-definiteness.
                 ajj = ap(jj) - stdlib_ddot(j - 1, ap(jc), 1, ap(jc), 1)
                 if (ajj <= zero) then
                    ap(jj) = ajj
                    go to 30
                 end if
                 ap(jj) = sqrt(ajj)
              end do
           else
              ! compute the cholesky factorization a = l*l**t.
              jj = 1
              do j = 1, n
                 ! compute l(j,j) and test for non-positive-definiteness.
                 ajj = ap(jj)
                 if (ajj <= zero) then
                    ap(jj) = ajj
                    go to 30
                 end if
                 ajj = sqrt(ajj)
                 ap(jj) = ajj
                 ! compute elements j+1:n of column j and update the trailing
                 ! submatrix.
                 if (j < n) then
                    call stdlib_dscal(n - j, one/ajj, ap(jj + 1), 1)
                    call stdlib_dspr('lower', n - j, -one, ap(jj + 1), 1, ap(jj + n - j + 1))
                    jj = jj + n - j + 1
                 end if
              end do
           end if
           go to 40
30      continue
           info = j
40      continue
           return
           ! end of stdlib_dpptrf
     end subroutine stdlib_dpptrf

     ! DPPTRS solves a system of linear equations A*X = B with a symmetric
     ! positive definite matrix A in packed storage using the Cholesky
     ! factorization A = U**T*U or A = L*L**T computed by DPPTRF.

     subroutine stdlib_dpptrs(uplo, n, nrhs, ap, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: ap(*), b(ldb, *)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (ldb < max(1, n)) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dpptrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b where a = u**t * u.
              do i = 1, nrhs
                 ! solve u**t *x = b, overwriting b with x.
                 call stdlib_dtpsv('upper', 'transpose', 'non-unit', n, ap, b(1, i), 1)
                 ! solve u*x = b, overwriting b with x.
                 call stdlib_dtpsv('upper', 'no transpose', 'non-unit', n, ap, b(1, i), 1)
                           
              end do
           else
              ! solve a*x = b where a = l * l**t.
              do i = 1, nrhs
                 ! solve l*y = b, overwriting b with x.
                 call stdlib_dtpsv('lower', 'no transpose', 'non-unit', n, ap, b(1, i), 1)
                           
                 ! solve l**t *x = y, overwriting b with x.
                 call stdlib_dtpsv('lower', 'transpose', 'non-unit', n, ap, b(1, i), 1)
              end do
           end if
           return
           ! end of stdlib_dpptrs
     end subroutine stdlib_dpptrs

     ! DPTCON computes the reciprocal of the condition number (in the
     ! 1-norm) of a real symmetric positive definite tridiagonal matrix
     ! using the factorization A = L*D*L**T or A = U**T*D*U computed by
     ! DPTTRF.
     ! Norm(inv(A)) is computed by a direct method, and the reciprocal of
     ! the condition number is computed as
     ! RCOND = 1 / (ANORM * norm(inv(A))).

     subroutine stdlib_dptcon(n, d, e, anorm, rcond, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, n
           real(dp) :: anorm, rcond
           ! .. array arguments ..
           real(dp) :: d(*), e(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, ix
           real(dp) :: ainvnm
     
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! test the input arguments.
           info = 0
           if (n < 0) then
              info = -1
           else if (anorm < zero) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dptcon', -info)
              return
           end if
           ! quick return if possible
           rcond = zero
           if (n == 0) then
              rcond = one
              return
           else if (anorm == zero) then
              return
           end if
           ! check that d(1:n) is positive.
           do i = 1, n
              if (d(i) <= zero) return
           end do
           ! solve m(a) * x = e, where m(a) = (m(i,j)) is given by
              ! m(i,j) =  abs(a(i,j)), i = j,
              ! m(i,j) = -abs(a(i,j)), i .ne. j,
           ! and e = [ 1, 1, ..., 1 ]**t.  note m(a) = m(l)*d*m(l)**t.
           ! solve m(l) * x = e.
           work(1) = one
           do i = 2, n
              work(i) = one + work(i - 1)*abs(e(i - 1))
           end do
           ! solve d * m(l)**t * x = b.
           work(n) = work(n)/d(n)
           do i = n - 1, 1, -1
              work(i) = work(i)/d(i) + work(i + 1)*abs(e(i))
           end do
           ! compute ainvnm = max(x(i)), 1<=i<=n.
           ix = stdlib_idamax(n, work, 1)
           ainvnm = abs(work(ix))
           ! compute the reciprocal condition number.
           if (ainvnm /= zero) rcond = (one/ainvnm)/anorm
           return
           ! end of stdlib_dptcon
     end subroutine stdlib_dptcon

     ! DPTTRF computes the L*D*L**T factorization of a real symmetric
     ! positive definite tridiagonal matrix A.  The factorization may also
     ! be regarded as having the form A = U**T*D*U.

     subroutine stdlib_dpttrf(n, d, e, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, n
           ! .. array arguments ..
           real(dp) :: d(*), e(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, i4
           real(dp) :: ei
     
           ! .. intrinsic functions ..
           intrinsic :: mod
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (n < 0) then
              info = -1
              call stdlib_xerbla('stdlib_dpttrf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! compute the l*d*l**t (or u**t*d*u) factorization of a.
           i4 = mod(n - 1, 4)
           do i = 1, i4
              if (d(i) <= zero) then
                 info = i
                 go to 30
              end if
              ei = e(i)
              e(i) = ei/d(i)
              d(i + 1) = d(i + 1) - e(i)*ei
           end do
           loop_20: do i = i4 + 1, n - 4, 4
              ! drop out of the loop if d(i) <= 0: the matrix is not positive
              ! definite.
              if (d(i) <= zero) then
                 info = i
                 go to 30
              end if
              ! solve for e(i) and d(i+1).
              ei = e(i)
              e(i) = ei/d(i)
              d(i + 1) = d(i + 1) - e(i)*ei
              if (d(i + 1) <= zero) then
                 info = i + 1
                 go to 30
              end if
              ! solve for e(i+1) and d(i+2).
              ei = e(i + 1)
              e(i + 1) = ei/d(i + 1)
              d(i + 2) = d(i + 2) - e(i + 1)*ei
              if (d(i + 2) <= zero) then
                 info = i + 2
                 go to 30
              end if
              ! solve for e(i+2) and d(i+3).
              ei = e(i + 2)
              e(i + 2) = ei/d(i + 2)
              d(i + 3) = d(i + 3) - e(i + 2)*ei
              if (d(i + 3) <= zero) then
                 info = i + 3
                 go to 30
              end if
              ! solve for e(i+3) and d(i+4).
              ei = e(i + 3)
              e(i + 3) = ei/d(i + 3)
              d(i + 4) = d(i + 4) - e(i + 3)*ei
           end do loop_20
           ! check d(n) for positive definiteness.
           if (d(n) <= zero) info = n
30      continue
           return
           ! end of stdlib_dpttrf
     end subroutine stdlib_dpttrf

     ! DPTTS2 solves a tridiagonal system of the form
     ! A * X = B
     ! using the L*D*L**T factorization of A computed by DPTTRF.  D is a
     ! diagonal matrix specified in the vector D, L is a unit bidiagonal
     ! matrix whose subdiagonal is specified in the vector E, and X and B
     ! are N by NRHS matrices.

     subroutine stdlib_dptts2(n, nrhs, d, e, b, ldb)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: b(ldb, *), d(*), e(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 1) then
              if (n == 1) call stdlib_dscal(nrhs, 1.d0/d(1), b, ldb)
              return
           end if
           ! solve a * x = b using the factorization a = l*d*l**t,
           ! overwriting each right hand side vector with its solution.
           do j = 1, nrhs
                 ! solve l * x = b.
              do i = 2, n
                 b(i, j) = b(i, j) - b(i - 1, j)*e(i - 1)
              end do
                 ! solve d * l**t * x = b.
              b(n, j) = b(n, j)/d(n)
              do i = n - 1, 1, -1
                 b(i, j) = b(i, j)/d(i) - b(i + 1, j)*e(i)
              end do
           end do
           return
           ! end of stdlib_dptts2
     end subroutine stdlib_dptts2

     ! DRSCL multiplies an n-element real vector x by the real scalar 1/a.
     ! This is done without overflow or underflow as long as
     ! the final result x/a does not overflow or underflow.

     subroutine stdlib_drscl(n, sa, sx, incx)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, n
           real(dp) :: sa
           ! .. array arguments ..
           real(dp) :: sx(*)
       ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: done
           real(dp) :: bignum, cden, cden1, cnum, cnum1, mul, smlnum
     
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) return
           ! get machine parameters
           smlnum = stdlib_dlamch('s')
           bignum = one/smlnum
           call stdlib_dlabad(smlnum, bignum)
           ! initialize the denominator to sa and the numerator to 1.
           cden = sa
           cnum = one
10      continue
           cden1 = cden*smlnum
           cnum1 = cnum/bignum
           if (abs(cden1) > abs(cnum) .and. cnum /= zero) then
              ! pre-multiply x by smlnum if cden is large compared to cnum.
              mul = smlnum
              done = .false.
              cden = cden1
           else if (abs(cnum1) > abs(cden)) then
              ! pre-multiply x by bignum if cden is small compared to cnum.
              mul = bignum
              done = .false.
              cnum = cnum1
           else
              ! multiply x by cnum / cden and return.
              mul = cnum/cden
              done = .true.
           end if
           ! scale the vector x by mul
           call stdlib_dscal(n, mul, sx, incx)
           if (.not. done) go to 10
           return
           ! end of stdlib_drscl
     end subroutine stdlib_drscl

     ! DSBGST reduces a real symmetric-definite banded generalized
     ! eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
     ! such that C has the same bandwidth as A.
     ! B must have been previously factorized as S**T*S by DPBSTF, using a
     ! split Cholesky factorization. A is overwritten by C = X**T*A*X, where
     ! X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the
     ! bandwidth of A.

     subroutine stdlib_dsbgst(vect, uplo, n, ka, kb, ab, ldab, bb, ldbb, x, ldx, work, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, vect
           integer(ilp) :: info, ka, kb, ldab, ldbb, ldx, n
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), bb(ldbb, *), work(*), x(ldx, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: update, upper, wantx
           integer(ilp) :: i, i0, i1, i2, inca, j, j1, j1t, j2, j2t, k, ka1, kb1, kbt, l, m, nr, &
                     nrt, nx
           real(dp) :: bii, ra, ra1, t
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input parameters
           wantx = stdlib_lsame(vect, 'v')
           upper = stdlib_lsame(uplo, 'u')
           ka1 = ka + 1
           kb1 = kb + 1
           info = 0
           if (.not. wantx .and. .not. stdlib_lsame(vect, 'n')) then
              info = -1
           else if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (ka < 0) then
              info = -4
           else if (kb < 0 .or. kb > ka) then
              info = -5
           else if (ldab < ka + 1) then
              info = -7
           else if (ldbb < kb + 1) then
              info = -9
           else if (ldx < 1 .or. wantx .and. ldx < max(1, n)) then
              info = -11
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsbgst', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           inca = ldab*ka1
           ! initialize x to the unit matrix, if needed
           if (wantx) call stdlib_dlaset('full', n, n, zero, one, x, ldx)
           ! set m to the splitting point m. it must be the same value as is
           ! used in stdlib_dpbstf. the chosen value allows the arrays work and rwork
           ! to be of dimension (n).
           m = (n + kb)/2
           ! the routine works in two phases, corresponding to the two halves
           ! of the split cholesky factorization of b as s**t*s where
           ! s = ( u    )
               ! ( m  l )
           ! with u upper triangular of order m, and l lower triangular of
           ! order n-m. s has the same bandwidth as b.
           ! s is treated as a product of elementary matrices:
           ! s = s(m)*s(m-1)*...*s(2)*s(1)*s(m+1)*s(m+2)*...*s(n-1)*s(n)
           ! where s(i) is determined by the i-th row of s.
           ! in phase 1, the index i takes the values n, n-1, ... , m+1;
           ! in phase 2, it takes the values 1, 2, ... , m.
           ! for each value of i, the current matrix a is updated by forming
           ! inv(s(i))**t*a*inv(s(i)). this creates a triangular bulge outside
           ! the band of a. the bulge is then pushed down toward the bottom of
           ! a in phase 1, and up toward the top of a in phase 2, by applying
           ! plane rotations.
           ! there are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
           ! of them are linearly independent, so annihilating a bulge requires
           ! only 2*kb-1 plane rotations. the rotations are divided into a 1st
           ! set of kb-1 rotations, and a 2nd set of kb rotations.
           ! wherever possible, rotations are generated and applied in vector
           ! operations of length nr between the indices j1 and j2 (sometimes
           ! replaced by modified values nrt, j1t or j2t).
           ! the cosines and sines of the rotations are stored in the array
           ! work. the cosines of the 1st set of rotations are stored in
           ! elements n+2:n+m-kb-1 and the sines of the 1st set in elements
           ! 2:m-kb-1; the cosines of the 2nd set are stored in elements
           ! n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n.
           ! the bulges are not formed explicitly; nonzero elements outside the
           ! band are created only when they are required for generating new
           ! rotations; they are stored in the array work, in positions where
           ! they are later overwritten by the sines of the rotations which
           ! annihilate them.
           ! **************************** phase 1 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = n, m + 1, -1
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! update = .false.
           ! do i = m + ka + 1, n - 1
              ! apply rotations to push all bulges ka positions downward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = n + 1
10      continue
           if (update) then
              i = i - 1
              kbt = min(kb, i - 1)
              i0 = i - 1
              i1 = min(n, i + ka)
              i2 = i - kbt + ka1
              if (i < m + 1) then
                 update = .false.
                 i = i + 1
                 i0 = m
                 if (ka == 0) go to 480
                 go to 10
              end if
           else
              i = i + ka
              if (i > n - 1) go to 480
           end if
           if (upper) then
              ! transform a, working with the upper triangle
              if (update) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb(kb1, i)
                 do j = i, i1
                    ab(i - j + ka1, j) = ab(i - j + ka1, j)/bii
                 end do
                 do j = max(1, i - ka), i
                    ab(j - i + ka1, i) = ab(j - i + ka1, i)/bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab(j - k + ka1, k) = ab(j - k + ka1, k) - bb(j - i + kb1, i)*ab(k - i + ka1, i) - bb( &
                        k - i + kb1, i)*ab(j - i + ka1, i) + ab(ka1, i)*bb(j - i + kb1, i)*bb(k - i + kb1, &
                                  i)
                    end do
                    do j = max(1, i - ka), i - kbt - 1
                       ab(j - k + ka1, k) = ab(j - k + ka1, k) - bb(k - i + kb1, i)*ab(j - i + ka1, i)
                                 
                    end do
                 end do
                 do j = i, i1
                    do k = max(j - ka, i - kbt), i - 1
                       ab(k - j + ka1, j) = ab(k - j + ka1, j) - bb(k - i + kb1, i)*ab(i - j + ka1, j)
                                 
                    end do
                 end do
                 if (wantx) then
                    ! post-multiply x by inv(s(i))
                    call stdlib_dscal(n - m, one/bii, x(m + 1, i), 1)
                    if (kbt > 0) call stdlib_dger(n - m, kbt, -one, x(m + 1, i), 1, bb(kb1 - kbt, i), &
                              1, x(m + 1, i - kbt), ldx)
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab(i - i1 + ka1, i1)
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_130: do k = 1, kb - 1
                 if (update) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if (i - k + ka < n .and. i - k > 1) then
                       ! generate rotation to annihilate a(i,i-k+ka+1)
                       call stdlib_dlartg(ab(k + 1, i - k + ka), ra1, work(n + i - k + ka - m), work(i - k + &
                                 ka - m), ra)
                       ! create nonzero element a(i-k,i-k+ka+1) outside the
                       ! band and store it in work(i-k)
                       t = -bb(kb1 - k, i)*ra1
                       work(i - k) = work(n + i - k + ka - m)*t - work(i - k + ka - m)*ab(1, i - k + ka)
                                 
                       ab(1, i - k + ka) = work(i - k + ka - m)*t + work(n + i - k + ka - m)*ab(1, i - k + ka)
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1 + max(1, k - i0 + 2)*ka1
                 nr = (n - j2 + ka)/ka1
                 j1 = j2 + (nr - 1)*ka1
                 if (update) then
                    j2t = max(j2, i + 2*ka - k + 1)
                 else
                    j2t = j2
                 end if
                 nrt = (n - j2t + ka)/ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j-m)
                    work(j - m) = work(j - m)*ab(1, j + 1)
                    ab(1, j + 1) = work(n + j - m)*ab(1, j + 1)
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if (nrt > 0) call stdlib_dlargv(nrt, ab(1, j2t), inca, work(j2t - m), ka1, work( &
                           n + j2t - m), ka1)
                 if (nr > 0) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(ka1 - l, j2), inca, ab(ka - l, j2 + 1), inca, work( &
                                  n + j2 - m), work(j2 - m), ka1)
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(ka1, j2), ab(ka1, j2 + 1), ab(ka, j2 + 1), inca, &
                              work(n + j2 - m), work(j2 - m), ka1)
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (n - j2 + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j2 + ka1 - l), inca, ab(l + 1, j2 + ka1 - l &
                              ), inca, work(n + j2 - m), work(j2 - m), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib_drot(n - m, x(m + 1, j), 1, x(m + 1, j + 1), 1, work(n + j - m), &
                                 work(j - m))
                    end do
                 end if
              end do loop_130
              if (update) then
                 if (i2 <= n .and. kbt > 0) then
                    ! create nonzero element a(i-kbt,i-kbt+ka+1) outside the
                    ! band and store it in work(i-kbt)
                    work(i - kbt) = -bb(kb1 - kbt, i)*ra1
                 end if
              end if
              loop_170: do k = kb, 1, -1
                 if (update) then
                    j2 = i - k - 1 + max(2, k - i0 + 1)*ka1
                 else
                    j2 = i - k - 1 + max(1, k - i0 + 1)*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = (n - j2 + ka + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j2 - l + 1), inca, ab(l + 1, j2 - l + 1), &
                              inca, work(n + j2 - ka), work(j2 - ka), ka1)
                 end do
                 nr = (n - j2 + ka)/ka1
                 j1 = j2 + (nr - 1)*ka1
                 do j = j1, j2, -ka1
                    work(j) = work(j - ka)
                    work(n + j) = work(n + j - ka)
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j-ka,j+1) outside the band
                    ! and store it in work(j)
                    work(j) = work(j)*ab(1, j + 1)
                    ab(1, j + 1) = work(n + j)*ab(1, j + 1)
                 end do
                 if (update) then
                    if (i - k < n - ka .and. k <= kbt) work(i - k + ka) = work(i - k)
                 end if
              end do loop_170
              loop_210: do k = kb, 1, -1
                 j2 = i - k - 1 + max(1, k - i0 + 1)*ka1
                 nr = (n - j2 + ka)/ka1
                 j1 = j2 + (nr - 1)*ka1
                 if (nr > 0) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib_dlargv(nr, ab(1, j2), inca, work(j2), ka1, work(n + j2), ka1)
                              
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(ka1 - l, j2), inca, ab(ka - l, j2 + 1), inca, work( &
                                  n + j2), work(j2), ka1)
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(ka1, j2), ab(ka1, j2 + 1), ab(ka, j2 + 1), inca, &
                              work(n + j2), work(j2), ka1)
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (n - j2 + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j2 + ka1 - l), inca, ab(l + 1, j2 + ka1 - l &
                              ), inca, work(n + j2), work(j2), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib_drot(n - m, x(m + 1, j), 1, x(m + 1, j + 1), 1, work(n + j), work( &
                                 j))
                    end do
                 end if
              end do loop_210
              do k = 1, kb - 1
                 j2 = i - k - 1 + max(1, k - i0 + 2)*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = (n - j2 + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j2 + ka1 - l), inca, ab(l + 1, j2 + ka1 - l &
                              ), inca, work(n + j2 - m), work(j2 - m), ka1)
                 end do
              end do
              if (kb > 1) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work(n + j - m) = work(n + j - ka - m)
                    work(j - m) = work(j - ka - m)
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if (update) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb(1, i)
                 do j = i, i1
                    ab(j - i + 1, i) = ab(j - i + 1, i)/bii
                 end do
                 do j = max(1, i - ka), i
                    ab(i - j + 1, j) = ab(i - j + 1, j)/bii
                 end do
                 do k = i - kbt, i - 1
                    do j = i - kbt, k
                       ab(k - j + 1, j) = ab(k - j + 1, j) - bb(i - j + 1, j)*ab(i - k + 1, k) - bb(i - k + 1, &
                                 k)*ab(i - j + 1, j) + ab(1, i)*bb(i - j + 1, j)*bb(i - k + 1, k)
                    end do
                    do j = max(1, i - ka), i - kbt - 1
                       ab(k - j + 1, j) = ab(k - j + 1, j) - bb(i - k + 1, k)*ab(i - j + 1, j)
                    end do
                 end do
                 do j = i, i1
                    do k = max(j - ka, i - kbt), i - 1
                       ab(j - k + 1, k) = ab(j - k + 1, k) - bb(i - k + 1, k)*ab(j - i + 1, i)
                    end do
                 end do
                 if (wantx) then
                    ! post-multiply x by inv(s(i))
                    call stdlib_dscal(n - m, one/bii, x(m + 1, i), 1)
                    if (kbt > 0) call stdlib_dger(n - m, kbt, -one, x(m + 1, i), 1, bb(kbt + 1, i - kbt) &
                              , ldbb - 1, x(m + 1, i - kbt), ldx)
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab(i1 - i + 1, i)
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions down toward the bottom of the
              ! band
              loop_360: do k = 1, kb - 1
                 if (update) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if (i - k + ka < n .and. i - k > 1) then
                       ! generate rotation to annihilate a(i-k+ka+1,i)
                       call stdlib_dlartg(ab(ka1 - k, i), ra1, work(n + i - k + ka - m), work(i - k + ka - m &
                                 ), ra)
                       ! create nonzero element a(i-k+ka+1,i-k) outside the
                       ! band and store it in work(i-k)
                       t = -bb(k + 1, i - k)*ra1
                       work(i - k) = work(n + i - k + ka - m)*t - work(i - k + ka - m)*ab(ka1, i - k)
                       ab(ka1, i - k) = work(i - k + ka - m)*t + work(n + i - k + ka - m)*ab(ka1, i - k)
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i - k - 1 + max(1, k - i0 + 2)*ka1
                 nr = (n - j2 + ka)/ka1
                 j1 = j2 + (nr - 1)*ka1
                 if (update) then
                    j2t = max(j2, i + 2*ka - k + 1)
                 else
                    j2t = j2
                 end if
                 nrt = (n - j2t + ka)/ka1
                 do j = j2t, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j-m)
                    work(j - m) = work(j - m)*ab(ka1, j - ka + 1)
                    ab(ka1, j - ka + 1) = work(n + j - m)*ab(ka1, j - ka + 1)
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if (nrt > 0) call stdlib_dlargv(nrt, ab(ka1, j2t - ka), inca, work(j2t - m), ka1, &
                           work(n + j2t - m), ka1)
                 if (nr > 0) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(l + 1, j2 - l), inca, ab(l + 2, j2 - l), inca, work( &
                                 n + j2 - m), work(j2 - m), ka1)
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(1, j2), ab(1, j2 + 1), ab(2, j2), inca, work(n + &
                              j2 - m), work(j2 - m), ka1)
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (n - j2 + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j2), inca, ab(ka1 - l, j2 + 1), &
                               inca, work(n + j2 - m), work(j2 - m), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j2, j1, ka1
                       call stdlib_drot(n - m, x(m + 1, j), 1, x(m + 1, j + 1), 1, work(n + j - m), &
                                 work(j - m))
                    end do
                 end if
              end do loop_360
              if (update) then
                 if (i2 <= n .and. kbt > 0) then
                    ! create nonzero element a(i-kbt+ka+1,i-kbt) outside the
                    ! band and store it in work(i-kbt)
                    work(i - kbt) = -bb(kbt + 1, i - kbt)*ra1
                 end if
              end if
              loop_400: do k = kb, 1, -1
                 if (update) then
                    j2 = i - k - 1 + max(2, k - i0 + 1)*ka1
                 else
                    j2 = i - k - 1 + max(1, k - i0 + 1)*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = (n - j2 + ka + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j2 - ka), inca, ab(ka1 - l, j2 - &
                              ka + 1), inca, work(n + j2 - ka), work(j2 - ka), ka1)
                 end do
                 nr = (n - j2 + ka)/ka1
                 j1 = j2 + (nr - 1)*ka1
                 do j = j1, j2, -ka1
                    work(j) = work(j - ka)
                    work(n + j) = work(n + j - ka)
                 end do
                 do j = j2, j1, ka1
                    ! create nonzero element a(j+1,j-ka) outside the band
                    ! and store it in work(j)
                    work(j) = work(j)*ab(ka1, j - ka + 1)
                    ab(ka1, j - ka + 1) = work(n + j)*ab(ka1, j - ka + 1)
                 end do
                 if (update) then
                    if (i - k < n - ka .and. k <= kbt) work(i - k + ka) = work(i - k)
                 end if
              end do loop_400
              loop_440: do k = kb, 1, -1
                 j2 = i - k - 1 + max(1, k - i0 + 1)*ka1
                 nr = (n - j2 + ka)/ka1
                 j1 = j2 + (nr - 1)*ka1
                 if (nr > 0) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib_dlargv(nr, ab(ka1, j2 - ka), inca, work(j2), ka1, work(n + j2), &
                              ka1)
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(l + 1, j2 - l), inca, ab(l + 2, j2 - l), inca, work( &
                                 n + j2), work(j2), ka1)
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(1, j2), ab(1, j2 + 1), ab(2, j2), inca, work(n + &
                              j2), work(j2), ka1)
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (n - j2 + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j2), inca, ab(ka1 - l, j2 + 1), &
                               inca, work(n + j2), work(j2), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j2, j1, ka1
                       call stdlib_drot(n - m, x(m + 1, j), 1, x(m + 1, j + 1), 1, work(n + j), work( &
                                 j))
                    end do
                 end if
              end do loop_440
              do k = 1, kb - 1
                 j2 = i - k - 1 + max(1, k - i0 + 2)*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = (n - j2 + l)/ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j2), inca, ab(ka1 - l, j2 + 1), &
                               inca, work(n + j2 - m), work(j2 - m), ka1)
                 end do
              end do
              if (kb > 1) then
                 do j = n - 1, i - kb + 2*ka + 1, -1
                    work(n + j - m) = work(n + j - ka - m)
                    work(j - m) = work(j - ka - m)
                 end do
              end if
           end if
           go to 10
480    continue
           ! **************************** phase 2 *****************************
           ! the logical structure of this phase is:
           ! update = .true.
           ! do i = 1, m
              ! use s(i) to update a and create a new bulge
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! update = .false.
           ! do i = m - ka - 1, 2, -1
              ! apply rotations to push all bulges ka positions upward
           ! end do
           ! to avoid duplicating code, the two loops are merged.
           update = .true.
           i = 0
490    continue
           if (update) then
              i = i + 1
              kbt = min(kb, m - i)
              i0 = i + 1
              i1 = max(1, i - ka)
              i2 = i + kbt - ka1
              if (i > m) then
                 update = .false.
                 i = i - 1
                 i0 = m + 1
                 if (ka == 0) return
                 go to 490
              end if
           else
              i = i - ka
              if (i < 2) return
           end if
           if (i < m - kbt) then
              nx = m
           else
              nx = n
           end if
           if (upper) then
              ! transform a, working with the upper triangle
              if (update) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb(kb1, i)
                 do j = i1, i
                    ab(j - i + ka1, i) = ab(j - i + ka1, i)/bii
                 end do
                 do j = i, min(n, i + ka)
                    ab(i - j + ka1, j) = ab(i - j + ka1, j)/bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab(k - j + ka1, j) = ab(k - j + ka1, j) - bb(i - j + kb1, j)*ab(i - k + ka1, k) - bb( &
                        i - k + kb1, k)*ab(i - j + ka1, j) + ab(ka1, i)*bb(i - j + kb1, j)*bb(i - k + kb1, &
                                  k)
                    end do
                    do j = i + kbt + 1, min(n, i + ka)
                       ab(k - j + ka1, j) = ab(k - j + ka1, j) - bb(i - k + kb1, k)*ab(i - j + ka1, j)
                                 
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min(j + ka, i + kbt)
                       ab(j - k + ka1, k) = ab(j - k + ka1, k) - bb(i - k + kb1, k)*ab(j - i + ka1, i)
                                 
                    end do
                 end do
                 if (wantx) then
                    ! post-multiply x by inv(s(i))
                    call stdlib_dscal(nx, one/bii, x(1, i), 1)
                    if (kbt > 0) call stdlib_dger(nx, kbt, -one, x(1, i), 1, bb(kb, i + 1), ldbb - &
                              1, x(1, i + 1), ldx)
                 end if
                 ! store a(i1,i) in ra1 for use in next loop over k
                 ra1 = ab(i1 - i + ka1, i)
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_610: do k = 1, kb - 1
                 if (update) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if (i + k - ka1 > 0 .and. i + k < m) then
                       ! generate rotation to annihilate a(i+k-ka-1,i)
                       call stdlib_dlartg(ab(k + 1, i), ra1, work(n + i + k - ka), work(i + k - ka), ra &
                                 )
                       ! create nonzero element a(i+k-ka-1,i+k) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb(kb1 - k, i + k)*ra1
                       work(m - kb + i + k) = work(n + i + k - ka)*t - work(i + k - ka)*ab(1, i + k)
                       ab(1, i + k) = work(i + k - ka)*t + work(n + i + k - ka)*ab(1, i + k)
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1 - max(1, k + i0 - m + 1)*ka1
                 nr = (j2 + ka - 1)/ka1
                 j1 = j2 - (nr - 1)*ka1
                 if (update) then
                    j2t = min(j2, i - 2*ka + k - 1)
                 else
                    j2t = j2
                 end if
                 nrt = (j2t + ka - 1)/ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(j)
                    work(j) = work(j)*ab(1, j + ka - 1)
                    ab(1, j + ka - 1) = work(n + j)*ab(1, j + ka - 1)
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if (nrt > 0) call stdlib_dlargv(nrt, ab(1, j1 + ka), inca, work(j1), ka1, work( &
                           n + j1), ka1)
                 if (nr > 0) then
                    ! apply rotations in 1st set from the left
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(ka1 - l, j1 + l), inca, ab(ka - l, j1 + l), inca, &
                                 work(n + j1), work(j1), ka1)
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(ka1, j1), ab(ka1, j1 - 1), ab(ka, j1), inca, &
                              work(n + j1), work(j1), ka1)
                 end if
                 ! start applying rotations in 1st set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (j2 + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j1t), inca, ab(l + 1, j1t - 1), inca, &
                               work(n + j1t), work(j1t), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib_drot(nx, x(1, j), 1, x(1, j - 1), 1, work(n + j), work(j))
                                 
                    end do
                 end if
              end do loop_610
              if (update) then
                 if (i2 > 0 .and. kbt > 0) then
                    ! create nonzero element a(i+kbt-ka-1,i+kbt) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work(m - kb + i + kbt) = -bb(kb1 - kbt, i + kbt)*ra1
                 end if
              end if
              loop_650: do k = kb, 1, -1
                 if (update) then
                    j2 = i + k + 1 - max(2, k + i0 - m)*ka1
                 else
                    j2 = i + k + 1 - max(1, k + i0 - m)*ka1
                 end if
                 ! finish applying rotations in 2nd set from the right
                 do l = kb - k, 1, -1
                    nrt = (j2 + ka + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j1t + ka), inca, ab(l + 1, j1t + ka - 1), &
                               inca, work(n + m - kb + j1t + ka), work(m - kb + j1t + ka), ka1)
                 end do
                 nr = (j2 + ka - 1)/ka1
                 j1 = j2 - (nr - 1)*ka1
                 do j = j1, j2, ka1
                    work(m - kb + j) = work(m - kb + j + ka)
                    work(n + m - kb + j) = work(n + m - kb + j + ka)
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j-1,j+ka) outside the band
                    ! and store it in work(m-kb+j)
                    work(m - kb + j) = work(m - kb + j)*ab(1, j + ka - 1)
                    ab(1, j + ka - 1) = work(n + m - kb + j)*ab(1, j + ka - 1)
                 end do
                 if (update) then
                    if (i + k > ka1 .and. k <= kbt) work(m - kb + i + k - ka) = work(m - kb + i + k)
                 end if
              end do loop_650
              loop_690: do k = kb, 1, -1
                 j2 = i + k + 1 - max(1, k + i0 - m)*ka1
                 nr = (j2 + ka - 1)/ka1
                 j1 = j2 - (nr - 1)*ka1
                 if (nr > 0) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib_dlargv(nr, ab(1, j1 + ka), inca, work(m - kb + j1), ka1, work(n + m - &
                              kb + j1), ka1)
                    ! apply rotations in 2nd set from the left
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(ka1 - l, j1 + l), inca, ab(ka - l, j1 + l), inca, &
                                 work(n + m - kb + j1), work(m - kb + j1), ka1)
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(ka1, j1), ab(ka1, j1 - 1), ab(ka, j1), inca, &
                              work(n + m - kb + j1), work(m - kb + j1), ka1)
                 end if
                 ! start applying rotations in 2nd set from the right
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (j2 + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j1t), inca, ab(l + 1, j1t - 1), inca, &
                              work(n + m - kb + j1t), work(m - kb + j1t), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib_drot(nx, x(1, j), 1, x(1, j - 1), 1, work(n + m - kb + j), work( &
                                 m - kb + j))
                    end do
                 end if
              end do loop_690
              do k = 1, kb - 1
                 j2 = i + k + 1 - max(1, k + i0 - m + 1)*ka1
                 ! finish applying rotations in 1st set from the right
                 do l = kb - k, 1, -1
                    nrt = (j2 + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(l, j1t), inca, ab(l + 1, j1t - 1), inca, &
                               work(n + j1t), work(j1t), ka1)
                 end do
              end do
              if (kb > 1) then
                 do j = 2, min(i + kb, m) - 2*ka - 1
                    work(n + j) = work(n + j + ka)
                    work(j) = work(j + ka)
                 end do
              end if
           else
              ! transform a, working with the lower triangle
              if (update) then
                 ! form  inv(s(i))**t * a * inv(s(i))
                 bii = bb(1, i)
                 do j = i1, i
                    ab(i - j + 1, j) = ab(i - j + 1, j)/bii
                 end do
                 do j = i, min(n, i + ka)
                    ab(j - i + 1, i) = ab(j - i + 1, i)/bii
                 end do
                 do k = i + 1, i + kbt
                    do j = k, i + kbt
                       ab(j - k + 1, k) = ab(j - k + 1, k) - bb(j - i + 1, i)*ab(k - i + 1, i) - bb(k - i + 1, &
                                 i)*ab(j - i + 1, i) + ab(1, i)*bb(j - i + 1, i)*bb(k - i + 1, i)
                    end do
                    do j = i + kbt + 1, min(n, i + ka)
                       ab(j - k + 1, k) = ab(j - k + 1, k) - bb(k - i + 1, i)*ab(j - i + 1, i)
                    end do
                 end do
                 do j = i1, i
                    do k = i + 1, min(j + ka, i + kbt)
                       ab(k - j + 1, j) = ab(k - j + 1, j) - bb(k - i + 1, i)*ab(i - j + 1, j)
                    end do
                 end do
                 if (wantx) then
                    ! post-multiply x by inv(s(i))
                    call stdlib_dscal(nx, one/bii, x(1, i), 1)
                    if (kbt > 0) call stdlib_dger(nx, kbt, -one, x(1, i), 1, bb(2, i), 1, x(1, &
                              i + 1), ldx)
                 end if
                 ! store a(i,i1) in ra1 for use in next loop over k
                 ra1 = ab(i - i1 + 1, i1)
              end if
              ! generate and apply vectors of rotations to chase all the
              ! existing bulges ka positions up toward the top of the band
              loop_840: do k = 1, kb - 1
                 if (update) then
                    ! determine the rotations which would annihilate the bulge
                    ! which has in theory just been created
                    if (i + k - ka1 > 0 .and. i + k < m) then
                       ! generate rotation to annihilate a(i,i+k-ka-1)
                       call stdlib_dlartg(ab(ka1 - k, i + k - ka), ra1, work(n + i + k - ka), work(i + k - &
                                 ka), ra)
                       ! create nonzero element a(i+k,i+k-ka-1) outside the
                       ! band and store it in work(m-kb+i+k)
                       t = -bb(k + 1, i)*ra1
                       work(m - kb + i + k) = work(n + i + k - ka)*t - work(i + k - ka)*ab(ka1, i + k - ka)
                                 
                       ab(ka1, i + k - ka) = work(i + k - ka)*t + work(n + i + k - ka)*ab(ka1, i + k - ka)
                                 
                       ra1 = ra
                    end if
                 end if
                 j2 = i + k + 1 - max(1, k + i0 - m + 1)*ka1
                 nr = (j2 + ka - 1)/ka1
                 j1 = j2 - (nr - 1)*ka1
                 if (update) then
                    j2t = min(j2, i - 2*ka + k - 1)
                 else
                    j2t = j2
                 end if
                 nrt = (j2t + ka - 1)/ka1
                 do j = j1, j2t, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(j)
                    work(j) = work(j)*ab(ka1, j - 1)
                    ab(ka1, j - 1) = work(n + j)*ab(ka1, j - 1)
                 end do
                 ! generate rotations in 1st set to annihilate elements which
                 ! have been created outside the band
                 if (nrt > 0) call stdlib_dlargv(nrt, ab(ka1, j1), inca, work(j1), ka1, work(n + &
                           j1), ka1)
                 if (nr > 0) then
                    ! apply rotations in 1st set from the right
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(l + 1, j1), inca, ab(l + 2, j1 - 1), inca, work(n + &
                                 j1), work(j1), ka1)
                    end do
                    ! apply rotations in 1st set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(1, j1), ab(1, j1 - 1), ab(2, j1 - 1), inca, work( &
                              n + j1), work(j1), ka1)
                 end if
                 ! start applying rotations in 1st set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (j2 + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j1t - ka1 + l), inca, ab(ka1 - l, &
                              j1t - ka1 + l), inca, work(n + j1t), work(j1t), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 1st set
                    do j = j1, j2, ka1
                       call stdlib_drot(nx, x(1, j), 1, x(1, j - 1), 1, work(n + j), work(j))
                                 
                    end do
                 end if
              end do loop_840
              if (update) then
                 if (i2 > 0 .and. kbt > 0) then
                    ! create nonzero element a(i+kbt,i+kbt-ka-1) outside the
                    ! band and store it in work(m-kb+i+kbt)
                    work(m - kb + i + kbt) = -bb(kbt + 1, i)*ra1
                 end if
              end if
              loop_880: do k = kb, 1, -1
                 if (update) then
                    j2 = i + k + 1 - max(2, k + i0 - m)*ka1
                 else
                    j2 = i + k + 1 - max(1, k + i0 - m)*ka1
                 end if
                 ! finish applying rotations in 2nd set from the left
                 do l = kb - k, 1, -1
                    nrt = (j2 + ka + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j1t + l - 1), inca, ab(ka1 - l, &
                              j1t + l - 1), inca, work(n + m - kb + j1t + ka), work(m - kb + j1t + ka), ka1)
                 end do
                 nr = (j2 + ka - 1)/ka1
                 j1 = j2 - (nr - 1)*ka1
                 do j = j1, j2, ka1
                    work(m - kb + j) = work(m - kb + j + ka)
                    work(n + m - kb + j) = work(n + m - kb + j + ka)
                 end do
                 do j = j1, j2, ka1
                    ! create nonzero element a(j+ka,j-1) outside the band
                    ! and store it in work(m-kb+j)
                    work(m - kb + j) = work(m - kb + j)*ab(ka1, j - 1)
                    ab(ka1, j - 1) = work(n + m - kb + j)*ab(ka1, j - 1)
                 end do
                 if (update) then
                    if (i + k > ka1 .and. k <= kbt) work(m - kb + i + k - ka) = work(m - kb + i + k)
                 end if
              end do loop_880
              loop_920: do k = kb, 1, -1
                 j2 = i + k + 1 - max(1, k + i0 - m)*ka1
                 nr = (j2 + ka - 1)/ka1
                 j1 = j2 - (nr - 1)*ka1
                 if (nr > 0) then
                    ! generate rotations in 2nd set to annihilate elements
                    ! which have been created outside the band
                    call stdlib_dlargv(nr, ab(ka1, j1), inca, work(m - kb + j1), ka1, work(n + m - &
                              kb + j1), ka1)
                    ! apply rotations in 2nd set from the right
                    do l = 1, ka - 1
                       call stdlib_dlartv(nr, ab(l + 1, j1), inca, ab(l + 2, j1 - 1), inca, work(n + &
                                 m - kb + j1), work(m - kb + j1), ka1)
                    end do
                    ! apply rotations in 2nd set from both sides to diagonal
                    ! blocks
                    call stdlib_dlar2v(nr, ab(1, j1), ab(1, j1 - 1), ab(2, j1 - 1), inca, work( &
                              n + m - kb + j1), work(m - kb + j1), ka1)
                 end if
                 ! start applying rotations in 2nd set from the left
                 do l = ka - 1, kb - k + 1, -1
                    nrt = (j2 + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j1t - ka1 + l), inca, ab(ka1 - l, &
                              j1t - ka1 + l), inca, work(n + m - kb + j1t), work(m - kb + j1t), ka1)
                 end do
                 if (wantx) then
                    ! post-multiply x by product of rotations in 2nd set
                    do j = j1, j2, ka1
                       call stdlib_drot(nx, x(1, j), 1, x(1, j - 1), 1, work(n + m - kb + j), work( &
                                 m - kb + j))
                    end do
                 end if
              end do loop_920
              do k = 1, kb - 1
                 j2 = i + k + 1 - max(1, k + i0 - m + 1)*ka1
                 ! finish applying rotations in 1st set from the left
                 do l = kb - k, 1, -1
                    nrt = (j2 + l - 1)/ka1
                    j1t = j2 - (nrt - 1)*ka1
                    if (nrt > 0) call stdlib_dlartv(nrt, ab(ka1 - l + 1, j1t - ka1 + l), inca, ab(ka1 - l, &
                              j1t - ka1 + l), inca, work(n + j1t), work(j1t), ka1)
                 end do
              end do
              if (kb > 1) then
                 do j = 2, min(i + kb, m) - 2*ka - 1
                    work(n + j) = work(n + j + ka)
                    work(j) = work(j + ka)
                 end do
              end if
           end if
           go to 490
           ! end of stdlib_dsbgst
     end subroutine stdlib_dsbgst

     ! DSBTRD reduces a real symmetric band matrix A to symmetric
     ! tridiagonal form T by an orthogonal similarity transformation:
     ! Q**T * A * Q = T.

     subroutine stdlib_dsbtrd(vect, uplo, n, kd, ab, ldab, d, e, q, ldq, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, vect
           integer(ilp) :: info, kd, ldab, ldq, n
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), d(*), e(*), q(ldq, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: initq, upper, wantq
           integer(ilp) :: i, i2, ibl, inca, incx, iqaend, iqb, iqend, j, j1, j1end, j1inc, j2, &
                     jend, jin, jinc, k, kd1, kdm1, kdn, l, last, lend, nq, nr, nrt
           real(dp) :: temp
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
     
           ! .. executable statements ..
           ! test the input parameters
           initq = stdlib_lsame(vect, 'v')
           wantq = initq .or. stdlib_lsame(vect, 'u')
           upper = stdlib_lsame(uplo, 'u')
           kd1 = kd + 1
           kdm1 = kd - 1
           incx = ldab - 1
           iqend = 1
           info = 0
           if (.not. wantq .and. .not. stdlib_lsame(vect, 'n')) then
              info = -1
           else if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (kd < 0) then
              info = -4
           else if (ldab < kd1) then
              info = -6
           else if (ldq < max(1, n) .and. wantq) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsbtrd', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! initialize q to the unit matrix, if needed
           if (initq) call stdlib_dlaset('full', n, n, zero, one, q, ldq)
           ! wherever possible, plane rotations are generated and applied in
           ! vector operations of length nr over the index set j1:j2:kd1.
           ! the cosines and sines of the plane rotations are stored in the
           ! arrays d and work.
           inca = kd1*ldab
           kdn = min(n - 1, kd)
           if (upper) then
              if (kd > 1) then
                 ! reduce to tridiagonal form, working with upper triangle
                 nr = 0
                 j1 = kdn + 2
                 j2 = 1
                 loop_90: do i = 1, n - 2
                    ! reduce i-th row of matrix to tridiagonal form
                    loop_80: do k = kdn + 1, 2, -1
                       j1 = j1 + kdn
                       j2 = j2 + kdn
                       if (nr > 0) then
                          ! generate plane rotations to annihilate nonzero
                          ! elements which have been created outside the band
                          call stdlib_dlargv(nr, ab(1, j1 - 1), inca, work(j1), kd1, d(j1), &
                                    kd1)
                          ! apply rotations from the right
                          ! dependent on the the number of diagonals either
                          ! stdlib_dlartv or stdlib_drot is used
                          if (nr >= 2*kd - 1) then
                             do l = 1, kd - 1
                                call stdlib_dlartv(nr, ab(l + 1, j1 - 1), inca, ab(l, j1), inca, &
                                          d(j1), work(j1), kd1)
                             end do
                          else
                             jend = j1 + (nr - 1)*kd1
                             do jinc = j1, jend, kd1
                                call stdlib_drot(kdm1, ab(2, jinc - 1), 1, ab(1, jinc), 1, d( &
                                          jinc), work(jinc))
                             end do
                          end if
                       end if
                       if (k > 2) then
                          if (k <= n - i + 1) then
                             ! generate plane rotation to annihilate a(i,i+k-1)
                             ! within the band
                             call stdlib_dlartg(ab(kd - k + 3, i + k - 2), ab(kd - k + 2, i + k - 1), d(i + k - &
                                       1), work(i + k - 1), temp)
                             ab(kd - k + 3, i + k - 2) = temp
                             ! apply rotation from the right
                             call stdlib_drot(k - 3, ab(kd - k + 4, i + k - 2), 1, ab(kd - k + 3, i + k - 1), 1, &
                                        d(i + k - 1), work(i + k - 1))
                          end if
                          nr = nr + 1
                          j1 = j1 - kdn - 1
                       end if
                       ! apply plane rotations from both sides to diagonal
                       ! blocks
                       if (nr > 0) call stdlib_dlar2v(nr, ab(kd1, j1 - 1), ab(kd1, j1), ab(kd, &
                                 j1), inca, d(j1), work(j1), kd1)
                       ! apply plane rotations from the left
                       if (nr > 0) then
                          if (2*kd - 1 < nr) then
                          ! dependent on the the number of diagonals either
                          ! stdlib_dlartv or stdlib_drot is used
                             do l = 1, kd - 1
                                if (j2 + l > n) then
                                   nrt = nr - 1
                                else
                                   nrt = nr
                                end if
                                if (nrt > 0) call stdlib_dlartv(nrt, ab(kd - l, j1 + l), inca, ab(kd - &
                                          l + 1, j1 + l), inca, d(j1), work(j1), kd1)
                             end do
                          else
                             j1end = j1 + kd1*(nr - 2)
                             if (j1end >= j1) then
                                do jin = j1, j1end, kd1
                                   call stdlib_drot(kd - 1, ab(kd - 1, jin + 1), incx, ab(kd, jin + 1) &
                                             , incx, d(jin), work(jin))
                                end do
                             end if
                             lend = min(kdm1, n - j2)
                             last = j1end + kd1
                             if (lend > 0) call stdlib_drot(lend, ab(kd - 1, last + 1), incx, ab(kd, &
                                       last + 1), incx, d(last), work(last))
                          end if
                       end if
                       if (wantq) then
                          ! accumulate product of plane rotations in q
                          if (initq) then
                       ! take advantage of the fact that q was
                       ! initially the identity matrix
                             iqend = max(iqend, j2)
                             i2 = max(0, k - 3)
                             iqaend = 1 + i*kd
                             if (k == 2) iqaend = iqaend + kd
                             iqaend = min(iqaend, iqend)
                             do j = j1, j2, kd1
                                ibl = i - i2/kdm1
                                i2 = i2 + 1
                                iqb = max(1, j - ibl)
                                nq = 1 + iqaend - iqb
                                iqaend = min(iqaend + kd, iqend)
                                call stdlib_drot(nq, q(iqb, j - 1), 1, q(iqb, j), 1, d(j), &
                                          work(j))
                             end do
                          else
                             do j = j1, j2, kd1
                                call stdlib_drot(n, q(1, j - 1), 1, q(1, j), 1, d(j), work(j &
                                          ))
                             end do
                          end if
                       end if
                       if (j2 + kdn > n) then
                          ! adjust j2 to keep within the bounds of the matrix
                          nr = nr - 1
                          j2 = j2 - kdn - 1
                       end if
                       do j = j1, j2, kd1
                          ! create nonzero element a(j-1,j+kd) outside the band
                          ! and store it in work
                          work(j + kd) = work(j)*ab(1, j + kd)
                          ab(1, j + kd) = d(j)*ab(1, j + kd)
                       end do
                    end do loop_80
                 end do loop_90
              end if
              if (kd > 0) then
                 ! copy off-diagonal elements to e
                 do i = 1, n - 1
                    e(i) = ab(kd, i + 1)
                 end do
              else
                 ! set e to zero if original matrix was diagonal
                 do i = 1, n - 1
                    e(i) = zero
                 end do
              end if
              ! copy diagonal elements to d
              do i = 1, n
                 d(i) = ab(kd1, i)
              end do
           else
              if (kd > 1) then
                 ! reduce to tridiagonal form, working with lower triangle
                 nr = 0
                 j1 = kdn + 2
                 j2 = 1
                 loop_210: do i = 1, n - 2
                    ! reduce i-th column of matrix to tridiagonal form
                    loop_200: do k = kdn + 1, 2, -1
                       j1 = j1 + kdn
                       j2 = j2 + kdn
                       if (nr > 0) then
                          ! generate plane rotations to annihilate nonzero
                          ! elements which have been created outside the band
                          call stdlib_dlargv(nr, ab(kd1, j1 - kd1), inca, work(j1), kd1, d(j1) &
                                    , kd1)
                          ! apply plane rotations from one side
                          ! dependent on the the number of diagonals either
                          ! stdlib_dlartv or stdlib_drot is used
                          if (nr > 2*kd - 1) then
                             do l = 1, kd - 1
                                call stdlib_dlartv(nr, ab(kd1 - l, j1 - kd1 + l), inca, ab(kd1 - l + 1, &
                                          j1 - kd1 + l), inca, d(j1), work(j1), kd1)
                             end do
                          else
                             jend = j1 + kd1*(nr - 1)
                             do jinc = j1, jend, kd1
                                call stdlib_drot(kdm1, ab(kd, jinc - kd), incx, ab(kd1, jinc - kd) &
                                          , incx, d(jinc), work(jinc))
                             end do
                          end if
                       end if
                       if (k > 2) then
                          if (k <= n - i + 1) then
                             ! generate plane rotation to annihilate a(i+k-1,i)
                             ! within the band
                             call stdlib_dlartg(ab(k - 1, i), ab(k, i), d(i + k - 1), work(i + k - 1 &
                                       ), temp)
                             ab(k - 1, i) = temp
                             ! apply rotation from the left
                             call stdlib_drot(k - 3, ab(k - 2, i + 1), ldab - 1, ab(k - 1, i + 1), ldab - 1, &
                                        d(i + k - 1), work(i + k - 1))
                          end if
                          nr = nr + 1
                          j1 = j1 - kdn - 1
                       end if
                       ! apply plane rotations from both sides to diagonal
                       ! blocks
                       if (nr > 0) call stdlib_dlar2v(nr, ab(1, j1 - 1), ab(1, j1), ab(2, j1 - 1), &
                                  inca, d(j1), work(j1), kd1)
                       ! apply plane rotations from the right
                          ! dependent on the the number of diagonals either
                          ! stdlib_dlartv or stdlib_drot is used
                       if (nr > 0) then
                          if (nr > 2*kd - 1) then
                             do l = 1, kd - 1
                                if (j2 + l > n) then
                                   nrt = nr - 1
                                else
                                   nrt = nr
                                end if
                                if (nrt > 0) call stdlib_dlartv(nrt, ab(l + 2, j1 - 1), inca, ab(l + 1, &
                                           j1), inca, d(j1), work(j1), kd1)
                             end do
                          else
                             j1end = j1 + kd1*(nr - 2)
                             if (j1end >= j1) then
                                do j1inc = j1, j1end, kd1
                                   call stdlib_drot(kdm1, ab(3, j1inc - 1), 1, ab(2, j1inc), 1, &
                                             d(j1inc), work(j1inc))
                                end do
                             end if
                             lend = min(kdm1, n - j2)
                             last = j1end + kd1
                             if (lend > 0) call stdlib_drot(lend, ab(3, last - 1), 1, ab(2, last), &
                                        1, d(last), work(last))
                          end if
                       end if
                       if (wantq) then
                          ! accumulate product of plane rotations in q
                          if (initq) then
                       ! take advantage of the fact that q was
                       ! initially the identity matrix
                             iqend = max(iqend, j2)
                             i2 = max(0, k - 3)
                             iqaend = 1 + i*kd
                             if (k == 2) iqaend = iqaend + kd
                             iqaend = min(iqaend, iqend)
                             do j = j1, j2, kd1
                                ibl = i - i2/kdm1
                                i2 = i2 + 1
                                iqb = max(1, j - ibl)
                                nq = 1 + iqaend - iqb
                                iqaend = min(iqaend + kd, iqend)
                                call stdlib_drot(nq, q(iqb, j - 1), 1, q(iqb, j), 1, d(j), &
                                          work(j))
                             end do
                          else
                             do j = j1, j2, kd1
                                call stdlib_drot(n, q(1, j - 1), 1, q(1, j), 1, d(j), work(j &
                                          ))
                             end do
                          end if
                       end if
                       if (j2 + kdn > n) then
                          ! adjust j2 to keep within the bounds of the matrix
                          nr = nr - 1
                          j2 = j2 - kdn - 1
                       end if
                       do j = j1, j2, kd1
                          ! create nonzero element a(j+kd,j-1) outside the
                          ! band and store it in work
                          work(j + kd) = work(j)*ab(kd1, j)
                          ab(kd1, j) = d(j)*ab(kd1, j)
                       end do
                    end do loop_200
                 end do loop_210
              end if
              if (kd > 0) then
                 ! copy off-diagonal elements to e
                 do i = 1, n - 1
                    e(i) = ab(2, i)
                 end do
              else
                 ! set e to zero if original matrix was diagonal
                 do i = 1, n - 1
                    e(i) = zero
                 end do
              end if
              ! copy diagonal elements to d
              do i = 1, n
                 d(i) = ab(1, i)
              end do
           end if
           return
           ! end of stdlib_dsbtrd
     end subroutine stdlib_dsbtrd

     ! Level 3 BLAS like routine for C in RFP Format.
     ! DSFRK performs one of the symmetric rank--k operations
     ! C := alpha*A*A**T + beta*C,
     ! or
     ! C := alpha*A**T*A + beta*C,
     ! where alpha and beta are real scalars, C is an n--by--n symmetric
     ! matrix and A is an n--by--k matrix in the first case and a k--by--n
     ! matrix in the second case.

     subroutine stdlib_dsfrk(transr, uplo, trans, n, k, alpha, a, lda, beta, c)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(dp) :: alpha, beta
           integer(ilp) :: k, lda, n
           character :: trans, transr, uplo
           ! .. array arguments ..
           real(dp) :: a(lda, *), c(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lower, normaltransr, nisodd, notrans
           integer(ilp) :: info, nrowa, j, nk, n1, n2
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           normaltransr = stdlib_lsame(transr, 'n')
           lower = stdlib_lsame(uplo, 'l')
           notrans = stdlib_lsame(trans, 'n')
           if (notrans) then
              nrowa = n
           else
              nrowa = k
           end if
           if (.not. normaltransr .and. .not. stdlib_lsame(transr, 't')) then
              info = -1
           else if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -2
           else if (.not. notrans .and. .not. stdlib_lsame(trans, 't')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0) then
              info = -5
           else if (lda < max(1, nrowa)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsfrk ', -info)
              return
           end if
           ! quick return if possible.
           ! the quick return case: ((alpha==0).and.(beta/=zero)) is not
           ! done (it is in stdlib_dsyrk for example) and left in the general case.
           if ((n == 0) .or. (((alpha == zero) .or. (k == 0)) .and. (beta == one))) &
                     return
           if ((alpha == zero) .and. (beta == zero)) then
              do j = 1, ((n*(n + 1))/2)
                 c(j) = zero
              end do
              return
           end if
           ! c is n-by-n.
           ! if n is odd, set nisodd = .true., and n1 and n2.
           ! if n is even, nisodd = .false., and nk.
           if (mod(n, 2) == 0) then
              nisodd = .false.
              nk = n/2
           else
              nisodd = .true.
              if (lower) then
                 n2 = n/2
                 n1 = n - n2
              else
                 n1 = n/2
                 n2 = n - n1
              end if
           end if
           if (nisodd) then
              ! n is odd
              if (normaltransr) then
                 ! n is odd and transr = 'n'
                 if (lower) then
                    ! n is odd, transr = 'n', and uplo = 'l'
                    if (notrans) then
                       ! n is odd, transr = 'n', uplo = 'l', and trans = 'n'
                       call stdlib_dsyrk('l', 'n', n1, k, alpha, a(1, 1), lda, beta, c(1), n)
                                 
                       call stdlib_dsyrk('u', 'n', n2, k, alpha, a(n1 + 1, 1), lda, beta, c(n + 1) &
                                 , n)
                       call stdlib_dgemm('n', 't', n2, n1, k, alpha, a(n1 + 1, 1), lda, a(1, 1), &
                                  lda, beta, c(n1 + 1), n)
                    else
                       ! n is odd, transr = 'n', uplo = 'l', and trans = 't'
                       call stdlib_dsyrk('l', 't', n1, k, alpha, a(1, 1), lda, beta, c(1), n)
                                 
                       call stdlib_dsyrk('u', 't', n2, k, alpha, a(1, n1 + 1), lda, beta, c(n + 1) &
                                 , n)
                       call stdlib_dgemm('t', 'n', n2, n1, k, alpha, a(1, n1 + 1), lda, a(1, 1), &
                                  lda, beta, c(n1 + 1), n)
                    end if
                 else
                    ! n is odd, transr = 'n', and uplo = 'u'
                    if (notrans) then
                       ! n is odd, transr = 'n', uplo = 'u', and trans = 'n'
                       call stdlib_dsyrk('l', 'n', n1, k, alpha, a(1, 1), lda, beta, c(n2 + 1), &
                                 n)
                       call stdlib_dsyrk('u', 'n', n2, k, alpha, a(n2, 1), lda, beta, c(n1 + 1), &
                                  n)
                       call stdlib_dgemm('n', 't', n1, n2, k, alpha, a(1, 1), lda, a(n2, 1), &
                                 lda, beta, c(1), n)
                    else
                       ! n is odd, transr = 'n', uplo = 'u', and trans = 't'
                       call stdlib_dsyrk('l', 't', n1, k, alpha, a(1, 1), lda, beta, c(n2 + 1), &
                                 n)
                       call stdlib_dsyrk('u', 't', n2, k, alpha, a(1, n2), lda, beta, c(n1 + 1), &
                                  n)
                       call stdlib_dgemm('t', 'n', n1, n2, k, alpha, a(1, 1), lda, a(1, n2), &
                                 lda, beta, c(1), n)
                    end if
                 end if
              else
                 ! n is odd, and transr = 't'
                 if (lower) then
                    ! n is odd, transr = 't', and uplo = 'l'
                    if (notrans) then
                       ! n is odd, transr = 't', uplo = 'l', and trans = 'n'
                       call stdlib_dsyrk('u', 'n', n1, k, alpha, a(1, 1), lda, beta, c(1), n1 &
                                 )
                       call stdlib_dsyrk('l', 'n', n2, k, alpha, a(n1 + 1, 1), lda, beta, c(2), &
                                 n1)
                       call stdlib_dgemm('n', 't', n1, n2, k, alpha, a(1, 1), lda, a(n1 + 1, 1), &
                                  lda, beta, c(n1*n1 + 1), n1)
                    else
                       ! n is odd, transr = 't', uplo = 'l', and trans = 't'
                       call stdlib_dsyrk('u', 't', n1, k, alpha, a(1, 1), lda, beta, c(1), n1 &
                                 )
                       call stdlib_dsyrk('l', 't', n2, k, alpha, a(1, n1 + 1), lda, beta, c(2), &
                                 n1)
                       call stdlib_dgemm('t', 'n', n1, n2, k, alpha, a(1, 1), lda, a(1, n1 + 1), &
                                  lda, beta, c(n1*n1 + 1), n1)
                    end if
                 else
                    ! n is odd, transr = 't', and uplo = 'u'
                    if (notrans) then
                       ! n is odd, transr = 't', uplo = 'u', and trans = 'n'
                       call stdlib_dsyrk('u', 'n', n1, k, alpha, a(1, 1), lda, beta, c(n2*n2 + 1 &
                                 ), n2)
                       call stdlib_dsyrk('l', 'n', n2, k, alpha, a(n1 + 1, 1), lda, beta, c( &
                                 n1*n2 + 1), n2)
                       call stdlib_dgemm('n', 't', n2, n1, k, alpha, a(n1 + 1, 1), lda, a(1, 1), &
                                  lda, beta, c(1), n2)
                    else
                       ! n is odd, transr = 't', uplo = 'u', and trans = 't'
                       call stdlib_dsyrk('u', 't', n1, k, alpha, a(1, 1), lda, beta, c(n2*n2 + 1 &
                                 ), n2)
                       call stdlib_dsyrk('l', 't', n2, k, alpha, a(1, n1 + 1), lda, beta, c( &
                                 n1*n2 + 1), n2)
                       call stdlib_dgemm('t', 'n', n2, n1, k, alpha, a(1, n1 + 1), lda, a(1, 1), &
                                  lda, beta, c(1), n2)
                    end if
                 end if
              end if
           else
              ! n is even
              if (normaltransr) then
                 ! n is even and transr = 'n'
                 if (lower) then
                    ! n is even, transr = 'n', and uplo = 'l'
                    if (notrans) then
                       ! n is even, transr = 'n', uplo = 'l', and trans = 'n'
                       call stdlib_dsyrk('l', 'n', nk, k, alpha, a(1, 1), lda, beta, c(2), n + &
                                 1)
                       call stdlib_dsyrk('u', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c(1), &
                                 n + 1)
                       call stdlib_dgemm('n', 't', nk, nk, k, alpha, a(nk + 1, 1), lda, a(1, 1), &
                                  lda, beta, c(nk + 2), n + 1)
                    else
                       ! n is even, transr = 'n', uplo = 'l', and trans = 't'
                       call stdlib_dsyrk('l', 't', nk, k, alpha, a(1, 1), lda, beta, c(2), n + &
                                 1)
                       call stdlib_dsyrk('u', 't', nk, k, alpha, a(1, nk + 1), lda, beta, c(1), &
                                 n + 1)
                       call stdlib_dgemm('t', 'n', nk, nk, k, alpha, a(1, nk + 1), lda, a(1, 1), &
                                  lda, beta, c(nk + 2), n + 1)
                    end if
                 else
                    ! n is even, transr = 'n', and uplo = 'u'
                    if (notrans) then
                       ! n is even, transr = 'n', uplo = 'u', and trans = 'n'
                       call stdlib_dsyrk('l', 'n', nk, k, alpha, a(1, 1), lda, beta, c(nk + 2), &
                                 n + 1)
                       call stdlib_dsyrk('u', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c(nk + 1 &
                                 ), n + 1)
                       call stdlib_dgemm('n', 't', nk, nk, k, alpha, a(1, 1), lda, a(nk + 1, 1), &
                                  lda, beta, c(1), n + 1)
                    else
                       ! n is even, transr = 'n', uplo = 'u', and trans = 't'
                       call stdlib_dsyrk('l', 't', nk, k, alpha, a(1, 1), lda, beta, c(nk + 2), &
                                 n + 1)
                       call stdlib_dsyrk('u', 't', nk, k, alpha, a(1, nk + 1), lda, beta, c(nk + 1 &
                                 ), n + 1)
                       call stdlib_dgemm('t', 'n', nk, nk, k, alpha, a(1, 1), lda, a(1, nk + 1), &
                                  lda, beta, c(1), n + 1)
                    end if
                 end if
              else
                 ! n is even, and transr = 't'
                 if (lower) then
                    ! n is even, transr = 't', and uplo = 'l'
                    if (notrans) then
                       ! n is even, transr = 't', uplo = 'l', and trans = 'n'
                       call stdlib_dsyrk('u', 'n', nk, k, alpha, a(1, 1), lda, beta, c(nk + 1), &
                                 nk)
                       call stdlib_dsyrk('l', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c(1), &
                                 nk)
                       call stdlib_dgemm('n', 't', nk, nk, k, alpha, a(1, 1), lda, a(nk + 1, 1), &
                                  lda, beta, c(((nk + 1)*nk) + 1), nk)
                    else
                       ! n is even, transr = 't', uplo = 'l', and trans = 't'
                       call stdlib_dsyrk('u', 't', nk, k, alpha, a(1, 1), lda, beta, c(nk + 1), &
                                 nk)
                       call stdlib_dsyrk('l', 't', nk, k, alpha, a(1, nk + 1), lda, beta, c(1), &
                                 nk)
                       call stdlib_dgemm('t', 'n', nk, nk, k, alpha, a(1, 1), lda, a(1, nk + 1), &
                                  lda, beta, c(((nk + 1)*nk) + 1), nk)
                    end if
                 else
                    ! n is even, transr = 't', and uplo = 'u'
                    if (notrans) then
                       ! n is even, transr = 't', uplo = 'u', and trans = 'n'
                       call stdlib_dsyrk('u', 'n', nk, k, alpha, a(1, 1), lda, beta, c(nk*(nk + &
                                 1) + 1), nk)
                       call stdlib_dsyrk('l', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c( &
                                 nk*nk + 1), nk)
                       call stdlib_dgemm('n', 't', nk, nk, k, alpha, a(nk + 1, 1), lda, a(1, 1), &
                                  lda, beta, c(1), nk)
                    else
                       ! n is even, transr = 't', uplo = 'u', and trans = 't'
                       call stdlib_dsyrk('u', 't', nk, k, alpha, a(1, 1), lda, beta, c(nk*(nk + &
                                 1) + 1), nk)
                       call stdlib_dsyrk('l', 't', nk, k, alpha, a(1, nk + 1), lda, beta, c( &
                                 nk*nk + 1), nk)
                       call stdlib_dgemm('t', 'n', nk, nk, k, alpha, a(1, nk + 1), lda, a(1, 1), &
                                  lda, beta, c(1), nk)
                    end if
                 end if
              end if
           end if
           return
           ! end of stdlib_dsfrk
     end subroutine stdlib_dsfrk

     ! DSPGST reduces a real symmetric-definite generalized eigenproblem
     ! to standard form, using packed storage.
     ! If ITYPE = 1, the problem is A*x = lambda*B*x,
     ! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     ! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     ! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     ! B must have been previously factorized as U**T*U or L*L**T by DPPTRF.

     subroutine stdlib_dspgst(itype, uplo, n, ap, bp, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, itype, n
           ! .. array arguments ..
           real(dp) :: ap(*), bp(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(dp) :: ajj, akk, bjj, bkk, ct
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (itype < 1 .or. itype > 3) then
              info = -1
           else if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -2
           else if (n < 0) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dspgst', -info)
              return
           end if
           if (itype == 1) then
              if (upper) then
                 ! compute inv(u**t)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0
                 do j = 1, n
                    j1 = jj + 1
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    bjj = bp(jj)
                    call stdlib_dtpsv(uplo, 'transpose', 'nonunit', j, bp, ap(j1), 1)
                    call stdlib_dspmv(uplo, j - 1, -one, ap, bp(j1), 1, one, ap(j1), 1)
                    call stdlib_dscal(j - 1, one/bjj, ap(j1), 1)
                    ap(jj) = (ap(jj) - stdlib_ddot(j - 1, ap(j1), 1, bp(j1), 1))/ &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1
                 do k = 1, n
                    k1k1 = kk + n - k + 1
                    ! update the lower triangle of a(k:n,k:n)
                    akk = ap(kk)
                    bkk = bp(kk)
                    akk = akk/bkk**2
                    ap(kk) = akk
                    if (k < n) then
                       call stdlib_dscal(n - k, one/bkk, ap(kk + 1), 1)
                       ct = -half*akk
                       call stdlib_daxpy(n - k, ct, bp(kk + 1), 1, ap(kk + 1), 1)
                       call stdlib_dspr2(uplo, n - k, -one, ap(kk + 1), 1, bp(kk + 1), 1, ap(k1k1) &
                                  )
                       call stdlib_daxpy(n - k, ct, bp(kk + 1), 1, ap(kk + 1), 1)
                       call stdlib_dtpsv(uplo, 'no transpose', 'non-unit', n - k, bp(k1k1), ap( &
                                 kk + 1), 1)
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if (upper) then
                 ! compute u*a*u**t
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0
                 do k = 1, n
                    k1 = kk + 1
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = ap(kk)
                    bkk = bp(kk)
                    call stdlib_dtpmv(uplo, 'no transpose', 'non-unit', k - 1, bp, ap(k1), 1)
                              
                    ct = half*akk
                    call stdlib_daxpy(k - 1, ct, bp(k1), 1, ap(k1), 1)
                    call stdlib_dspr2(uplo, k - 1, one, ap(k1), 1, bp(k1), 1, ap)
                    call stdlib_daxpy(k - 1, ct, bp(k1), 1, ap(k1), 1)
                    call stdlib_dscal(k - 1, bkk, ap(k1), 1)
                    ap(kk) = akk*bkk**2
                 end do
              else
                 ! compute l**t *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1
                 do j = 1, n
                    j1j1 = jj + n - j + 1
                    ! compute the j-th column of the lower triangle of a
                    ajj = ap(jj)
                    bjj = bp(jj)
                    ap(jj) = ajj*bjj + stdlib_ddot(n - j, ap(jj + 1), 1, bp(jj + 1), 1)
                    call stdlib_dscal(n - j, bjj, ap(jj + 1), 1)
                    call stdlib_dspmv(uplo, n - j, one, ap(j1j1), bp(jj + 1), 1, one, ap(jj + 1), &
                              1)
                    call stdlib_dtpmv(uplo, 'transpose', 'non-unit', n - j + 1, bp(jj), ap(jj), 1 &
                              )
                    jj = j1j1
                 end do
              end if
           end if
           return
           ! end of stdlib_dspgst
     end subroutine stdlib_dspgst

     ! DSPTRF computes the factorization of a real symmetric matrix A stored
     ! in packed format using the Bunch-Kaufman diagonal pivoting method:
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, and D is symmetric and block diagonal with
     ! 1-by-1 and 2-by-2 diagonal blocks.

     subroutine stdlib_dsptrf(uplo, n, ap, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: ap(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: sevten = 17.0_dp
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp
           real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, r1, rowmax, t, wk, wkm1, &
                     wkp1
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsptrf', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
              kc = (n - 1)*n/2 + 1
10      continue
              knc = kc
              ! if k < 1, exit from loop
              if (k < 1) go to 110
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(ap(kc + k - 1))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value
              if (k > 1) then
                 imax = stdlib_idamax(k - 1, ap(kc), 1)
                 colmax = abs(ap(kc + imax - 1))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    rowmax = zero
                    jmax = imax
                    kx = imax*(imax + 1)/2 + imax
                    do j = imax + 1, k
                       if (abs(ap(kx)) > rowmax) then
                          rowmax = abs(ap(kx))
                          jmax = j
                       end if
                       kx = kx + j
                    end do
                    kpc = (imax - 1)*imax/2 + 1
                    if (imax > 1) then
                       jmax = stdlib_idamax(imax - 1, ap(kpc), 1)
                       rowmax = max(rowmax, abs(ap(kpc + jmax - 1)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(ap(kpc + imax - 1)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k - kstep + 1
                 if (kstep == 2) knc = knc - k + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    call stdlib_dswap(kp - 1, ap(knc), 1, ap(kpc), 1)
                    kx = kpc + kp - 1
                    do j = kp + 1, kk - 1
                       kx = kx + j - 1
                       t = ap(knc + j - 1)
                       ap(knc + j - 1) = ap(kx)
                       ap(kx) = t
                    end do
                    t = ap(knc + kk - 1)
                    ap(knc + kk - 1) = ap(kpc + kp - 1)
                    ap(kpc + kp - 1) = t
                    if (kstep == 2) then
                       t = ap(kc + k - 2)
                       ap(kc + k - 2) = ap(kc + kp - 1)
                       ap(kc + kp - 1) = t
                    end if
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! perform a rank-1 update of a(1:k-1,1:k-1) as
                    ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t
                    r1 = one/ap(kc + k - 1)
                    call stdlib_dspr(uplo, k - 1, -r1, ap(kc), 1, ap)
                    ! store u(k) in column k
                    call stdlib_dscal(k - 1, r1, ap(kc), 1)
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t
                    if (k > 2) then
                       d12 = ap(k - 1 + (k - 1)*k/2)
                       d22 = ap(k - 1 + (k - 2)*(k - 1)/2)/d12
                       d11 = ap(k + (k - 1)*k/2)/d12
                       t = one/(d11*d22 - one)
                       d12 = t/d12
                       do j = k - 2, 1, -1
                          wkm1 = d12*(d11*ap(j + (k - 2)*(k - 1)/2) - ap(j + (k - 1)*k/2))
                                    
                          wk = d12*(d22*ap(j + (k - 1)*k/2) - ap(j + (k - 2)*(k - 1)/2))
                                    
                          do i = j, 1, -1
                             ap(i + (j - 1)*j/2) = ap(i + (j - 1)*j/2) - ap(i + (k - 1)*k/2) &
                                       *wk - ap(i + (k - 2)*(k - 1)/2)*wkm1
                          end do
                          ap(j + (k - 1)*k/2) = wk
                          ap(j + (k - 2)*(k - 1)/2) = wkm1
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              kc = knc - k
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
              kc = 1
              npp = n*(n + 1)/2
60      continue
              knc = kc
              ! if k > n, exit from loop
              if (k > n) go to 110
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(ap(kc))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value
              if (k < n) then
                 imax = k + stdlib_idamax(n - k, ap(kc + 1), 1)
                 colmax = abs(ap(kc + imax - k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    rowmax = zero
                    kx = kc + imax - k
                    do j = k, imax - 1
                       if (abs(ap(kx)) > rowmax) then
                          rowmax = abs(ap(kx))
                          jmax = j
                       end if
                       kx = kx + n - j
                    end do
                    kpc = npp - (n - imax + 1)*(n - imax + 2)/2 + 1
                    if (imax < n) then
                       jmax = imax + stdlib_idamax(n - imax, ap(kpc + 1), 1)
                       rowmax = max(rowmax, abs(ap(kpc + jmax - imax)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(ap(kpc)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k + kstep - 1
                 if (kstep == 2) knc = knc + n - k + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_dswap(n - kp, ap(knc + kp - kk + 1), 1, ap(kpc + 1), 1)
                              
                    kx = knc + kp - kk
                    do j = kk + 1, kp - 1
                       kx = kx + n - j + 1
                       t = ap(knc + j - kk)
                       ap(knc + j - kk) = ap(kx)
                       ap(kx) = t
                    end do
                    t = ap(knc)
                    ap(knc) = ap(kpc)
                    ap(kpc) = t
                    if (kstep == 2) then
                       t = ap(kc + 1)
                       ap(kc + 1) = ap(kc + kp - k)
                       ap(kc + kp - k) = t
                    end if
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                       ! perform a rank-1 update of a(k+1:n,k+1:n) as
                       ! a := a - l(k)*d(k)*l(k)**t = a - w(k)*(1/d(k))*w(k)**t
                       r1 = one/ap(kc)
                       call stdlib_dspr(uplo, n - k, -r1, ap(kc + 1), 1, ap(kc + n - k + 1))
                       ! store l(k) in column k
                       call stdlib_dscal(n - k, r1, ap(kc + 1), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    if (k < n - 1) then
                       ! perform a rank-2 update of a(k+2:n,k+2:n) as
                       ! a := a - ( l(k) l(k+1) )*d(k)*( l(k) l(k+1) )**t
                          ! = a - ( w(k) w(k+1) )*inv(d(k))*( w(k) w(k+1) )**t
                       ! where l(k) and l(k+1) are the k-th and (k+1)-th
                       ! columns of l
                       d21 = ap(k + 1 + (k - 1)*(2*n - k)/2)
                       d11 = ap(k + 1 + k*(2*n - k - 1)/2)/d21
                       d22 = ap(k + (k - 1)*(2*n - k)/2)/d21
                       t = one/(d11*d22 - one)
                       d21 = t/d21
                       do j = k + 2, n
                          wk = d21*(d11*ap(j + (k - 1)*(2*n - k)/2) - ap(j + k*(2*n - k - 1)/2))
                                    
                          wkp1 = d21*(d22*ap(j + k*(2*n - k - 1)/2) - ap(j + (k - 1)*(2*n - k)/2) &
                                     )
                          do i = j, n
                             ap(i + (j - 1)*(2*n - j)/2) = ap(i + (j - 1)*(2*n - j)/2) - ap( &
                                       i + (k - 1)*(2*n - k)/2)*wk - ap(i + k*(2*n - k - 1)/2)*wkp1
                          end do
                          ap(j + (k - 1)*(2*n - k)/2) = wk
                          ap(j + k*(2*n - k - 1)/2) = wkp1
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              kc = knc + n - k + 2
              go to 60
           end if
110    continue
           return
           ! end of stdlib_dsptrf
     end subroutine stdlib_dsptrf

     ! DSPTRI computes the inverse of a real symmetric indefinite matrix
     ! A in packed storage using the factorization A = U*D*U**T or
     ! A = L*D*L**T computed by DSPTRF.

     subroutine stdlib_dsptri(uplo, n, ap, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: ap(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp
           real(dp) :: ak, akkp1, akp1, d, t, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsptri', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              kp = n*(n + 1)/2
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. ap(kp) == zero) return
                 kp = kp - info
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              kp = 1
              do info = 1, n
                 if (ipiv(info) > 0 .and. ap(kp) == zero) return
                 kp = kp + n - info + 1
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
              kc = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              kcnext = kc + k
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 ap(kc + k - 1) = one/ap(kc + k - 1)
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_dcopy(k - 1, ap(kc), 1, work, 1)
                    call stdlib_dspmv(uplo, k - 1, -one, ap, work, 1, zero, ap(kc), 1)
                    ap(kc + k - 1) = ap(kc + k - 1) - stdlib_ddot(k - 1, work, 1, ap(kc), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(ap(kcnext + k - 1))
                 ak = ap(kc + k - 1)/t
                 akp1 = ap(kcnext + k)/t
                 akkp1 = ap(kcnext + k - 1)/t
                 d = t*(ak*akp1 - one)
                 ap(kc + k - 1) = akp1/d
                 ap(kcnext + k) = ak/d
                 ap(kcnext + k - 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_dcopy(k - 1, ap(kc), 1, work, 1)
                    call stdlib_dspmv(uplo, k - 1, -one, ap, work, 1, zero, ap(kc), 1)
                    ap(kc + k - 1) = ap(kc + k - 1) - stdlib_ddot(k - 1, work, 1, ap(kc), 1)
                    ap(kcnext + k - 1) = ap(kcnext + k - 1) - stdlib_ddot(k - 1, ap(kc), 1, ap( &
                              kcnext), 1)
                    call stdlib_dcopy(k - 1, ap(kcnext), 1, work, 1)
                    call stdlib_dspmv(uplo, k - 1, -one, ap, work, 1, zero, ap(kcnext), 1)
                              
                    ap(kcnext + k) = ap(kcnext + k) - stdlib_ddot(k - 1, work, 1, ap(kcnext), 1)
                              
                 end if
                 kstep = 2
                 kcnext = kcnext + k + 1
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 kpc = (kp - 1)*kp/2 + 1
                 call stdlib_dswap(kp - 1, ap(kc), 1, ap(kpc), 1)
                 kx = kpc + kp - 1
                 do j = kp + 1, k - 1
                    kx = kx + j - 1
                    temp = ap(kc + j - 1)
                    ap(kc + j - 1) = ap(kx)
                    ap(kx) = temp
                 end do
                 temp = ap(kc + k - 1)
                 ap(kc + k - 1) = ap(kpc + kp - 1)
                 ap(kpc + kp - 1) = temp
                 if (kstep == 2) then
                    temp = ap(kc + k + k - 1)
                    ap(kc + k + k - 1) = ap(kc + k + kp - 1)
                    ap(kc + k + kp - 1) = temp
                 end if
              end if
              k = k + kstep
              kc = kcnext
              go to 30
50      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              npp = n*(n + 1)/2
              k = n
              kc = npp
60      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 80
              kcnext = kc - (n - k + 2)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 ap(kc) = one/ap(kc)
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_dcopy(n - k, ap(kc + 1), 1, work, 1)
                    call stdlib_dspmv(uplo, n - k, -one, ap(kc + n - k + 1), work, 1, zero, ap(kc + 1), &
                              1)
                    ap(kc) = ap(kc) - stdlib_ddot(n - k, work, 1, ap(kc + 1), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(ap(kcnext + 1))
                 ak = ap(kcnext)/t
                 akp1 = ap(kc)/t
                 akkp1 = ap(kcnext + 1)/t
                 d = t*(ak*akp1 - one)
                 ap(kcnext) = akp1/d
                 ap(kc) = ak/d
                 ap(kcnext + 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_dcopy(n - k, ap(kc + 1), 1, work, 1)
                    call stdlib_dspmv(uplo, n - k, -one, ap(kc + (n - k + 1)), work, 1, zero, ap(kc + &
                              1), 1)
                    ap(kc) = ap(kc) - stdlib_ddot(n - k, work, 1, ap(kc + 1), 1)
                    ap(kcnext + 1) = ap(kcnext + 1) - stdlib_ddot(n - k, ap(kc + 1), 1, ap(kcnext + 2 &
                              ), 1)
                    call stdlib_dcopy(n - k, ap(kcnext + 2), 1, work, 1)
                    call stdlib_dspmv(uplo, n - k, -one, ap(kc + (n - k + 1)), work, 1, zero, ap( &
                              kcnext + 2), 1)
                    ap(kcnext) = ap(kcnext) - stdlib_ddot(n - k, work, 1, ap(kcnext + 2), 1)
                              
                 end if
                 kstep = 2
                 kcnext = kcnext - (n - k + 3)
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 kpc = npp - (n - kp + 1)*(n - kp + 2)/2 + 1
                 if (kp < n) call stdlib_dswap(n - kp, ap(kc + kp - k + 1), 1, ap(kpc + 1), 1)
                 kx = kc + kp - k
                 do j = k + 1, kp - 1
                    kx = kx + n - j + 1
                    temp = ap(kc + j - k)
                    ap(kc + j - k) = ap(kx)
                    ap(kx) = temp
                 end do
                 temp = ap(kc)
                 ap(kc) = ap(kpc)
                 ap(kpc) = temp
                 if (kstep == 2) then
                    temp = ap(kc - n + k - 1)
                    ap(kc - n + k - 1) = ap(kc - n + kp - 1)
                    ap(kc - n + kp - 1) = temp
                 end if
              end if
              k = k - kstep
              kc = kcnext
              go to 60
80      continue
           end if
           return
           ! end of stdlib_dsptri
     end subroutine stdlib_dsptri

     ! DSPTRS solves a system of linear equations A*X = B with a real
     ! symmetric matrix A stored in packed format using the factorization
     ! A = U*D*U**T or A = L*D*L**T computed by DSPTRF.

     subroutine stdlib_dsptrs(uplo, n, nrhs, ap, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: ap(*), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kc, kp
           real(dp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsptrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b, where a = u*d*u**t.
              ! first solve u*d*x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
              kc = n*(n + 1)/2 + 1
10      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 30
              kc = kc - k
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_dger(k - 1, nrhs, -one, ap(kc), 1, b(k, 1), ldb, b(1, 1), ldb)
                           
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_dscal(nrhs, one/ap(kc + k - 1), b(k, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k-1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k - 1) call stdlib_dswap(nrhs, b(k - 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in columns k-1 and k of a.
                 call stdlib_dger(k - 2, nrhs, -one, ap(kc), 1, b(k, 1), ldb, b(1, 1), ldb)
                           
                 call stdlib_dger(k - 2, nrhs, -one, ap(kc - (k - 1)), 1, b(k - 1, 1), ldb, b(1, 1 &
                           ), ldb)
                 ! multiply by the inverse of the diagonal block.
                 akm1k = ap(kc + k - 2)
                 akm1 = ap(kc - 1)/akm1k
                 ak = ap(kc + k - 1)/akm1k
                 denom = akm1*ak - one
                 do j = 1, nrhs
                    bkm1 = b(k - 1, j)/akm1k
                    bk = b(k, j)/akm1k
                    b(k - 1, j) = (ak*bkm1 - bk)/denom
                    b(k, j) = (akm1*bk - bkm1)/denom
                 end do
                 kc = kc - k + 1
                 k = k - 2
              end if
              go to 10
30      continue
              ! next solve u**t*x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
              kc = 1
40      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(u**t(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, ap(kc), 1, one, b(k, &
                           1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kc = kc + k
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation
                 ! stored in columns k and k+1 of a.
                 call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, ap(kc), 1, one, b(k, &
                           1), ldb)
                 call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, ap(kc + k), 1, one, b(k + &
                           1, 1), ldb)
                 ! interchange rows k and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kc = kc + 2*k + 1
                 k = k + 2
              end if
              go to 40
50      continue
           else
              ! solve a*x = b, where a = l*d*l**t.
              ! first solve l*d*x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
              kc = 1
60      continue
              ! if k > n, exit from loop.
              if (k > n) go to 80
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_dger(n - k, nrhs, -one, ap(kc + 1), 1, b(k, 1), ldb, b(k + 1, &
                            1), ldb)
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_dscal(nrhs, one/ap(kc), b(k, 1), ldb)
                 kc = kc + n - k + 1
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k+1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k + 1) call stdlib_dswap(nrhs, b(k + 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in columns k and k+1 of a.
                 if (k < n - 1) then
                    call stdlib_dger(n - k - 1, nrhs, -one, ap(kc + 2), 1, b(k, 1), ldb, b(k + 2, 1) &
                              , ldb)
                    call stdlib_dger(n - k - 1, nrhs, -one, ap(kc + n - k + 2), 1, b(k + 1, 1), ldb, b(k + &
                              2, 1), ldb)
                 end if
                 ! multiply by the inverse of the diagonal block.
                 akm1k = ap(kc + 1)
                 akm1 = ap(kc)/akm1k
                 ak = ap(kc + n - k + 1)/akm1k
                 denom = akm1*ak - one
                 do j = 1, nrhs
                    bkm1 = b(k, j)/akm1k
                    bk = b(k + 1, j)/akm1k
                    b(k, j) = (ak*bkm1 - bk)/denom
                    b(k + 1, j) = (akm1*bk - bkm1)/denom
                 end do
                 kc = kc + 2*(n - k) + 1
                 k = k + 2
              end if
              go to 60
80      continue
              ! next solve l**t*x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
              kc = n*(n + 1)/2 + 1
90      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 100
              kc = kc - (n - k + 1)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(l**t(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, ap( &
                           kc + 1), 1, one, b(k, 1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(l**t(k-1)), where l(k-1) is the transformation
                 ! stored in columns k-1 and k of a.
                 if (k < n) then
                    call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, ap(kc + 1), &
                              1, one, b(k, 1), ldb)
                    call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, ap(kc - (n - &
                              k)), 1, one, b(k - 1, 1), ldb)
                 end if
                 ! interchange rows k and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kc = kc - (n - k + 2)
                 k = k - 2
              end if
              go to 90
100    continue
           end if
           return
           ! end of stdlib_dsptrs
     end subroutine stdlib_dsptrs

     ! DSTEBZ computes the eigenvalues of a symmetric tridiagonal
     ! matrix T.  The user may ask for all eigenvalues, all eigenvalues
     ! in the half-open interval (VL, VU], or the IL-th through IU-th
     ! eigenvalues.
     ! To avoid overflow, the matrix must be scaled so that its
     ! largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest
     ! accuracy, it should not be much smaller than that.
     ! See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal
     ! Matrix", Report CS41, Computer Science Dept., Stanford
     ! University, July 21, 1966.

     subroutine stdlib_dstebz(range, order, n, vl, vu, il, iu, abstol, d, e, m, nsplit, w, iblock, &
                isplit, work, iwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: order, range
           integer(ilp) :: il, info, iu, m, n, nsplit
           real(dp) :: abstol, vl, vu
           ! .. array arguments ..
           integer(ilp) :: iblock(*), isplit(*), iwork(*)
           real(dp) :: d(*), e(*), w(*), work(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: fudge = 2.1d0
           real(dp), parameter :: relfac = 2.0d0
           
           ! .. local scalars ..
           logical(lk) :: ncnvrg, toofew
           integer(ilp) :: ib, ibegin, idiscl, idiscu, ie, iend, iinfo, im, in, ioff, iorder, iout, &
                      irange, itmax, itmp1, iw, iwoff, j, jb, jdisc, je, nb, nwl, nwu
           real(dp) :: atoli, bnorm, gl, gu, pivmin, rtoli, safemn, tmp1, tmp2, tnorm, ulp, wkill, &
                     wl, wlu, wu, wul
           ! .. local arrays ..
           integer(ilp) :: idumma(1)
     
           ! .. intrinsic functions ..
           intrinsic :: abs, int, log, max, min, sqrt
           ! .. executable statements ..
           info = 0
           ! decode range
           if (stdlib_lsame(range, 'a')) then
              irange = 1
           else if (stdlib_lsame(range, 'v')) then
              irange = 2
           else if (stdlib_lsame(range, 'i')) then
              irange = 3
           else
              irange = 0
           end if
           ! decode order
           if (stdlib_lsame(order, 'b')) then
              iorder = 2
           else if (stdlib_lsame(order, 'e')) then
              iorder = 1
           else
              iorder = 0
           end if
           ! check for errors
           if (irange <= 0) then
              info = -1
           else if (iorder <= 0) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (irange == 2) then
              if (vl >= vu) info = -5
           else if (irange == 3 .and. (il < 1 .or. il > max(1, n))) then
              info = -6
           else if (irange == 3 .and. (iu < min(n, il) .or. iu > n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dstebz', -info)
              return
           end if
           ! initialize error flags
           info = 0
           ncnvrg = .false.
           toofew = .false.
           ! quick return if possible
           m = 0
           if (n == 0) return
           ! simplifications:
           if (irange == 3 .and. il == 1 .and. iu == n) irange = 1
           ! get machine constants
           ! nb is the minimum vector length for vector bisection, or 0
           ! if only scalar is to be done.
           safemn = stdlib_dlamch('s')
           ulp = stdlib_dlamch('p')
           rtoli = ulp*relfac
           nb = stdlib_ilaenv(1, 'stdlib_dstebz', ' ', n, -1, -1, -1)
           if (nb <= 1) nb = 0
           ! special case when n=1
           if (n == 1) then
              nsplit = 1
              isplit(1) = 1
              if (irange == 2 .and. (vl >= d(1) .or. vu < d(1))) then
                 m = 0
              else
                 w(1) = d(1)
                 iblock(1) = 1
                 m = 1
              end if
              return
           end if
           ! compute splitting points
           nsplit = 1
           work(n) = zero
           pivmin = one
           do j = 2, n
              tmp1 = e(j - 1)**2
              if (abs(d(j)*d(j - 1))*ulp**2 + safemn > tmp1) then
                 isplit(nsplit) = j - 1
                 nsplit = nsplit + 1
                 work(j - 1) = zero
              else
                 work(j - 1) = tmp1
                 pivmin = max(pivmin, tmp1)
              end if
           end do
           isplit(nsplit) = n
           pivmin = pivmin*safemn
           ! compute interval and atoli
           if (irange == 3) then
              ! range='i': compute the interval containing eigenvalues
                         ! il through iu.
              ! compute gershgorin interval for entire (split) matrix
              ! and use it as the initial interval
              gu = d(1)
              gl = d(1)
              tmp1 = zero
              do j = 1, n - 1
                 tmp2 = sqrt(work(j))
                 gu = max(gu, d(j) + tmp1 + tmp2)
                 gl = min(gl, d(j) - tmp1 - tmp2)
                 tmp1 = tmp2
              end do
              gu = max(gu, d(n) + tmp1)
              gl = min(gl, d(n) - tmp1)
              tnorm = max(abs(gl), abs(gu))
              gl = gl - fudge*tnorm*ulp*n - fudge*two*pivmin
              gu = gu + fudge*tnorm*ulp*n + fudge*pivmin
              ! compute iteration parameters
              itmax = int((log(tnorm + pivmin) - log(pivmin))/log(two)) + 2
              if (abstol <= zero) then
                 atoli = ulp*tnorm
              else
                 atoli = abstol
              end if
              work(n + 1) = gl
              work(n + 2) = gl
              work(n + 3) = gu
              work(n + 4) = gu
              work(n + 5) = gl
              work(n + 6) = gu
              iwork(1) = -1
              iwork(2) = -1
              iwork(3) = n + 1
              iwork(4) = n + 1
              iwork(5) = il - 1
              iwork(6) = iu
              call stdlib_dlaebz(3, itmax, n, 2, 2, nb, atoli, rtoli, pivmin, d, e, work, iwork( &
                        5), work(n + 1), work(n + 5), iout, iwork, w, iblock, iinfo)
              if (iwork(6) == iu) then
                 wl = work(n + 1)
                 wlu = work(n + 3)
                 nwl = iwork(1)
                 wu = work(n + 4)
                 wul = work(n + 2)
                 nwu = iwork(4)
              else
                 wl = work(n + 2)
                 wlu = work(n + 4)
                 nwl = iwork(2)
                 wu = work(n + 3)
                 wul = work(n + 1)
                 nwu = iwork(3)
              end if
              if (nwl < 0 .or. nwl >= n .or. nwu < 1 .or. nwu > n) then
                 info = 4
                 return
              end if
           else
              ! range='a' or 'v' -- set atoli
              tnorm = max(abs(d(1)) + abs(e(1)), abs(d(n)) + abs(e(n - 1)))
              do j = 2, n - 1
                 tnorm = max(tnorm, abs(d(j)) + abs(e(j - 1)) + abs(e(j)))
              end do
              if (abstol <= zero) then
                 atoli = ulp*tnorm
              else
                 atoli = abstol
              end if
              if (irange == 2) then
                 wl = vl
                 wu = vu
              else
                 wl = zero
                 wu = zero
              end if
           end if
           ! find eigenvalues -- loop over blocks and recompute nwl and nwu.
           ! nwl accumulates the number of eigenvalues .le. wl,
           ! nwu accumulates the number of eigenvalues .le. wu
           m = 0
           iend = 0
           info = 0
           nwl = 0
           nwu = 0
           loop_70: do jb = 1, nsplit
              ioff = iend
              ibegin = ioff + 1
              iend = isplit(jb)
              in = iend - ioff
              if (in == 1) then
                 ! special case -- in=1
                 if (irange == 1 .or. wl >= d(ibegin) - pivmin) nwl = nwl + 1
                 if (irange == 1 .or. wu >= d(ibegin) - pivmin) nwu = nwu + 1
                 if (irange == 1 .or. (wl < d(ibegin) - pivmin .and. wu >= d(ibegin) - pivmin)) &
                           then
                    m = m + 1
                    w(m) = d(ibegin)
                    iblock(m) = jb
                 end if
              else
                 ! general case -- in > 1
                 ! compute gershgorin interval
                 ! and use it as the initial interval
                 gu = d(ibegin)
                 gl = d(ibegin)
                 tmp1 = zero
                 do j = ibegin, iend - 1
                    tmp2 = abs(e(j))
                    gu = max(gu, d(j) + tmp1 + tmp2)
                    gl = min(gl, d(j) - tmp1 - tmp2)
                    tmp1 = tmp2
                 end do
                 gu = max(gu, d(iend) + tmp1)
                 gl = min(gl, d(iend) - tmp1)
                 bnorm = max(abs(gl), abs(gu))
                 gl = gl - fudge*bnorm*ulp*in - fudge*pivmin
                 gu = gu + fudge*bnorm*ulp*in + fudge*pivmin
                 ! compute atoli for the current submatrix
                 if (abstol <= zero) then
                    atoli = ulp*max(abs(gl), abs(gu))
                 else
                    atoli = abstol
                 end if
                 if (irange > 1) then
                    if (gu < wl) then
                       nwl = nwl + in
                       nwu = nwu + in
                       cycle loop_70
                    end if
                    gl = max(gl, wl)
                    gu = min(gu, wu)
                    if (gl >= gu) cycle loop_70
                 end if
                 ! set up initial interval
                 work(n + 1) = gl
                 work(n + in + 1) = gu
                 call stdlib_dlaebz(1, 0, in, in, 1, nb, atoli, rtoli, pivmin, d(ibegin), e( &
                 ibegin), work(ibegin), idumma, work(n + 1), work(n + 2*in + 1), im, iwork, w(m + 1 &
                           ), iblock(m + 1), iinfo)
                 nwl = nwl + iwork(1)
                 nwu = nwu + iwork(in + 1)
                 iwoff = m - iwork(1)
                 ! compute eigenvalues
                 itmax = int((log(gu - gl + pivmin) - log(pivmin))/log(two)) + 2
                 call stdlib_dlaebz(2, itmax, in, in, 1, nb, atoli, rtoli, pivmin, d(ibegin), e( &
                  ibegin), work(ibegin), idumma, work(n + 1), work(n + 2*in + 1), iout, iwork, w( &
                            m + 1), iblock(m + 1), iinfo)
                 ! copy eigenvalues into w and iblock
                 ! use -jb for block number for unconverged eigenvalues.
                 do j = 1, iout
                    tmp1 = half*(work(j + n) + work(j + in + n))
                    ! flag non-convergence.
                    if (j > iout - iinfo) then
                       ncnvrg = .true.
                       ib = -jb
                    else
                       ib = jb
                    end if
                    do je = iwork(j) + 1 + iwoff, iwork(j + in) + iwoff
                       w(je) = tmp1
                       iblock(je) = ib
                    end do
                 end do
                 m = m + im
              end if
           end do loop_70
           ! if range='i', then (wl,wu) contains eigenvalues nwl+1,...,nwu
           ! if nwl+1 < il or nwu > iu, discard extra eigenvalues.
           if (irange == 3) then
              im = 0
              idiscl = il - 1 - nwl
              idiscu = nwu - iu
              if (idiscl > 0 .or. idiscu > 0) then
                 do je = 1, m
                    if (w(je) <= wlu .and. idiscl > 0) then
                       idiscl = idiscl - 1
                    else if (w(je) >= wul .and. idiscu > 0) then
                       idiscu = idiscu - 1
                    else
                       im = im + 1
                       w(im) = w(je)
                       iblock(im) = iblock(je)
                    end if
                 end do
                 m = im
              end if
              if (idiscl > 0 .or. idiscu > 0) then
                 ! code to deal with effects of bad arithmetic:
                 ! some low eigenvalues to be discarded are not in (wl,wlu],
                 ! or high eigenvalues to be discarded are not in (wul,wu]
                 ! so just kill off the smallest idiscl/largest idiscu
                 ! eigenvalues, by simply finding the smallest/largest
                 ! eigenvalue(s).
                 ! (if n(w) is monotone non-decreasing, this should never
                     ! happen.)
                 if (idiscl > 0) then
                    wkill = wu
                    do jdisc = 1, idiscl
                       iw = 0
                       do je = 1, m
                          if (iblock(je) /= 0 .and. (w(je) < wkill .or. iw == 0)) then
                             iw = je
                             wkill = w(je)
                          end if
                       end do
                       iblock(iw) = 0
                    end do
                 end if
                 if (idiscu > 0) then
                    wkill = wl
                    do jdisc = 1, idiscu
                       iw = 0
                       do je = 1, m
                          if (iblock(je) /= 0 .and. (w(je) > wkill .or. iw == 0)) then
                             iw = je
                             wkill = w(je)
                          end if
                       end do
                       iblock(iw) = 0
                    end do
                 end if
                 im = 0
                 do je = 1, m
                    if (iblock(je) /= 0) then
                       im = im + 1
                       w(im) = w(je)
                       iblock(im) = iblock(je)
                    end if
                 end do
                 m = im
              end if
              if (idiscl < 0 .or. idiscu < 0) then
                 toofew = .true.
              end if
           end if
           ! if order='b', do nothing -- the eigenvalues are already sorted
              ! by block.
           ! if order='e', sort the eigenvalues from smallest to largest
           if (iorder == 1 .and. nsplit > 1) then
              do je = 1, m - 1
                 ie = 0
                 tmp1 = w(je)
                 do j = je + 1, m
                    if (w(j) < tmp1) then
                       ie = j
                       tmp1 = w(j)
                    end if
                 end do
                 if (ie /= 0) then
                    itmp1 = iblock(ie)
                    w(ie) = w(je)
                    iblock(ie) = iblock(je)
                    w(je) = tmp1
                    iblock(je) = itmp1
                 end if
              end do
           end if
           info = 0
           if (ncnvrg) info = info + 1
           if (toofew) info = info + 2
           return
           ! end of stdlib_dstebz
     end subroutine stdlib_dstebz

     ! DSYCONV convert A given by TRF into L and D and vice-versa.
     ! Get Non-diag elements of D (returned in workspace) and
     ! apply or reverse permutation done in TRF.

     subroutine stdlib_dsyconv(uplo, way, n, a, lda, ipiv, e, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, way
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), e(*)
        ! =====================================================================
           
           ! .. external subroutines ..
     
           logical(lk) :: upper, convert
           integer(ilp) :: i, ip, j
           real(dp) :: temp
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           convert = stdlib_lsame(way, 'c')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. convert .and. .not. stdlib_lsame(way, 'r')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsyconv', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
            ! a is upper
            ! convert a (a is upper)
              ! convert value
              if (convert) then
                 i = n
                 e(1) = zero
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       e(i) = a(i - 1, i)
                       e(i - 1) = zero
                       a(i - 1, i) = zero
                       i = i - 1
                    else
                       e(i) = zero
                    end if
                    i = i - 1
                 end do
              ! convert permutations
              i = n
              do while (i >= 1)
                 if (ipiv(i) > 0) then
                    ip = ipiv(i)
                    if (i < n) then
                       do j = i + 1, n
                         temp = a(ip, j)
                         a(ip, j) = a(i, j)
                         a(i, j) = temp
                       end do
                    end if
                 else
                   ip = -ipiv(i)
                    if (i < n) then
                  do j = i + 1, n
                      temp = a(ip, j)
                      a(ip, j) = a(i - 1, j)
                      a(i - 1, j) = temp
                  end do
                     end if
                     i = i - 1
                end if
                i = i - 1
             end do
              else
            ! revert a (a is upper)
              ! revert permutations
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ip = ipiv(i)
                       if (i < n) then
                       do j = i + 1, n
                         temp = a(ip, j)
                         a(ip, j) = a(i, j)
                         a(i, j) = temp
                       end do
                       end if
                    else
                      ip = -ipiv(i)
                      i = i + 1
                      if (i < n) then
                         do j = i + 1, n
                            temp = a(ip, j)
                            a(ip, j) = a(i - 1, j)
                            a(i - 1, j) = temp
                         end do
                      end if
                    end if
                    i = i + 1
                 end do
              ! revert value
                 i = n
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       a(i - 1, i) = e(i)
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              end if
           else
            ! a is lower
              if (convert) then
            ! convert a (a is lower)
              ! convert value
                 i = 1
                 e(n) = zero
                 do while (i <= n)
                    if (i < n .and. ipiv(i) < 0) then
                       e(i) = a(i + 1, i)
                       e(i + 1) = zero
                       a(i + 1, i) = zero
                       i = i + 1
                    else
                       e(i) = zero
                    end if
                    i = i + 1
                 end do
              ! convert permutations
              i = 1
              do while (i <= n)
                 if (ipiv(i) > 0) then
                    ip = ipiv(i)
                    if (i > 1) then
                    do j = 1, i - 1
                      temp = a(ip, j)
                      a(ip, j) = a(i, j)
                      a(i, j) = temp
                    end do
                    end if
                 else
                   ip = -ipiv(i)
                   if (i > 1) then
                   do j = 1, i - 1
                      temp = a(ip, j)
                      a(ip, j) = a(i + 1, j)
                      a(i + 1, j) = temp
                   end do
                   end if
                   i = i + 1
                end if
                i = i + 1
             end do
              else
            ! revert a (a is lower)
              ! revert permutations
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ip = ipiv(i)
                       if (i > 1) then
                          do j = 1, i - 1
                             temp = a(i, j)
                             a(i, j) = a(ip, j)
                             a(ip, j) = temp
                          end do
                       end if
                    else
                       ip = -ipiv(i)
                       i = i - 1
                       if (i > 1) then
                          do j = 1, i - 1
                             temp = a(i + 1, j)
                             a(i + 1, j) = a(ip, j)
                             a(ip, j) = temp
                          end do
                       end if
                    end if
                    i = i - 1
                 end do
              ! revert value
                 i = 1
                 do while (i <= n - 1)
                    if (ipiv(i) < 0) then
                       a(i + 1, i) = e(i)
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              end if
           end if
           return
           ! end of stdlib_dsyconv
     end subroutine stdlib_dsyconv

     ! If parameter WAY = 'C':
     ! DSYCONVF converts the factorization output format used in
     ! DSYTRF provided on entry in parameter A into the factorization
     ! output format used in DSYTRF_RK (or DSYTRF_BK) that is stored
     ! on exit in parameters A and E. It also converts in place details of
     ! the intechanges stored in IPIV from the format used in DSYTRF into
     ! the format used in DSYTRF_RK (or DSYTRF_BK).
     ! If parameter WAY = 'R':
     ! DSYCONVF performs the conversion in reverse direction, i.e.
     ! converts the factorization output format used in DSYTRF_RK
     ! (or DSYTRF_BK) provided on entry in parameters A and E into
     ! the factorization output format used in DSYTRF that is stored
     ! on exit in parameter A. It also converts in place details of
     ! the intechanges stored in IPIV from the format used in DSYTRF_RK
     ! (or DSYTRF_BK) into the format used in DSYTRF.

     subroutine stdlib_dsyconvf(uplo, way, n, a, lda, e, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, way
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), e(*)
        ! =====================================================================
           
           ! .. external subroutines ..
     
           logical(lk) :: upper, convert
           integer(ilp) :: i, ip
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           convert = stdlib_lsame(way, 'c')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. convert .and. .not. stdlib_lsame(way, 'r')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsyconvf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! begin a is upper
              if (convert) then
                 ! convert a (a is upper)
                 ! convert value
                 ! assign superdiagonal entries of d to array e and zero out
                 ! corresponding entries in input storage a
                 i = n
                 e(1) = zero
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       e(i) = a(i - 1, i)
                       e(i - 1) = zero
                       a(i - 1, i) = zero
                       i = i - 1
                    else
                       e(i) = zero
                    end if
                    i = i - 1
                 end do
                 ! convert permutations and ipiv
                 ! apply permutations to submatrices of upper part of a
                 ! in factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_dswap(n - i, a(i, i + 1), lda, a(ip, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i-1 and ipiv(i) in a(1:i,n-i:n)
                       ip = -ipiv(i)
                       if (i < n) then
                          if (ip /= (i - 1)) then
                             call stdlib_dswap(n - i, a(i - 1, i + 1), lda, a(ip, i + 1), lda)
                                       
                          end if
                       end if
                       ! convert ipiv
                       ! there is no interchnge of rows i and and ipiv(i),
                       ! so this should be reflected in ipiv format for
                       ! *sytrf_rk ( or *sytrf_bk)
                       ipiv(i) = i
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              else
                 ! revert a (a is upper)
                 ! revert permutations and ipiv
                 ! apply permutations to submatrices of upper part of a
                 ! in reverse factorization order where i increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_dswap(n - i, a(ip, i + 1), lda, a(i, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i-1 and ipiv(i) in a(1:i,n-i:n)
                       i = i + 1
                       ip = -ipiv(i)
                       if (i < n) then
                          if (ip /= (i - 1)) then
                             call stdlib_dswap(n - i, a(ip, i + 1), lda, a(i - 1, i + 1), lda)
                                       
                          end if
                       end if
                       ! convert ipiv
                       ! there is one interchange of rows i-1 and ipiv(i-1),
                       ! so this should be recorded in two consecutive entries
                       ! in ipiv format for *sytrf
                       ipiv(i) = ipiv(i - 1)
                    end if
                    i = i + 1
                 end do
                 ! revert value
                 ! assign superdiagonal entries of d from array e to
                 ! superdiagonal entries of a.
                 i = n
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       a(i - 1, i) = e(i)
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              ! end a is upper
              end if
           else
              ! begin a is lower
              if (convert) then
                 ! convert a (a is lower)
                 ! convert value
                 ! assign subdiagonal entries of d to array e and zero out
                 ! corresponding entries in input storage a
                 i = 1
                 e(n) = zero
                 do while (i <= n)
                    if (i < n .and. ipiv(i) < 0) then
                       e(i) = a(i + 1, i)
                       e(i + 1) = zero
                       a(i + 1, i) = zero
                       i = i + 1
                    else
                       e(i) = zero
                    end if
                    i = i + 1
                 end do
                 ! convert permutations and ipiv
                 ! apply permutations to submatrices of lower part of a
                 ! in factorization order where k increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_dswap(i - 1, a(i, 1), lda, a(ip, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1)
                       ip = -ipiv(i)
                       if (i > 1) then
                          if (ip /= (i + 1)) then
                             call stdlib_dswap(i - 1, a(i + 1, 1), lda, a(ip, 1), lda)
                          end if
                       end if
                       ! convert ipiv
                       ! there is no interchnge of rows i and and ipiv(i),
                       ! so this should be reflected in ipiv format for
                       ! *sytrf_rk ( or *sytrf_bk)
                       ipiv(i) = i
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              else
                 ! revert a (a is lower)
                 ! revert permutations and ipiv
                 ! apply permutations to submatrices of lower part of a
                 ! in reverse factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_dswap(i - 1, a(ip, 1), lda, a(i, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1)
                       i = i - 1
                       ip = -ipiv(i)
                       if (i > 1) then
                          if (ip /= (i + 1)) then
                             call stdlib_dswap(i - 1, a(ip, 1), lda, a(i + 1, 1), lda)
                          end if
                       end if
                       ! convert ipiv
                       ! there is one interchange of rows i+1 and ipiv(i+1),
                       ! so this should be recorded in consecutive entries
                       ! in ipiv format for *sytrf
                       ipiv(i) = ipiv(i + 1)
                    end if
                    i = i - 1
                 end do
                 ! revert value
                 ! assign subdiagonal entries of d from array e to
                 ! subgiagonal entries of a.
                 i = 1
                 do while (i <= n - 1)
                    if (ipiv(i) < 0) then
                       a(i + 1, i) = e(i)
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              end if
              ! end a is lower
           end if
           return
           ! end of stdlib_dsyconvf
     end subroutine stdlib_dsyconvf

     ! If parameter WAY = 'C':
     ! DSYCONVF_ROOK converts the factorization output format used in
     ! DSYTRF_ROOK provided on entry in parameter A into the factorization
     ! output format used in DSYTRF_RK (or DSYTRF_BK) that is stored
     ! on exit in parameters A and E. IPIV format for DSYTRF_ROOK and
     ! DSYTRF_RK (or DSYTRF_BK) is the same and is not converted.
     ! If parameter WAY = 'R':
     ! DSYCONVF_ROOK performs the conversion in reverse direction, i.e.
     ! converts the factorization output format used in DSYTRF_RK
     ! (or DSYTRF_BK) provided on entry in parameters A and E into
     ! the factorization output format used in DSYTRF_ROOK that is stored
     ! on exit in parameter A. IPIV format for DSYTRF_ROOK and
     ! DSYTRF_RK (or DSYTRF_BK) is the same and is not converted.

     subroutine stdlib_dsyconvf_rook(uplo, way, n, a, lda, e, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, way
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), e(*)
        ! =====================================================================
           
           ! .. external subroutines ..
     
           logical(lk) :: upper, convert
           integer(ilp) :: i, ip, ip2
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           convert = stdlib_lsame(way, 'c')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. convert .and. .not. stdlib_lsame(way, 'r')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsyconvf_rook', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! begin a is upper
              if (convert) then
                 ! convert a (a is upper)
                 ! convert value
                 ! assign superdiagonal entries of d to array e and zero out
                 ! corresponding entries in input storage a
                 i = n
                 e(1) = zero
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       e(i) = a(i - 1, i)
                       e(i - 1) = zero
                       a(i - 1, i) = zero
                       i = i - 1
                    else
                       e(i) = zero
                    end if
                    i = i - 1
                 end do
                 ! convert permutations
                 ! apply permutations to submatrices of upper part of a
                 ! in factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_dswap(n - i, a(i, i + 1), lda, a(ip, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i and ipiv(i) and i-1 and ipiv(i-1)
                       ! in a(1:i,n-i:n)
                       ip = -ipiv(i)
                       ip2 = -ipiv(i - 1)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_dswap(n - i, a(i, i + 1), lda, a(ip, i + 1), lda)
                          end if
                          if (ip2 /= (i - 1)) then
                             call stdlib_dswap(n - i, a(i - 1, i + 1), lda, a(ip2, i + 1), lda)
                                       
                          end if
                       end if
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              else
                 ! revert a (a is upper)
                 ! revert permutations
                 ! apply permutations to submatrices of upper part of a
                 ! in reverse factorization order where i increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_dswap(n - i, a(ip, i + 1), lda, a(i, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i-1 and ipiv(i-1) and i and ipiv(i)
                       ! in a(1:i,n-i:n)
                       i = i + 1
                       ip = -ipiv(i)
                       ip2 = -ipiv(i - 1)
                       if (i < n) then
                          if (ip2 /= (i - 1)) then
                             call stdlib_dswap(n - i, a(ip2, i + 1), lda, a(i - 1, i + 1), lda)
                                       
                          end if
                          if (ip /= i) then
                             call stdlib_dswap(n - i, a(ip, i + 1), lda, a(i, i + 1), lda)
                          end if
                       end if
                    end if
                    i = i + 1
                 end do
                 ! revert value
                 ! assign superdiagonal entries of d from array e to
                 ! superdiagonal entries of a.
                 i = n
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       a(i - 1, i) = e(i)
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              ! end a is upper
              end if
           else
              ! begin a is lower
              if (convert) then
                 ! convert a (a is lower)
                 ! convert value
                 ! assign subdiagonal entries of d to array e and zero out
                 ! corresponding entries in input storage a
                 i = 1
                 e(n) = zero
                 do while (i <= n)
                    if (i < n .and. ipiv(i) < 0) then
                       e(i) = a(i + 1, i)
                       e(i + 1) = zero
                       a(i + 1, i) = zero
                       i = i + 1
                    else
                       e(i) = zero
                    end if
                    i = i + 1
                 end do
                 ! convert permutations
                 ! apply permutations to submatrices of lower part of a
                 ! in factorization order where i increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_dswap(i - 1, a(i, 1), lda, a(ip, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i and ipiv(i) and i+1 and ipiv(i+1)
                       ! in a(i:n,1:i-1)
                       ip = -ipiv(i)
                       ip2 = -ipiv(i + 1)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_dswap(i - 1, a(i, 1), lda, a(ip, 1), lda)
                          end if
                          if (ip2 /= (i + 1)) then
                             call stdlib_dswap(i - 1, a(i + 1, 1), lda, a(ip2, 1), lda)
                          end if
                       end if
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              else
                 ! revert a (a is lower)
                 ! revert permutations
                 ! apply permutations to submatrices of lower part of a
                 ! in reverse factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_dswap(i - 1, a(ip, 1), lda, a(i, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i+1 and ipiv(i+1) and i and ipiv(i)
                       ! in a(i:n,1:i-1)
                       i = i - 1
                       ip = -ipiv(i)
                       ip2 = -ipiv(i + 1)
                       if (i > 1) then
                          if (ip2 /= (i + 1)) then
                             call stdlib_dswap(i - 1, a(ip2, 1), lda, a(i + 1, 1), lda)
                          end if
                          if (ip /= i) then
                             call stdlib_dswap(i - 1, a(ip, 1), lda, a(i, 1), lda)
                          end if
                       end if
                    end if
                    i = i - 1
                 end do
                 ! revert value
                 ! assign subdiagonal entries of d from array e to
                 ! subgiagonal entries of a.
                 i = 1
                 do while (i <= n - 1)
                    if (ipiv(i) < 0) then
                       a(i + 1, i) = e(i)
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              end if
              ! end a is lower
           end if
           return
           ! end of stdlib_dsyconvf_rook
     end subroutine stdlib_dsyconvf_rook

     ! DSYEQUB computes row and column scalings intended to equilibrate a
     ! symmetric matrix A (with respect to the Euclidean norm) and reduce
     ! its condition number. The scale factors S are computed by the BIN
     ! algorithm (see references) so that the scaled matrix B with elements
     ! B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of
     ! the smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_dsyequb(uplo, n, a, lda, s, scond, amax, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, n
           real(dp) :: amax, scond
           character :: uplo
           ! .. array arguments ..
           real(dp) :: a(lda, *), s(*), work(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: max_iter = 100
           
           ! .. local scalars ..
           integer(ilp) :: i, j, iter
           real(dp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, &
                     scale, sumsq
           logical(lk) :: up
     
           ! .. intrinsic functions ..
           intrinsic :: abs, int, log, max, min, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. (stdlib_lsame(uplo, 'u') .or. stdlib_lsame(uplo, 'l'))) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsyequb', -info)
              return
           end if
           up = stdlib_lsame(uplo, 'u')
           amax = zero
           ! quick return if possible.
           if (n == 0) then
              scond = one
              return
           end if
           do i = 1, n
              s(i) = zero
           end do
           amax = zero
           if (up) then
              do j = 1, n
                 do i = 1, j - 1
                    s(i) = max(s(i), abs(a(i, j)))
                    s(j) = max(s(j), abs(a(i, j)))
                    amax = max(amax, abs(a(i, j)))
                 end do
                 s(j) = max(s(j), abs(a(j, j)))
                 amax = max(amax, abs(a(j, j)))
              end do
           else
              do j = 1, n
                 s(j) = max(s(j), abs(a(j, j)))
                 amax = max(amax, abs(a(j, j)))
                 do i = j + 1, n
                    s(i) = max(s(i), abs(a(i, j)))
                    s(j) = max(s(j), abs(a(i, j)))
                    amax = max(amax, abs(a(i, j)))
                 end do
              end do
           end if
           do j = 1, n
              s(j) = 1.0d0/s(j)
           end do
           tol = one/sqrt(2.0d0*n)
           do iter = 1, max_iter
              scale = 0.0d0
              sumsq = 0.0d0
              ! beta = |a|s
              do i = 1, n
                 work(i) = zero
              end do
              if (up) then
                 do j = 1, n
                    do i = 1, j - 1
                       work(i) = work(i) + abs(a(i, j))*s(j)
                       work(j) = work(j) + abs(a(i, j))*s(i)
                    end do
                    work(j) = work(j) + abs(a(j, j))*s(j)
                 end do
              else
                 do j = 1, n
                    work(j) = work(j) + abs(a(j, j))*s(j)
                    do i = j + 1, n
                       work(i) = work(i) + abs(a(i, j))*s(j)
                       work(j) = work(j) + abs(a(i, j))*s(i)
                    end do
                 end do
              end if
              ! avg = s^t beta / n
              avg = 0.0d0
              do i = 1, n
                 avg = avg + s(i)*work(i)
              end do
              avg = avg/n
              std = 0.0d0
              do i = n + 1, 2*n
                 work(i) = s(i - n)*work(i - n) - avg
              end do
              call stdlib_dlassq(n, work(n + 1), 1, scale, sumsq)
              std = scale*sqrt(sumsq/n)
              if (std < tol*avg) goto 999
              do i = 1, n
                 t = abs(a(i, i))
                 si = s(i)
                 c2 = (n - 1)*t
                 c1 = (n - 2)*(work(i) - t*si)
                 c0 = -(t*si)*si + 2*work(i)*si - n*avg
                 d = c1*c1 - 4*c0*c2
                 if (d <= 0) then
                    info = -1
                    return
                 end if
                 si = -2*c0/(c1 + sqrt(d))
                 d = si - s(i)
                 u = zero
                 if (up) then
                    do j = 1, i
                       t = abs(a(j, i))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                    do j = i + 1, n
                       t = abs(a(i, j))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                 else
                    do j = 1, i
                       t = abs(a(i, j))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                    do j = i + 1, n
                       t = abs(a(j, i))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                 end if
                 avg = avg + (u + work(i))*d/n
                 s(i) = si
              end do
           end do
999   continue
           smlnum = stdlib_dlamch('safemin')
           bignum = one/smlnum
           smin = bignum
           smax = zero
           t = one/sqrt(avg)
           base = stdlib_dlamch('b')
           u = one/log(base)
           do i = 1, n
              s(i) = base**int(u*log(s(i)*t))
              smin = min(smin, s(i))
              smax = max(smax, s(i))
           end do
           scond = max(smin, smlnum)/min(smax, bignum)
     end subroutine stdlib_dsyequb

     ! DSYGS2 reduces a real symmetric-definite generalized eigenproblem
     ! to standard form.
     ! If ITYPE = 1, the problem is A*x = lambda*B*x,
     ! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     ! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     ! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L.
     ! B must have been previously factorized as U**T *U or L*L**T by DPOTRF.

     subroutine stdlib_dsygs2(itype, uplo, n, a, lda, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, itype, lda, ldb, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: k
           real(dp) :: akk, bkk, ct
     
           ! .. intrinsic functions ..
           intrinsic :: max
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (itype < 1 .or. itype > 3) then
              info = -1
           else if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsygs2', -info)
              return
           end if
           if (itype == 1) then
              if (upper) then
                 ! compute inv(u**t)*a*inv(u)
                 do k = 1, n
                    ! update the upper triangle of a(k:n,k:n)
                    akk = a(k, k)
                    bkk = b(k, k)
                    akk = akk/bkk**2
                    a(k, k) = akk
                    if (k < n) then
                       call stdlib_dscal(n - k, one/bkk, a(k, k + 1), lda)
                       ct = -half*akk
                       call stdlib_daxpy(n - k, ct, b(k, k + 1), ldb, a(k, k + 1), lda)
                       call stdlib_dsyr2(uplo, n - k, -one, a(k, k + 1), lda, b(k, k + 1), ldb, a( &
                                 k + 1, k + 1), lda)
                       call stdlib_daxpy(n - k, ct, b(k, k + 1), ldb, a(k, k + 1), lda)
                       call stdlib_dtrsv(uplo, 'transpose', 'non-unit', n - k, b(k + 1, k + 1), ldb, &
                                 a(k, k + 1), lda)
                    end if
                 end do
              else
                 ! compute inv(l)*a*inv(l**t)
                 do k = 1, n
                    ! update the lower triangle of a(k:n,k:n)
                    akk = a(k, k)
                    bkk = b(k, k)
                    akk = akk/bkk**2
                    a(k, k) = akk
                    if (k < n) then
                       call stdlib_dscal(n - k, one/bkk, a(k + 1, k), 1)
                       ct = -half*akk
                       call stdlib_daxpy(n - k, ct, b(k + 1, k), 1, a(k + 1, k), 1)
                       call stdlib_dsyr2(uplo, n - k, -one, a(k + 1, k), 1, b(k + 1, k), 1, a(k + 1, &
                                 k + 1), lda)
                       call stdlib_daxpy(n - k, ct, b(k + 1, k), 1, a(k + 1, k), 1)
                       call stdlib_dtrsv(uplo, 'no transpose', 'non-unit', n - k, b(k + 1, k + 1), &
                                 ldb, a(k + 1, k), 1)
                    end if
                 end do
              end if
           else
              if (upper) then
                 ! compute u*a*u**t
                 do k = 1, n
                    ! update the upper triangle of a(1:k,1:k)
                    akk = a(k, k)
                    bkk = b(k, k)
                    call stdlib_dtrmv(uplo, 'no transpose', 'non-unit', k - 1, b, ldb, a(1, k), 1 &
                              )
                    ct = half*akk
                    call stdlib_daxpy(k - 1, ct, b(1, k), 1, a(1, k), 1)
                    call stdlib_dsyr2(uplo, k - 1, one, a(1, k), 1, b(1, k), 1, a, lda)
                    call stdlib_daxpy(k - 1, ct, b(1, k), 1, a(1, k), 1)
                    call stdlib_dscal(k - 1, bkk, a(1, k), 1)
                    a(k, k) = akk*bkk**2
                 end do
              else
                 ! compute l**t *a*l
                 do k = 1, n
                    ! update the lower triangle of a(1:k,1:k)
                    akk = a(k, k)
                    bkk = b(k, k)
                    call stdlib_dtrmv(uplo, 'transpose', 'non-unit', k - 1, b, ldb, a(k, 1), lda)
                              
                    ct = half*akk
                    call stdlib_daxpy(k - 1, ct, b(k, 1), ldb, a(k, 1), lda)
                    call stdlib_dsyr2(uplo, k - 1, one, a(k, 1), lda, b(k, 1), ldb, a, lda)
                              
                    call stdlib_daxpy(k - 1, ct, b(k, 1), ldb, a(k, 1), lda)
                    call stdlib_dscal(k - 1, bkk, a(k, 1), lda)
                    a(k, k) = akk*bkk**2
                 end do
              end if
           end if
           return
           ! end of stdlib_dsygs2
     end subroutine stdlib_dsygs2

     ! DSYGST reduces a real symmetric-definite generalized eigenproblem
     ! to standard form.
     ! If ITYPE = 1, the problem is A*x = lambda*B*x,
     ! and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
     ! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     ! B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
     ! B must have been previously factorized as U**T*U or L*L**T by DPOTRF.

     subroutine stdlib_dsygst(itype, uplo, n, a, lda, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, itype, lda, ldb, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: k, kb, nb
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (itype < 1 .or. itype > 3) then
              info = -1
           else if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsygst', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine the block size for this environment.
           nb = stdlib_ilaenv(1, 'stdlib_dsygst', uplo, n, -1, -1, -1)
           if (nb <= 1 .or. nb >= n) then
              ! use unblocked code
              call stdlib_dsygs2(itype, uplo, n, a, lda, b, ldb, info)
           else
              ! use blocked code
              if (itype == 1) then
                 if (upper) then
                    ! compute inv(u**t)*a*inv(u)
                    do k = 1, n, nb
                       kb = min(n - k + 1, nb)
                       ! update the upper triangle of a(k:n,k:n)
                       call stdlib_dsygs2(itype, uplo, kb, a(k, k), lda, b(k, k), ldb, info)
                                 
                       if (k + kb <= n) then
                          call stdlib_dtrsm('left', uplo, 'transpose', 'non-unit', kb, n - k - kb + 1, &
                                    one, b(k, k), ldb, a(k, k + kb), lda)
                          call stdlib_dsymm('left', uplo, kb, n - k - kb + 1, -half, a(k, k), lda, b( &
                                    k, k + kb), ldb, one, a(k, k + kb), lda)
                          call stdlib_dsyr2k(uplo, 'transpose', n - k - kb + 1, kb, -one, a(k, k + kb), &
                                    lda, b(k, k + kb), ldb, one, a(k + kb, k + kb), lda)
                          call stdlib_dsymm('left', uplo, kb, n - k - kb + 1, -half, a(k, k), lda, b( &
                                    k, k + kb), ldb, one, a(k, k + kb), lda)
                          call stdlib_dtrsm('right', uplo, 'no transpose', 'non-unit', kb, n - k - kb + &
                                    1, one, b(k + kb, k + kb), ldb, a(k, k + kb), lda)
                       end if
                    end do
                 else
                    ! compute inv(l)*a*inv(l**t)
                    do k = 1, n, nb
                       kb = min(n - k + 1, nb)
                       ! update the lower triangle of a(k:n,k:n)
                       call stdlib_dsygs2(itype, uplo, kb, a(k, k), lda, b(k, k), ldb, info)
                                 
                       if (k + kb <= n) then
                          call stdlib_dtrsm('right', uplo, 'transpose', 'non-unit', n - k - kb + 1, kb, &
                                    one, b(k, k), ldb, a(k + kb, k), lda)
                          call stdlib_dsymm('right', uplo, n - k - kb + 1, kb, -half, a(k, k), lda, b( &
                                     k + kb, k), ldb, one, a(k + kb, k), lda)
                          call stdlib_dsyr2k(uplo, 'no transpose', n - k - kb + 1, kb, -one, a(k + kb, k &
                                    ), lda, b(k + kb, k), ldb, one, a(k + kb, k + kb), lda)
                          call stdlib_dsymm('right', uplo, n - k - kb + 1, kb, -half, a(k, k), lda, b( &
                                     k + kb, k), ldb, one, a(k + kb, k), lda)
                          call stdlib_dtrsm('left', uplo, 'no transpose', 'non-unit', n - k - kb + 1, &
                                    kb, one, b(k + kb, k + kb), ldb, a(k + kb, k), lda)
                       end if
                    end do
                 end if
              else
                 if (upper) then
                    ! compute u*a*u**t
                    do k = 1, n, nb
                       kb = min(n - k + 1, nb)
                       ! update the upper triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib_dtrmm('left', uplo, 'no transpose', 'non-unit', k - 1, kb, one, &
                                 b, ldb, a(1, k), lda)
                       call stdlib_dsymm('right', uplo, k - 1, kb, half, a(k, k), lda, b(1, k), &
                                 ldb, one, a(1, k), lda)
                       call stdlib_dsyr2k(uplo, 'no transpose', k - 1, kb, one, a(1, k), lda, b( &
                                 1, k), ldb, one, a, lda)
                       call stdlib_dsymm('right', uplo, k - 1, kb, half, a(k, k), lda, b(1, k), &
                                 ldb, one, a(1, k), lda)
                       call stdlib_dtrmm('right', uplo, 'transpose', 'non-unit', k - 1, kb, one, b( &
                                 k, k), ldb, a(1, k), lda)
                       call stdlib_dsygs2(itype, uplo, kb, a(k, k), lda, b(k, k), ldb, info)
                                 
                    end do
                 else
                    ! compute l**t*a*l
                    do k = 1, n, nb
                       kb = min(n - k + 1, nb)
                       ! update the lower triangle of a(1:k+kb-1,1:k+kb-1)
                       call stdlib_dtrmm('right', uplo, 'no transpose', 'non-unit', kb, k - 1, one, &
                                 b, ldb, a(k, 1), lda)
                       call stdlib_dsymm('left', uplo, kb, k - 1, half, a(k, k), lda, b(k, 1), &
                                 ldb, one, a(k, 1), lda)
                       call stdlib_dsyr2k(uplo, 'transpose', k - 1, kb, one, a(k, 1), lda, b(k, &
                                 1), ldb, one, a, lda)
                       call stdlib_dsymm('left', uplo, kb, k - 1, half, a(k, k), lda, b(k, 1), &
                                 ldb, one, a(k, 1), lda)
                       call stdlib_dtrmm('left', uplo, 'transpose', 'non-unit', kb, k - 1, one, b( &
                                 k, k), ldb, a(k, 1), lda)
                       call stdlib_dsygs2(itype, uplo, kb, a(k, k), lda, b(k, k), ldb, info)
                                 
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_dsygst
     end subroutine stdlib_dsygst

     ! DSYSWAPR applies an elementary permutation on the rows and the columns of
     ! a symmetric matrix.

     subroutine stdlib_dsyswapr(uplo, n, a, lda, i1, i2)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: i1, i2, lda, n
           ! .. array arguments ..
           real(dp) :: a(lda, n)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i
           real(dp) :: tmp
     
           ! .. executable statements ..
           upper = stdlib_lsame(uplo, 'u')
           if (upper) then
               ! upper
               ! first swap
                ! - swap column i1 and i2 from i1 to i1-1
              call stdlib_dswap(i1 - 1, a(1, i1), 1, a(1, i2), 1)
                ! second swap :
                ! - swap a(i1,i1) and a(i2,i2)
                ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1
              tmp = a(i1, i1)
              a(i1, i1) = a(i2, i2)
              a(i2, i2) = tmp
              do i = 1, i2 - i1 - 1
                 tmp = a(i1, i1 + i)
                 a(i1, i1 + i) = a(i1 + i, i2)
                 a(i1 + i, i2) = tmp
              end do
                ! third swap
                ! - swap row i1 and i2 from i2+1 to n
              do i = i2 + 1, n
                 tmp = a(i1, i)
                 a(i1, i) = a(i2, i)
                 a(i2, i) = tmp
              end do
             else
               ! lower
               ! first swap
                ! - swap row i1 and i2 from i1 to i1-1
              call stdlib_dswap(i1 - 1, a(i1, 1), lda, a(i2, 1), lda)
               ! second swap :
                ! - swap a(i1,i1) and a(i2,i2)
                ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1
               tmp = a(i1, i1)
               a(i1, i1) = a(i2, i2)
               a(i2, i2) = tmp
               do i = 1, i2 - i1 - 1
                  tmp = a(i1 + i, i1)
                  a(i1 + i, i1) = a(i2, i1 + i)
                  a(i2, i1 + i) = tmp
               end do
               ! third swap
                ! - swap col i1 and i2 from i2+1 to n
               do i = i2 + 1, n
                  tmp = a(i, i1)
                  a(i, i1) = a(i, i2)
                  a(i, i2) = tmp
               end do
           end if
     end subroutine stdlib_dsyswapr

     ! DSYTF2_RK computes the factorization of a real symmetric matrix A
     ! using the bounded Bunch-Kaufman (rook) diagonal pivoting method:
     ! A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
     ! where U (or L) is unit upper (or lower) triangular matrix,
     ! U**T (or L**T) is the transpose of U (or L), P is a permutation
     ! matrix, P**T is the transpose of P, and D is symmetric and block
     ! diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.
     ! For more information see Further Details section.

     subroutine stdlib_dsytf2_rk(uplo, n, a, lda, e, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), e(*)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: sevten = 17.0_dp
           
           ! .. local scalars ..
           logical(lk) :: upper, done
           integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii
           real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, dtemp, t, wk, wkm1, wkp1, &
                      sfmin
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytf2_rk', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_dlamch('s')
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! initialize the first entry of array e, where superdiagonal
              ! elements of d are stored
              e(1) = zero
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 34
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_idamax(k - 1, a(1, k), 1)
                 colmax = abs(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 ! set e( k ) to zero
                 if (k > 1) e(k) = zero
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange,
                    ! use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_idamax(k - imax, a(imax, imax + 1), lda)
                          rowmax = abs(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_idamax(imax - 1, a(1, imax), 1)
                          dtemp = abs(a(itemp, imax))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! abs( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (abs(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the leading
                    ! submatrix a(1:k,1:k) if we have a 2-by-2 pivot
                    if (p > 1) call stdlib_dswap(p - 1, a(1, k), 1, a(1, p), 1)
                    if (p < (k - 1)) call stdlib_dswap(k - p - 1, a(p + 1, k), 1, a(p, p + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                    ! convert upper triangle of a into u form by applying
                    ! the interchanges in columns k+1:n.
                    if (k < n) call stdlib_dswap(n - k, a(k, k + 1), lda, a(p, k + 1), lda)
                 end if
                 ! second swap
                 kk = k - kstep + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    if (kp > 1) call stdlib_dswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    if ((kk > 1) .and. (kp < (kk - 1))) call stdlib_dswap(kk - kp - 1, a(kp + 1, kk), &
                              1, a(kp, kp + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                    ! convert upper triangle of a into u form by applying
                    ! the interchanges in columns k+1:n.
                    if (k < n) call stdlib_dswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    if (k > 1) then
                       ! perform a rank-1 update of a(1:k-1,1:k-1) and
                       ! store u(k) in column k
                       if (abs(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(1:k-1,1:k-1) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*1/d(k)*w(k)**t
                          d11 = one/a(k, k)
                          call stdlib_dsyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                          ! store u(k) in column k
                          call stdlib_dscal(k - 1, d11, a(1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_dsyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                       end if
                       ! store the superdiagonal element of d in array e
                       e(k) = zero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k > 2) then
                       d12 = a(k - 1, k)
                       d22 = a(k - 1, k - 1)/d12
                       d11 = a(k, k)/d12
                       t = one/(d11*d22 - one)
                       do j = k - 2, 1, -1
                          wkm1 = t*(d11*a(j, k - 1) - a(j, k))
                          wk = t*(d22*a(j, k) - a(j, k - 1))
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - (a(i, k)/d12)*wk - (a(i, k - 1)/d12) &
                                       *wkm1
                          end do
                          ! store u(k) and u(k-1) in cols k and k-1 for row j
                          a(j, k) = wk/d12
                          a(j, k - 1) = wkm1/d12
                       end do
                    end if
                    ! copy superdiagonal elements of d(k) to e(k) and
                    ! zero out superdiagonal entry of a
                    e(k) = a(k - 1, k)
                    e(k - 1) = zero
                    a(k - 1, k) = zero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
34      continue
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! initialize the unused last entry of the subdiagonal array e.
              e(n) = zero
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
40      continue
              ! if k > n, exit from loop
              if (k > n) go to 64
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_idamax(n - k, a(k + 1, k), 1)
                 colmax = abs(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 ! set e( k ) to zero
                 if (k < n) e(k) = zero
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
42      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_idamax(imax - k, a(imax, k), lda)
                          rowmax = abs(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_idamax(n - imax, a(imax + 1, imax), 1)
                          dtemp = abs(a(itemp, imax))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! abs( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (abs(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 42
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the trailing
                    ! submatrix a(k:n,k:n) if we have a 2-by-2 pivot
                    if (p < n) call stdlib_dswap(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    if (p > (k + 1)) call stdlib_dswap(p - k - 1, a(k + 1, k), 1, a(p, k + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                    ! convert lower triangle of a into l form by applying
                    ! the interchanges in columns 1:k-1.
                    if (k > 1) call stdlib_dswap(k - 1, a(k, 1), lda, a(p, 1), lda)
                 end if
                 ! second swap
                 kk = k + kstep - 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_dswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    if ((kk < n) .and. (kp > (kk + 1))) call stdlib_dswap(kp - kk - 1, a(kk + 1, kk), &
                              1, a(kp, kk + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                    ! convert lower triangle of a into l form by applying
                    ! the interchanges in columns 1:k-1.
                    if (k > 1) call stdlib_dswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                    ! perform a rank-1 update of a(k+1:n,k+1:n) and
                    ! store l(k) in column k
                       if (abs(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                          d11 = one/a(k, k)
                          call stdlib_dsyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                          ! store l(k) in column k
                          call stdlib_dscal(n - k, d11, a(k + 1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_dsyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                       end if
                       ! store the subdiagonal element of d in array e
                       e(k) = zero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! perform a rank-2 update of a(k+2:n,k+2:n) as
                    ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t
                       ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k < n - 1) then
                       d21 = a(k + 1, k)
                       d11 = a(k + 1, k + 1)/d21
                       d22 = a(k, k)/d21
                       t = one/(d11*d22 - one)
                       do j = k + 2, n
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wk = t*(d11*a(j, k) - a(j, k + 1))
                          wkp1 = t*(d22*a(j, k + 1) - a(j, k))
                          ! perform a rank-2 update of a(k+2:n,k+2:n)
                          do i = j, n
                             a(i, j) = a(i, j) - (a(i, k)/d21)*wk - (a(i, k + 1)/d21) &
                                       *wkp1
                          end do
                          ! store l(k) and l(k+1) in cols k and k+1 for row j
                          a(j, k) = wk/d21
                          a(j, k + 1) = wkp1/d21
                       end do
                    end if
                    ! copy subdiagonal elements of d(k) to e(k) and
                    ! zero out subdiagonal entry of a
                    e(k) = a(k + 1, k)
                    e(k + 1) = zero
                    a(k + 1, k) = zero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 40
64      continue
           end if
           return
           ! end of stdlib_dsytf2_rk
     end subroutine stdlib_dsytf2_rk

     ! DSYTF2_ROOK computes the factorization of a real symmetric matrix A
     ! using the bounded Bunch-Kaufman ("rook") diagonal pivoting method:
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, U**T is the transpose of U, and D is symmetric and
     ! block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_dsytf2_rook(uplo, n, a, lda, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(dp), parameter :: sevten = 17.0_dp
           
           ! .. local scalars ..
           logical(lk) :: upper, done
           integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii
           real(dp) :: absakk, alpha, colmax, d11, d12, d21, d22, rowmax, dtemp, t, wk, wkm1, wkp1, &
                      sfmin
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytf2_rook', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_dlamch('s')
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 70
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_idamax(k - 1, a(1, k), 1)
                 colmax = abs(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange,
                    ! use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_idamax(k - imax, a(imax, imax + 1), lda)
                          rowmax = abs(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_idamax(imax - 1, a(1, imax), 1)
                          dtemp = abs(a(itemp, imax))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! abs( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (abs(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the leading
                    ! submatrix a(1:k,1:k) if we have a 2-by-2 pivot
                    if (p > 1) call stdlib_dswap(p - 1, a(1, k), 1, a(1, p), 1)
                    if (p < (k - 1)) call stdlib_dswap(k - p - 1, a(p + 1, k), 1, a(p, p + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                 end if
                 ! second swap
                 kk = k - kstep + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    if (kp > 1) call stdlib_dswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    if ((kk > 1) .and. (kp < (kk - 1))) call stdlib_dswap(kk - kp - 1, a(kp + 1, kk), &
                              1, a(kp, kp + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    if (k > 1) then
                       ! perform a rank-1 update of a(1:k-1,1:k-1) and
                       ! store u(k) in column k
                       if (abs(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(1:k-1,1:k-1) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*1/d(k)*w(k)**t
                          d11 = one/a(k, k)
                          call stdlib_dsyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                          ! store u(k) in column k
                          call stdlib_dscal(k - 1, d11, a(1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_dsyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k > 2) then
                       d12 = a(k - 1, k)
                       d22 = a(k - 1, k - 1)/d12
                       d11 = a(k, k)/d12
                       t = one/(d11*d22 - one)
                       do j = k - 2, 1, -1
                          wkm1 = t*(d11*a(j, k - 1) - a(j, k))
                          wk = t*(d22*a(j, k) - a(j, k - 1))
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - (a(i, k)/d12)*wk - (a(i, k - 1)/d12) &
                                       *wkm1
                          end do
                          ! store u(k) and u(k-1) in cols k and k-1 for row j
                          a(j, k) = wk/d12
                          a(j, k - 1) = wkm1/d12
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
40      continue
              ! if k > n, exit from loop
              if (k > n) go to 70
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_idamax(n - k, a(k + 1, k), 1)
                 colmax = abs(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
42      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_idamax(imax - k, a(imax, k), lda)
                          rowmax = abs(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_idamax(n - imax, a(imax + 1, imax), 1)
                          dtemp = abs(a(itemp, imax))
                          if (dtemp > rowmax) then
                             rowmax = dtemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! abs( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (abs(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 42
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the trailing
                    ! submatrix a(k:n,k:n) if we have a 2-by-2 pivot
                    if (p < n) call stdlib_dswap(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    if (p > (k + 1)) call stdlib_dswap(p - k - 1, a(k + 1, k), 1, a(p, k + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                 end if
                 ! second swap
                 kk = k + kstep - 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_dswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    if ((kk < n) .and. (kp > (kk + 1))) call stdlib_dswap(kp - kk - 1, a(kk + 1, kk), &
                              1, a(kp, kk + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                    ! perform a rank-1 update of a(k+1:n,k+1:n) and
                    ! store l(k) in column k
                       if (abs(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                          d11 = one/a(k, k)
                          call stdlib_dsyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                          ! store l(k) in column k
                          call stdlib_dscal(n - k, d11, a(k + 1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_dsyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! perform a rank-2 update of a(k+2:n,k+2:n) as
                    ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t
                       ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k < n - 1) then
                       d21 = a(k + 1, k)
                       d11 = a(k + 1, k + 1)/d21
                       d22 = a(k, k)/d21
                       t = one/(d11*d22 - one)
                       do j = k + 2, n
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wk = t*(d11*a(j, k) - a(j, k + 1))
                          wkp1 = t*(d22*a(j, k + 1) - a(j, k))
                          ! perform a rank-2 update of a(k+2:n,k+2:n)
                          do i = j, n
                             a(i, j) = a(i, j) - (a(i, k)/d21)*wk - (a(i, k + 1)/d21) &
                                       *wkp1
                          end do
                          ! store l(k) and l(k+1) in cols k and k+1 for row j
                          a(j, k) = wk/d21
                          a(j, k + 1) = wkp1/d21
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 40
           end if
70      continue
           return
           ! end of stdlib_dsytf2_rook
     end subroutine stdlib_dsytf2_rook

     ! DSYTRF_RK computes the factorization of a real symmetric matrix A
     ! using the bounded Bunch-Kaufman (rook) diagonal pivoting method:
     ! A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
     ! where U (or L) is unit upper (or lower) triangular matrix,
     ! U**T (or L**T) is the transpose of U (or L), P is a permutation
     ! matrix, P**T is the transpose of P, and D is symmetric and block
     ! diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the blocked version of the algorithm, calling Level 3 BLAS.
     ! For more information see Further Details section.

     subroutine stdlib_dsytrf_rk(uplo, n, a, lda, e, ipiv, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, lwork, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), e(*), work(*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: lquery, upper
           integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           lquery = (lwork == -1)
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           else if (lwork < 1 .and. .not. lquery) then
              info = -8
           end if
           if (info == 0) then
              ! determine the block size
              nb = stdlib_ilaenv(1, 'stdlib_dsytrf_rk', uplo, n, -1, -1, -1)
              lwkopt = n*nb
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytrf_rk', -info)
              return
           else if (lquery) then
              return
           end if
           nbmin = 2
           ldwork = n
           if (nb > 1 .and. nb < n) then
              iws = ldwork*nb
              if (lwork < iws) then
                 nb = max(lwork/ldwork, 1)
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dsytrf_rk', uplo, n, -1, -1, -1))
                           
              end if
           else
              iws = 1
           end if
           if (nb < nbmin) nb = n
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! kb, where kb is the number of columns factorized by stdlib_dlasyf_rk;
              ! kb is either nb or nb-1, or k for the last block
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 15
              if (k > nb) then
                 ! factorize columns k-kb+1:k of a and use blocked code to
                 ! update columns 1:k-kb
                 call stdlib_dlasyf_rk(uplo, k, nb, kb, a, lda, e, ipiv, work, ldwork, iinfo)
                           
              else
                 ! use unblocked code to factorize columns 1:k of a
                 call stdlib_dsytf2_rk(uplo, k, a, lda, e, ipiv, iinfo)
                 kb = k
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo
              ! no need to adjust ipiv
              ! apply permutations to the leading panel 1:k-1
              ! read ipiv from the last block factored, i.e.
              ! indices  k-kb+1:k and apply row permutations to the
              ! last k+1 colunms k+1:n after that block
              ! (we can do the simple loop over ipiv with decrement -1,
              ! since the abs value of ipiv( i ) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              if (k < n) then
                 do i = k, (k - kb + 1), -1
                    ip = abs(ipiv(i))
                    if (ip /= i) then
                       call stdlib_dswap(n - k, a(i, k + 1), lda, a(ip, k + 1), lda)
                    end if
                 end do
              end if
              ! decrease k and return to the start of the main loop
              k = k - kb
              go to 10
              ! this label is the exit from main loop over k decreasing
              ! from n to 1 in steps of kb
15      continue
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! kb, where kb is the number of columns factorized by stdlib_dlasyf_rk;
              ! kb is either nb or nb-1, or n-k+1 for the last block
              k = 1
20      continue
              ! if k > n, exit from loop
              if (k > n) go to 35
              if (k <= n - nb) then
                 ! factorize columns k:k+kb-1 of a and use blocked code to
                 ! update columns k+kb:n
                 call stdlib_dlasyf_rk(uplo, n - k + 1, nb, kb, a(k, k), lda, e(k), ipiv(k), &
                           work, ldwork, iinfo)
              else
                 ! use unblocked code to factorize columns k:n of a
                 call stdlib_dsytf2_rk(uplo, n - k + 1, a(k, k), lda, e(k), ipiv(k), iinfo)
                           
                 kb = n - k + 1
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo + k - 1
              ! adjust ipiv
              do i = k, k + kb - 1
                 if (ipiv(i) > 0) then
                    ipiv(i) = ipiv(i) + k - 1
                 else
                    ipiv(i) = ipiv(i) - k + 1
                 end if
              end do
              ! apply permutations to the leading panel 1:k-1
              ! read ipiv from the last block factored, i.e.
              ! indices  k:k+kb-1 and apply row permutations to the
              ! first k-1 colunms 1:k-1 before that block
              ! (we can do the simple loop over ipiv with increment 1,
              ! since the abs value of ipiv( i ) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              if (k > 1) then
                 do i = k, (k + kb - 1), 1
                    ip = abs(ipiv(i))
                    if (ip /= i) then
                       call stdlib_dswap(k - 1, a(i, 1), lda, a(ip, 1), lda)
                    end if
                 end do
              end if
              ! increase k and return to the start of the main loop
              k = k + kb
              go to 20
              ! this label is the exit from main loop over k increasing
              ! from 1 to n in steps of kb
35      continue
           ! end lower
           end if
           work(1) = lwkopt
           return
           ! end of stdlib_dsytrf_rk
     end subroutine stdlib_dsytrf_rk

     ! DSYTRF_ROOK computes the factorization of a real symmetric matrix A
     ! using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
     ! The form of the factorization is
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, and D is symmetric and block diagonal with
     ! 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the blocked version of the algorithm, calling Level 3 BLAS.

     subroutine stdlib_dsytrf_rook(uplo, n, a, lda, ipiv, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, lwork, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), work(*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: lquery, upper
           integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           lquery = (lwork == -1)
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           else if (lwork < 1 .and. .not. lquery) then
              info = -7
           end if
           if (info == 0) then
              ! determine the block size
              nb = stdlib_ilaenv(1, 'stdlib_dsytrf_rook', uplo, n, -1, -1, -1)
              lwkopt = max(1, n*nb)
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytrf_rook', -info)
              return
           else if (lquery) then
              return
           end if
           nbmin = 2
           ldwork = n
           if (nb > 1 .and. nb < n) then
              iws = ldwork*nb
              if (lwork < iws) then
                 nb = max(lwork/ldwork, 1)
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_dsytrf_rook', uplo, n, -1, -1, -1))
                           
              end if
           else
              iws = 1
           end if
           if (nb < nbmin) nb = n
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! kb, where kb is the number of columns factorized by stdlib_dlasyf_rook;
              ! kb is either nb or nb-1, or k for the last block
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 40
              if (k > nb) then
                 ! factorize columns k-kb+1:k of a and use blocked code to
                 ! update columns 1:k-kb
                 call stdlib_dlasyf_rook(uplo, k, nb, kb, a, lda, ipiv, work, ldwork, iinfo)
                           
              else
                 ! use unblocked code to factorize columns 1:k of a
                 call stdlib_dsytf2_rook(uplo, k, a, lda, ipiv, iinfo)
                 kb = k
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo
              ! no need to adjust ipiv
              ! decrease k and return to the start of the main loop
              k = k - kb
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! kb, where kb is the number of columns factorized by stdlib_dlasyf_rook;
              ! kb is either nb or nb-1, or n-k+1 for the last block
              k = 1
20      continue
              ! if k > n, exit from loop
              if (k > n) go to 40
              if (k <= n - nb) then
                 ! factorize columns k:k+kb-1 of a and use blocked code to
                 ! update columns k+kb:n
                 call stdlib_dlasyf_rook(uplo, n - k + 1, nb, kb, a(k, k), lda, ipiv(k), work, &
                           ldwork, iinfo)
              else
                 ! use unblocked code to factorize columns k:n of a
                 call stdlib_dsytf2_rook(uplo, n - k + 1, a(k, k), lda, ipiv(k), iinfo)
                 kb = n - k + 1
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo + k - 1
              ! adjust ipiv
              do j = k, k + kb - 1
                 if (ipiv(j) > 0) then
                    ipiv(j) = ipiv(j) + k - 1
                 else
                    ipiv(j) = ipiv(j) - k + 1
                 end if
              end do
              ! increase k and return to the start of the main loop
              k = k + kb
              go to 20
           end if
40      continue
           work(1) = lwkopt
           return
           ! end of stdlib_dsytrf_rook
     end subroutine stdlib_dsytrf_rook

     ! DSYTRI computes the inverse of a real symmetric indefinite matrix
     ! A using the factorization A = U*D*U**T or A = L*D*L**T computed by
     ! DSYTRF.

     subroutine stdlib_dsytri(uplo, n, a, lda, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: k, kp, kstep
           real(dp) :: ak, akkp1, akp1, d, t, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytri', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. a(info, info) == zero) return
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              do info = 1, n
                 if (ipiv(info) > 0 .and. a(info, info) == zero) return
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 40
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/a(k, k)
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_dcopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, k - 1, -one, a, lda, work, 1, zero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_ddot(k - 1, work, 1, a(1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k + 1))
                 ak = a(k, k)/t
                 akp1 = a(k + 1, k + 1)/t
                 akkp1 = a(k, k + 1)/t
                 d = t*(ak*akp1 - one)
                 a(k, k) = akp1/d
                 a(k + 1, k + 1) = ak/d
                 a(k, k + 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_dcopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, k - 1, -one, a, lda, work, 1, zero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_ddot(k - 1, work, 1, a(1, k), 1)
                    a(k, k + 1) = a(k, k + 1) - stdlib_ddot(k - 1, a(1, k), 1, a(1, k + 1), 1)
                              
                    call stdlib_dcopy(k - 1, a(1, k + 1), 1, work, 1)
                    call stdlib_dsymv(uplo, k - 1, -one, a, lda, work, 1, zero, a(1, k + 1), 1)
                              
                    a(k + 1, k + 1) = a(k + 1, k + 1) - stdlib_ddot(k - 1, work, 1, a(1, k + 1), 1)
                              
                 end if
                 kstep = 2
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 call stdlib_dswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                 call stdlib_dswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                 temp = a(k, k)
                 a(k, k) = a(kp, kp)
                 a(kp, kp) = temp
                 if (kstep == 2) then
                    temp = a(k, k + 1)
                    a(k, k + 1) = a(kp, k + 1)
                    a(kp, k + 1) = temp
                 end if
              end if
              k = k + kstep
              go to 30
40      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
50      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 60
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/a(k, k)
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_dcopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, n - k, -one, a(k + 1, k + 1), lda, work, 1, zero, a(k + 1, &
                              k), 1)
                    a(k, k) = a(k, k) - stdlib_ddot(n - k, work, 1, a(k + 1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k - 1))
                 ak = a(k - 1, k - 1)/t
                 akp1 = a(k, k)/t
                 akkp1 = a(k, k - 1)/t
                 d = t*(ak*akp1 - one)
                 a(k - 1, k - 1) = akp1/d
                 a(k, k) = ak/d
                 a(k, k - 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_dcopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, n - k, -one, a(k + 1, k + 1), lda, work, 1, zero, a(k + 1, &
                              k), 1)
                    a(k, k) = a(k, k) - stdlib_ddot(n - k, work, 1, a(k + 1, k), 1)
                    a(k, k - 1) = a(k, k - 1) - stdlib_ddot(n - k, a(k + 1, k), 1, a(k + 1, k - 1), 1)
                              
                    call stdlib_dcopy(n - k, a(k + 1, k - 1), 1, work, 1)
                    call stdlib_dsymv(uplo, n - k, -one, a(k + 1, k + 1), lda, work, 1, zero, a(k + 1, &
                              k - 1), 1)
                    a(k - 1, k - 1) = a(k - 1, k - 1) - stdlib_ddot(n - k, work, 1, a(k + 1, k - 1), 1)
                              
                 end if
                 kstep = 2
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 if (kp < n) call stdlib_dswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                 call stdlib_dswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                 temp = a(k, k)
                 a(k, k) = a(kp, kp)
                 a(kp, kp) = temp
                 if (kstep == 2) then
                    temp = a(k, k - 1)
                    a(k, k - 1) = a(kp, k - 1)
                    a(kp, k - 1) = temp
                 end if
              end if
              k = k - kstep
              go to 50
60      continue
           end if
           return
           ! end of stdlib_dsytri
     end subroutine stdlib_dsytri

     ! DSYTRI_ROOK computes the inverse of a real symmetric
     ! matrix A using the factorization A = U*D*U**T or A = L*D*L**T
     ! computed by DSYTRF_ROOK.

     subroutine stdlib_dsytri_rook(uplo, n, a, lda, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: k, kp, kstep
           real(dp) :: ak, akkp1, akp1, d, t, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytri_rook', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. a(info, info) == zero) return
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              do info = 1, n
                 if (ipiv(info) > 0 .and. a(info, info) == zero) return
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 40
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/a(k, k)
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_dcopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, k - 1, -one, a, lda, work, 1, zero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_ddot(k - 1, work, 1, a(1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k + 1))
                 ak = a(k, k)/t
                 akp1 = a(k + 1, k + 1)/t
                 akkp1 = a(k, k + 1)/t
                 d = t*(ak*akp1 - one)
                 a(k, k) = akp1/d
                 a(k + 1, k + 1) = ak/d
                 a(k, k + 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_dcopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, k - 1, -one, a, lda, work, 1, zero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_ddot(k - 1, work, 1, a(1, k), 1)
                    a(k, k + 1) = a(k, k + 1) - stdlib_ddot(k - 1, a(1, k), 1, a(1, k + 1), 1)
                              
                    call stdlib_dcopy(k - 1, a(1, k + 1), 1, work, 1)
                    call stdlib_dsymv(uplo, k - 1, -one, a, lda, work, 1, zero, a(1, k + 1), 1)
                              
                    a(k + 1, k + 1) = a(k + 1, k + 1) - stdlib_ddot(k - 1, work, 1, a(1, k + 1), 1)
                              
                 end if
                 kstep = 2
              end if
              if (kstep == 1) then
                 ! interchange rows and columns k and ipiv(k) in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 kp = ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_dswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    call stdlib_dswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              else
                 ! interchange rows and columns k and k+1 with -ipiv(k) and
                 ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1)
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_dswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    call stdlib_dswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                    temp = a(k, k + 1)
                    a(k, k + 1) = a(kp, k + 1)
                    a(kp, k + 1) = temp
                 end if
                 k = k + 1
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_dswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    call stdlib_dswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              end if
              k = k + 1
              go to 30
40      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
50      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 60
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/a(k, k)
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_dcopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, n - k, -one, a(k + 1, k + 1), lda, work, 1, zero, a(k + 1, &
                              k), 1)
                    a(k, k) = a(k, k) - stdlib_ddot(n - k, work, 1, a(k + 1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k - 1))
                 ak = a(k - 1, k - 1)/t
                 akp1 = a(k, k)/t
                 akkp1 = a(k, k - 1)/t
                 d = t*(ak*akp1 - one)
                 a(k - 1, k - 1) = akp1/d
                 a(k, k) = ak/d
                 a(k, k - 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_dcopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_dsymv(uplo, n - k, -one, a(k + 1, k + 1), lda, work, 1, zero, a(k + 1, &
                              k), 1)
                    a(k, k) = a(k, k) - stdlib_ddot(n - k, work, 1, a(k + 1, k), 1)
                    a(k, k - 1) = a(k, k - 1) - stdlib_ddot(n - k, a(k + 1, k), 1, a(k + 1, k - 1), 1)
                              
                    call stdlib_dcopy(n - k, a(k + 1, k - 1), 1, work, 1)
                    call stdlib_dsymv(uplo, n - k, -one, a(k + 1, k + 1), lda, work, 1, zero, a(k + 1, &
                              k - 1), 1)
                    a(k - 1, k - 1) = a(k - 1, k - 1) - stdlib_ddot(n - k, work, 1, a(k + 1, k - 1), 1)
                              
                 end if
                 kstep = 2
              end if
              if (kstep == 1) then
                 ! interchange rows and columns k and ipiv(k) in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 kp = ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_dswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    call stdlib_dswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              else
                 ! interchange rows and columns k and k-1 with -ipiv(k) and
                 ! -ipiv(k-1) in the trailing submatrix a(k-1:n,k-1:n)
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_dswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    call stdlib_dswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                    temp = a(k, k - 1)
                    a(k, k - 1) = a(kp, k - 1)
                    a(kp, k - 1) = temp
                 end if
                 k = k - 1
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_dswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    call stdlib_dswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              end if
              k = k - 1
              go to 50
60      continue
           end if
           return
           ! end of stdlib_dsytri_rook
     end subroutine stdlib_dsytri_rook

     ! DSYTRS solves a system of linear equations A*X = B with a real
     ! symmetric matrix A using the factorization A = U*D*U**T or
     ! A = L*D*L**T computed by DSYTRF.

     subroutine stdlib_dsytrs(uplo, n, nrhs, a, lda, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kp
           real(dp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b, where a = u*d*u**t.
              ! first solve u*d*x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
10      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 30
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_dger(k - 1, nrhs, -one, a(1, k), 1, b(k, 1), ldb, b(1, 1), ldb)
                           
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_dscal(nrhs, one/a(k, k), b(k, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k-1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k - 1) call stdlib_dswap(nrhs, b(k - 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in columns k-1 and k of a.
                 call stdlib_dger(k - 2, nrhs, -one, a(1, k), 1, b(k, 1), ldb, b(1, 1), ldb)
                           
                 call stdlib_dger(k - 2, nrhs, -one, a(1, k - 1), 1, b(k - 1, 1), ldb, b(1, 1), &
                           ldb)
                 ! multiply by the inverse of the diagonal block.
                 akm1k = a(k - 1, k)
                 akm1 = a(k - 1, k - 1)/akm1k
                 ak = a(k, k)/akm1k
                 denom = akm1*ak - one
                 do j = 1, nrhs
                    bkm1 = b(k - 1, j)/akm1k
                    bk = b(k, j)/akm1k
                    b(k - 1, j) = (ak*bkm1 - bk)/denom
                    b(k, j) = (akm1*bk - bkm1)/denom
                 end do
                 k = k - 2
              end if
              go to 10
30      continue
              ! next solve u**t *x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
40      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(u**t(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, a(1, k), 1, one, b(k, &
                           1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation
                 ! stored in columns k and k+1 of a.
                 call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, a(1, k), 1, one, b(k, &
                           1), ldb)
                 call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, a(1, k + 1), 1, one, b( &
                           k + 1, 1), ldb)
                 ! interchange rows k and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k + 2
              end if
              go to 40
50      continue
           else
              ! solve a*x = b, where a = l*d*l**t.
              ! first solve l*d*x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
60      continue
              ! if k > n, exit from loop.
              if (k > n) go to 80
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_dger(n - k, nrhs, -one, a(k + 1, k), 1, b(k, 1), ldb, b(k + &
                           1, 1), ldb)
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_dscal(nrhs, one/a(k, k), b(k, 1), ldb)
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k+1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k + 1) call stdlib_dswap(nrhs, b(k + 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in columns k and k+1 of a.
                 if (k < n - 1) then
                    call stdlib_dger(n - k - 1, nrhs, -one, a(k + 2, k), 1, b(k, 1), ldb, b(k + 2, 1 &
                              ), ldb)
                    call stdlib_dger(n - k - 1, nrhs, -one, a(k + 2, k + 1), 1, b(k + 1, 1), ldb, b(k + &
                              2, 1), ldb)
                 end if
                 ! multiply by the inverse of the diagonal block.
                 akm1k = a(k + 1, k)
                 akm1 = a(k, k)/akm1k
                 ak = a(k + 1, k + 1)/akm1k
                 denom = akm1*ak - one
                 do j = 1, nrhs
                    bkm1 = b(k, j)/akm1k
                    bk = b(k + 1, j)/akm1k
                    b(k, j) = (ak*bkm1 - bk)/denom
                    b(k + 1, j) = (akm1*bk - bkm1)/denom
                 end do
                 k = k + 2
              end if
              go to 60
80      continue
              ! next solve l**t *x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
90      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 100
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(l**t(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, a(k + &
                           1, k), 1, one, b(k, 1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(l**t(k-1)), where l(k-1) is the transformation
                 ! stored in columns k-1 and k of a.
                 if (k < n) then
                    call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, a(k + 1, k), &
                               1, one, b(k, 1), ldb)
                    call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, a(k + 1, k - 1 &
                              ), 1, one, b(k - 1, 1), ldb)
                 end if
                 ! interchange rows k and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k - 2
              end if
              go to 90
100    continue
           end if
           return
           ! end of stdlib_dsytrs
     end subroutine stdlib_dsytrs

     ! DSYTRS2 solves a system of linear equations A*X = B with a real
     ! symmetric matrix A using the factorization A = U*D*U**T or
     ! A = L*D*L**T computed by DSYTRF and converted by DSYCONV.

     subroutine stdlib_dsytrs2(uplo, n, nrhs, a, lda, ipiv, b, ldb, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), b(ldb, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, iinfo, j, k, kp
           real(dp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytrs2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           ! convert a
           call stdlib_dsyconv(uplo, 'c', n, a, lda, ipiv, work, iinfo)
           if (upper) then
              ! solve a*x = b, where a = u*d*u**t.
             ! p**t * b
             k = n
             do while (k >= 1)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k-1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp == -ipiv(k - 1)) call stdlib_dswap(nrhs, b(k - 1, 1), ldb, b(kp, 1), ldb &
                           )
                 k = k - 2
              end if
             end do
        ! compute (u \p**t * b) -> b    [ (u \p**t * b) ]
             call stdlib_dtrsm('l', 'u', 'n', 'u', n, nrhs, one, a, lda, b, ldb)
        ! compute d \ b -> b   [ d \ (u \p**t * b) ]
              i = n
              do while (i >= 1)
                 if (ipiv(i) > 0) then
                   call stdlib_dscal(nrhs, one/a(i, i), b(i, 1), ldb)
                 elseif (i > 1) then
                    if (ipiv(i - 1) == ipiv(i)) then
                       akm1k = work(i)
                       akm1 = a(i - 1, i - 1)/akm1k
                       ak = a(i, i)/akm1k
                       denom = akm1*ak - one
                       do j = 1, nrhs
                          bkm1 = b(i - 1, j)/akm1k
                          bk = b(i, j)/akm1k
                          b(i - 1, j) = (ak*bkm1 - bk)/denom
                          b(i, j) = (akm1*bk - bkm1)/denom
                       end do
                    i = i - 1
                    end if
                 end if
                 i = i - 1
              end do
            ! compute (u**t \ b) -> b   [ u**t \ (d \ (u \p**t * b) ) ]
              call stdlib_dtrsm('l', 'u', 't', 'u', n, nrhs, one, a, lda, b, ldb)
             ! p * b  [ p * (u**t \ (d \ (u \p**t * b) )) ]
             k = 1
             do while (k <= n)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k-1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (k < n .and. kp == -ipiv(k + 1)) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, &
                            1), ldb)
                 k = k + 2
              end if
             end do
           else
              ! solve a*x = b, where a = l*d*l**t.
             ! p**t * b
             k = 1
             do while (k <= n)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k and -ipiv(k+1).
                 kp = -ipiv(k + 1)
                 if (kp == -ipiv(k)) call stdlib_dswap(nrhs, b(k + 1, 1), ldb, b(kp, 1), ldb)
                           
                 k = k + 2
              end if
             end do
        ! compute (l \p**t * b) -> b    [ (l \p**t * b) ]
             call stdlib_dtrsm('l', 'l', 'n', 'u', n, nrhs, one, a, lda, b, ldb)
        ! compute d \ b -> b   [ d \ (l \p**t * b) ]
              i = 1
              do while (i <= n)
                 if (ipiv(i) > 0) then
                   call stdlib_dscal(nrhs, one/a(i, i), b(i, 1), ldb)
                 else
                       akm1k = work(i)
                       akm1 = a(i, i)/akm1k
                       ak = a(i + 1, i + 1)/akm1k
                       denom = akm1*ak - one
                       do j = 1, nrhs
                          bkm1 = b(i, j)/akm1k
                          bk = b(i + 1, j)/akm1k
                          b(i, j) = (ak*bkm1 - bk)/denom
                          b(i + 1, j) = (akm1*bk - bkm1)/denom
                       end do
                       i = i + 1
                 end if
                 i = i + 1
              end do
        ! compute (l**t \ b) -> b   [ l**t \ (d \ (l \p**t * b) ) ]
             call stdlib_dtrsm('l', 'l', 't', 'u', n, nrhs, one, a, lda, b, ldb)
             ! p * b  [ p * (l**t \ (d \ (l \p**t * b) )) ]
             k = n
             do while (k >= 1)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k-1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (k > 1 .and. kp == -ipiv(k - 1)) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, &
                           1), ldb)
                 k = k - 2
              end if
             end do
           end if
           ! revert a
           call stdlib_dsyconv(uplo, 'r', n, a, lda, ipiv, work, iinfo)
           return
           ! end of stdlib_dsytrs2
     end subroutine stdlib_dsytrs2

     ! DSYTRS_3 solves a system of linear equations A * X = B with a real
     ! symmetric matrix A using the factorization computed
     ! by DSYTRF_RK or DSYTRF_BK:
     ! A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
     ! where U (or L) is unit upper (or lower) triangular matrix,
     ! U**T (or L**T) is the transpose of U (or L), P is a permutation
     ! matrix, P**T is the transpose of P, and D is symmetric and block
     ! diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This algorithm is using Level 3 BLAS.

     subroutine stdlib_dsytrs_3(uplo, n, nrhs, a, lda, e, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), b(ldb, *), e(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, j, k, kp
           real(dp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytrs_3', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! begin upper
              ! solve a*x = b, where a = u*d*u**t.
              ! p**t * b
              ! interchange rows k and ipiv(k) of matrix b in the same order
              ! that the formation order of ipiv(i) vector for upper case.
              ! (we can do the simple loop over ipiv with decrement -1,
              ! since the abs value of ipiv( i ) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = n, 1, -1
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
              ! compute (u \p**t * b) -> b    [ (u \p**t * b) ]
              call stdlib_dtrsm('l', 'u', 'n', 'u', n, nrhs, one, a, lda, b, ldb)
              ! compute d \ b -> b   [ d \ (u \p**t * b) ]
              i = n
              do while (i >= 1)
                 if (ipiv(i) > 0) then
                    call stdlib_dscal(nrhs, one/a(i, i), b(i, 1), ldb)
                 else if (i > 1) then
                    akm1k = e(i)
                    akm1 = a(i - 1, i - 1)/akm1k
                    ak = a(i, i)/akm1k
                    denom = akm1*ak - one
                    do j = 1, nrhs
                       bkm1 = b(i - 1, j)/akm1k
                       bk = b(i, j)/akm1k
                       b(i - 1, j) = (ak*bkm1 - bk)/denom
                       b(i, j) = (akm1*bk - bkm1)/denom
                    end do
                    i = i - 1
                 end if
                 i = i - 1
              end do
              ! compute (u**t \ b) -> b   [ u**t \ (d \ (u \p**t * b) ) ]
              call stdlib_dtrsm('l', 'u', 't', 'u', n, nrhs, one, a, lda, b, ldb)
              ! p * b  [ p * (u**t \ (d \ (u \p**t * b) )) ]
              ! interchange rows k and ipiv(k) of matrix b in reverse order
              ! from the formation order of ipiv(i) vector for upper case.
              ! (we can do the simple loop over ipiv with increment 1,
              ! since the abs value of ipiv(i) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = 1, n
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
           else
              ! begin lower
              ! solve a*x = b, where a = l*d*l**t.
              ! p**t * b
              ! interchange rows k and ipiv(k) of matrix b in the same order
              ! that the formation order of ipiv(i) vector for lower case.
              ! (we can do the simple loop over ipiv with increment 1,
              ! since the abs value of ipiv(i) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = 1, n
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
              ! compute (l \p**t * b) -> b    [ (l \p**t * b) ]
              call stdlib_dtrsm('l', 'l', 'n', 'u', n, nrhs, one, a, lda, b, ldb)
              ! compute d \ b -> b   [ d \ (l \p**t * b) ]
              i = 1
              do while (i <= n)
                 if (ipiv(i) > 0) then
                    call stdlib_dscal(nrhs, one/a(i, i), b(i, 1), ldb)
                 else if (i < n) then
                    akm1k = e(i)
                    akm1 = a(i, i)/akm1k
                    ak = a(i + 1, i + 1)/akm1k
                    denom = akm1*ak - one
                    do j = 1, nrhs
                       bkm1 = b(i, j)/akm1k
                       bk = b(i + 1, j)/akm1k
                       b(i, j) = (ak*bkm1 - bk)/denom
                       b(i + 1, j) = (akm1*bk - bkm1)/denom
                    end do
                    i = i + 1
                 end if
                 i = i + 1
              end do
              ! compute (l**t \ b) -> b   [ l**t \ (d \ (l \p**t * b) ) ]
              call stdlib_dtrsm('l', 'l', 't', 'u', n, nrhs, one, a, lda, b, ldb)
              ! p * b  [ p * (l**t \ (d \ (l \p**t * b) )) ]
              ! interchange rows k and ipiv(k) of matrix b in reverse order
              ! from the formation order of ipiv(i) vector for lower case.
              ! (we can do the simple loop over ipiv with decrement -1,
              ! since the abs value of ipiv(i) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = n, 1, -1
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
              ! end lower
           end if
           return
           ! end of stdlib_dsytrs_3
     end subroutine stdlib_dsytrs_3

     ! DSYTRS_AA solves a system of linear equations A*X = B with a real
     ! symmetric matrix A using the factorization A = U**T*T*U or
     ! A = L*T*L**T computed by DSYTRF_AA.

     subroutine stdlib_dsytrs_aa(uplo, n, nrhs, a, lda, ipiv, b, ldb, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
     
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: n, nrhs, lda, ldb, lwork, info
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), b(ldb, *), work(*)
        ! =====================================================================
           
           logical(lk) :: lquery, upper
           integer(ilp) :: k, kp, lwkopt
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           lquery = (lwork == -1)
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -8
           else if (lwork < max(1, 3*n - 2) .and. .not. lquery) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytrs_aa', -info)
              return
           else if (lquery) then
              lwkopt = (3*n - 2)
              work(1) = lwkopt
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b, where a = u**t*t*u.
              ! 1) forward substitution with u**t
              if (n > 1) then
                 ! pivot, p**t * b -> b
                 do k = 1, n
                    kp = ipiv(k)
                    if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end do
                 ! compute u**t \ b -> b    [ (u**t \p**t * b) ]
                 call stdlib_dtrsm('l', 'u', 't', 'u', n - 1, nrhs, one, a(1, 2), lda, b(2, 1), &
                           ldb)
              end if
              ! 2) solve with triangular matrix t
              ! compute t \ b -> b   [ t \ (u**t \p**t * b) ]
              call stdlib_dlacpy('f', 1, n, a(1, 1), lda + 1, work(n), 1)
              if (n > 1) then
                 call stdlib_dlacpy('f', 1, n - 1, a(1, 2), lda + 1, work(1), 1)
                 call stdlib_dlacpy('f', 1, n - 1, a(1, 2), lda + 1, work(2*n), 1)
              end if
              call stdlib_dgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb, info)
              ! 3) backward substitution with u
              if (n > 1) then
                 ! compute u \ b -> b   [ u \ (t \ (u**t \p**t * b) ) ]
                 call stdlib_dtrsm('l', 'u', 'n', 'u', n - 1, nrhs, one, a(1, 2), lda, b(2, 1), &
                           ldb)
                 ! pivot, p * b -> b  [ p * (u \ (t \ (u**t \p**t * b) )) ]
                 do k = n, 1, -1
                    kp = ipiv(k)
                    if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end do
              end if
           else
              ! solve a*x = b, where a = l*t*l**t.
              ! 1) forward substitution with l
              if (n > 1) then
                 ! pivot, p**t * b -> b
                 do k = 1, n
                    kp = ipiv(k)
                    if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end do
                 ! compute l \ b -> b    [ (l \p**t * b) ]
                 call stdlib_dtrsm('l', 'l', 'n', 'u', n - 1, nrhs, one, a(2, 1), lda, b(2, 1), &
                           ldb)
              end if
              ! 2) solve with triangular matrix t
              ! compute t \ b -> b   [ t \ (l \p**t * b) ]
              call stdlib_dlacpy('f', 1, n, a(1, 1), lda + 1, work(n), 1)
              if (n > 1) then
                 call stdlib_dlacpy('f', 1, n - 1, a(2, 1), lda + 1, work(1), 1)
                 call stdlib_dlacpy('f', 1, n - 1, a(2, 1), lda + 1, work(2*n), 1)
              end if
              call stdlib_dgtsv(n, nrhs, work(1), work(n), work(2*n), b, ldb, info)
              ! 3) backward substitution with l**t
              if (n > 1) then
                 ! compute (l**t \ b) -> b   [ l**t \ (t \ (l \p**t * b) ) ]
                 call stdlib_dtrsm('l', 'l', 't', 'u', n - 1, nrhs, one, a(2, 1), lda, b(2, 1), &
                           ldb)
                 ! pivot, p * b -> b  [ p * (l**t \ (t \ (l \p**t * b) )) ]
                 do k = n, 1, -1
                    kp = ipiv(k)
                    if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end do
              end if
           end if
           return
           ! end of stdlib_dsytrs_aa
     end subroutine stdlib_dsytrs_aa

     ! DSYTRS_ROOK solves a system of linear equations A*X = B with
     ! a real symmetric matrix A using the factorization A = U*D*U**T or
     ! A = L*D*L**T computed by DSYTRF_ROOK.

     subroutine stdlib_dsytrs_rook(uplo, n, nrhs, a, lda, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kp
           real(dp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dsytrs_rook', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b, where a = u*d*u**t.
              ! first solve u*d*x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
10      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 30
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_dger(k - 1, nrhs, -one, a(1, k), 1, b(k, 1), ldb, b(1, 1), ldb)
                           
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_dscal(nrhs, one/a(k, k), b(k, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1)
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kp = -ipiv(k - 1)
                 if (kp /= k - 1) call stdlib_dswap(nrhs, b(k - 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in columns k-1 and k of a.
                 if (k > 2) then
                    call stdlib_dger(k - 2, nrhs, -one, a(1, k), 1, b(k, 1), ldb, b(1, 1), &
                              ldb)
                    call stdlib_dger(k - 2, nrhs, -one, a(1, k - 1), 1, b(k - 1, 1), ldb, b(1, 1), &
                               ldb)
                 end if
                 ! multiply by the inverse of the diagonal block.
                 akm1k = a(k - 1, k)
                 akm1 = a(k - 1, k - 1)/akm1k
                 ak = a(k, k)/akm1k
                 denom = akm1*ak - one
                 do j = 1, nrhs
                    bkm1 = b(k - 1, j)/akm1k
                    bk = b(k, j)/akm1k
                    b(k - 1, j) = (ak*bkm1 - bk)/denom
                    b(k, j) = (akm1*bk - bkm1)/denom
                 end do
                 k = k - 2
              end if
              go to 10
30      continue
              ! next solve u**t *x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
40      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(u**t(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 if (k > 1) call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, a(1, k), 1, &
                           one, b(k, 1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation
                 ! stored in columns k and k+1 of a.
                 if (k > 1) then
                    call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, a(1, k), 1, one, b( &
                              k, 1), ldb)
                    call stdlib_dgemv('transpose', k - 1, nrhs, -one, b, ldb, a(1, k + 1), 1, one, &
                              b(k + 1, 1), ldb)
                 end if
                 ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kp = -ipiv(k + 1)
                 if (kp /= k + 1) call stdlib_dswap(nrhs, b(k + 1, 1), ldb, b(kp, 1), ldb)
                 k = k + 2
              end if
              go to 40
50      continue
           else
              ! solve a*x = b, where a = l*d*l**t.
              ! first solve l*d*x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
60      continue
              ! if k > n, exit from loop.
              if (k > n) go to 80
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_dger(n - k, nrhs, -one, a(k + 1, k), 1, b(k, 1), ldb, b(k + &
                           1, 1), ldb)
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_dscal(nrhs, one/a(k, k), b(k, 1), ldb)
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k and -ipiv(k) then k+1 and -ipiv(k+1)
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kp = -ipiv(k + 1)
                 if (kp /= k + 1) call stdlib_dswap(nrhs, b(k + 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in columns k and k+1 of a.
                 if (k < n - 1) then
                    call stdlib_dger(n - k - 1, nrhs, -one, a(k + 2, k), 1, b(k, 1), ldb, b(k + 2, 1 &
                              ), ldb)
                    call stdlib_dger(n - k - 1, nrhs, -one, a(k + 2, k + 1), 1, b(k + 1, 1), ldb, b(k + &
                              2, 1), ldb)
                 end if
                 ! multiply by the inverse of the diagonal block.
                 akm1k = a(k + 1, k)
                 akm1 = a(k, k)/akm1k
                 ak = a(k + 1, k + 1)/akm1k
                 denom = akm1*ak - one
                 do j = 1, nrhs
                    bkm1 = b(k, j)/akm1k
                    bk = b(k + 1, j)/akm1k
                    b(k, j) = (ak*bkm1 - bk)/denom
                    b(k + 1, j) = (akm1*bk - bkm1)/denom
                 end do
                 k = k + 2
              end if
              go to 60
80      continue
              ! next solve l**t *x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
90      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 100
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(l**t(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, a(k + &
                           1, k), 1, one, b(k, 1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(l**t(k-1)), where l(k-1) is the transformation
                 ! stored in columns k-1 and k of a.
                 if (k < n) then
                    call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, a(k + 1, k), &
                               1, one, b(k, 1), ldb)
                    call stdlib_dgemv('transpose', n - k, nrhs, -one, b(k + 1, 1), ldb, a(k + 1, k - 1 &
                              ), 1, one, b(k - 1, 1), ldb)
                 end if
                 ! interchange rows k and -ipiv(k) then k-1 and -ipiv(k-1)
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_dswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kp = -ipiv(k - 1)
                 if (kp /= k - 1) call stdlib_dswap(nrhs, b(k - 1, 1), ldb, b(kp, 1), ldb)
                 k = k - 2
              end if
              go to 90
100    continue
           end if
           return
           ! end of stdlib_dsytrs_rook
     end subroutine stdlib_dsytrs_rook

     ! DTBRFS provides error bounds and backward error estimates for the
     ! solution to a system of linear equations with a triangular band
     ! coefficient matrix.
     ! The solution matrix X must be computed by DTBTRS or some other
     ! means before entering this routine.  DTBRFS does not do iterative
     ! refinement because doing so cannot improve the backward error.

     subroutine stdlib_dtbrfs(uplo, trans, diag, n, kd, nrhs, ab, ldab, b, ldb, x, ldx, ferr, &
               berr, work, iwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, trans, uplo
           integer(ilp) :: info, kd, ldab, ldb, ldx, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: iwork(*)
           real(dp) :: ab(ldab, *), b(ldb, *), berr(*), ferr(*), work(*), x(ldx, *)
                     
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           character :: transt
           integer(ilp) :: i, j, k, kase, nz
           real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk
           ! .. local arrays ..
           integer(ilp) :: isave(3)
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (kd < 0) then
              info = -5
           else if (nrhs < 0) then
              info = -6
           else if (ldab < kd + 1) then
              info = -8
           else if (ldb < max(1, n)) then
              info = -10
           else if (ldx < max(1, n)) then
              info = -12
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtbrfs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) then
              do j = 1, nrhs
                 ferr(j) = zero
                 berr(j) = zero
              end do
              return
           end if
           if (notran) then
              transt = 't'
           else
              transt = 'n'
           end if
           ! nz = maximum number of nonzero elements in each row of a, plus 1
           nz = kd + 2
           eps = stdlib_dlamch('epsilon')
           safmin = stdlib_dlamch('safe minimum')
           safe1 = nz*safmin
           safe2 = safe1/eps
           ! do for each right hand side
           loop_250: do j = 1, nrhs
              ! compute residual r = b - op(a) * x,
              ! where op(a) = a or a**t, depending on trans.
              call stdlib_dcopy(n, x(1, j), 1, work(n + 1), 1)
              call stdlib_dtbmv(uplo, trans, diag, n, kd, ab, ldab, work(n + 1), 1)
              call stdlib_daxpy(n, -one, b(1, j), 1, work(n + 1), 1)
              ! compute componentwise relative backward error from formula
              ! max(i) ( abs(r(i)) / ( abs(op(a))*abs(x) + abs(b) )(i) )
              ! where abs(z) is the componentwise absolute value of the matrix
              ! or vector z.  if the i-th component of the denominator is less
              ! than safe2, then safe1 is added to the i-th components of the
              ! numerator and denominator before dividing.
              do i = 1, n
                 work(i) = abs(b(i, j))
              end do
              if (notran) then
                 ! compute abs(a)*abs(x) + abs(b).
                 if (upper) then
                    if (nounit) then
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = max(1, k - kd), k
                             work(i) = work(i) + abs(ab(kd + 1 + i - k, k))*xk
                          end do
                       end do
                    else
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = max(1, k - kd), k - 1
                             work(i) = work(i) + abs(ab(kd + 1 + i - k, k))*xk
                          end do
                          work(k) = work(k) + xk
                       end do
                    end if
                 else
                    if (nounit) then
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = k, min(n, k + kd)
                             work(i) = work(i) + abs(ab(1 + i - k, k))*xk
                          end do
                       end do
                    else
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = k + 1, min(n, k + kd)
                             work(i) = work(i) + abs(ab(1 + i - k, k))*xk
                          end do
                          work(k) = work(k) + xk
                       end do
                    end if
                 end if
              else
                 ! compute abs(a**t)*abs(x) + abs(b).
                 if (upper) then
                    if (nounit) then
                       do k = 1, n
                          s = zero
                          do i = max(1, k - kd), k
                             s = s + abs(ab(kd + 1 + i - k, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    else
                       do k = 1, n
                          s = abs(x(k, j))
                          do i = max(1, k - kd), k - 1
                             s = s + abs(ab(kd + 1 + i - k, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    end if
                 else
                    if (nounit) then
                       do k = 1, n
                          s = zero
                          do i = k, min(n, k + kd)
                             s = s + abs(ab(1 + i - k, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    else
                       do k = 1, n
                          s = abs(x(k, j))
                          do i = k + 1, min(n, k + kd)
                             s = s + abs(ab(1 + i - k, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    end if
                 end if
              end if
              s = zero
              do i = 1, n
                 if (work(i) > safe2) then
                    s = max(s, abs(work(n + i))/work(i))
                 else
                    s = max(s, (abs(work(n + i)) + safe1)/(work(i) + safe1))
                 end if
              end do
              berr(j) = s
              ! bound error from formula
              ! norm(x - xtrue) / norm(x) .le. ferr =
              ! norm( abs(inv(op(a)))*
                 ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x)
              ! where
                ! norm(z) is the magnitude of the largest component of z
                ! inv(op(a)) is the inverse of op(a)
                ! abs(z) is the componentwise absolute value of the matrix or
                   ! vector z
                ! nz is the maximum number of nonzeros in any row of a, plus 1
                ! eps is machine epsilon
              ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b))
              ! is incremented by safe1 if the i-th component of
              ! abs(op(a))*abs(x) + abs(b) is less than safe2.
              ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix
                 ! inv(op(a)) * diag(w),
              ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) )))
              do i = 1, n
                 if (work(i) > safe2) then
                    work(i) = abs(work(n + i)) + nz*eps*work(i)
                 else
                    work(i) = abs(work(n + i)) + nz*eps*work(i) + safe1
                 end if
              end do
              kase = 0
210    continue
              call stdlib_dlacn2(n, work(2*n + 1), work(n + 1), iwork, ferr(j), kase, isave)
                        
              if (kase /= 0) then
                 if (kase == 1) then
                    ! multiply by diag(w)*inv(op(a)**t).
                    call stdlib_dtbsv(uplo, transt, diag, n, kd, ab, ldab, work(n + 1), 1)
                              
                    do i = 1, n
                       work(n + i) = work(i)*work(n + i)
                    end do
                 else
                    ! multiply by inv(op(a))*diag(w).
                    do i = 1, n
                       work(n + i) = work(i)*work(n + i)
                    end do
                    call stdlib_dtbsv(uplo, trans, diag, n, kd, ab, ldab, work(n + 1), 1)
                 end if
                 go to 210
              end if
              ! normalize error.
              lstres = zero
              do i = 1, n
                 lstres = max(lstres, abs(x(i, j)))
              end do
              if (lstres /= zero) ferr(j) = ferr(j)/lstres
           end do loop_250
           return
           ! end of stdlib_dtbrfs
     end subroutine stdlib_dtbrfs

     ! DTBTRS solves a triangular system of the form
     ! A * X = B  or  A**T * X = B,
     ! where A is a triangular band matrix of order N, and B is an
     ! N-by NRHS matrix.  A check is made to verify that A is nonsingular.

     subroutine stdlib_dtbtrs(uplo, trans, diag, n, kd, nrhs, ab, ldab, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, trans, uplo
           integer(ilp) :: info, kd, ldab, ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: ab(ldab, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: nounit, upper
           integer(ilp) :: j
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           nounit = stdlib_lsame(diag, 'n')
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. stdlib_lsame(trans, 'n') .and. .not. stdlib_lsame(trans, 't') .and. &
                     .not. stdlib_lsame(trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (kd < 0) then
              info = -5
           else if (nrhs < 0) then
              info = -6
           else if (ldab < kd + 1) then
              info = -8
           else if (ldb < max(1, n)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtbtrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check for singularity.
           if (nounit) then
              if (upper) then
                 do info = 1, n
                    if (ab(kd + 1, info) == zero) return
                 end do
              else
                 do info = 1, n
                    if (ab(1, info) == zero) return
                 end do
              end if
           end if
           info = 0
           ! solve a * x = b  or  a**t * x = b.
           do j = 1, nrhs
              call stdlib_dtbsv(uplo, trans, diag, n, kd, ab, ldab, b(1, j), 1)
           end do
           return
           ! end of stdlib_dtbtrs
     end subroutine stdlib_dtbtrs

     ! Level 3 BLAS like routine for A in RFP Format.
     ! DTFSM  solves the matrix equation
     ! op( A )*X = alpha*B  or  X*op( A ) = alpha*B
     ! where alpha is a scalar, X and B are m by n matrices, A is a unit, or
     ! non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
     ! op( A ) = A   or   op( A ) = A**T.
     ! A is in Rectangular Full Packed (RFP) Format.
     ! The matrix X is overwritten on B.

     subroutine stdlib_dtfsm(transr, side, uplo, trans, diag, m, n, alpha, a, b, ldb)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: transr, diag, side, trans, uplo
           integer(ilp) :: ldb, m, n
           real(dp) :: alpha
           ! .. array arguments ..
           real(dp) :: a(0:*), b(0:ldb - 1, 0:*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lower, lside, misodd, nisodd, normaltransr, notrans
           integer(ilp) :: m1, m2, n1, n2, k, info, i, j
     
           ! .. intrinsic functions ..
           intrinsic :: max, mod
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           normaltransr = stdlib_lsame(transr, 'n')
           lside = stdlib_lsame(side, 'l')
           lower = stdlib_lsame(uplo, 'l')
           notrans = stdlib_lsame(trans, 'n')
           if (.not. normaltransr .and. .not. stdlib_lsame(transr, 't')) then
              info = -1
           else if (.not. lside .and. .not. stdlib_lsame(side, 'r')) then
              info = -2
           else if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -3
           else if (.not. notrans .and. .not. stdlib_lsame(trans, 't')) then
              info = -4
           else if (.not. stdlib_lsame(diag, 'n') .and. .not. stdlib_lsame(diag, 'u')) &
                     then
              info = -5
           else if (m < 0) then
              info = -6
           else if (n < 0) then
              info = -7
           else if (ldb < max(1, m)) then
              info = -11
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtfsm ', -info)
              return
           end if
           ! quick return when ( (n==0).or.(m==0) )
           if ((m == 0) .or. (n == 0)) return
           ! quick return when alpha==(0_dp)
           if (alpha == zero) then
              do j = 0, n - 1
                 do i = 0, m - 1
                    b(i, j) = zero
                 end do
              end do
              return
           end if
           if (lside) then
              ! side = 'l'
              ! a is m-by-m.
              ! if m is odd, set nisodd = .true., and m1 and m2.
              ! if m is even, nisodd = .false., and m.
              if (mod(m, 2) == 0) then
                 misodd = .false.
                 k = m/2
              else
                 misodd = .true.
                 if (lower) then
                    m2 = m/2
                    m1 = m - m2
                 else
                    m1 = m/2
                    m2 = m - m1
                 end if
              end if
              if (misodd) then
                 ! side = 'l' and n is odd
                 if (normaltransr) then
                    ! side = 'l', n is odd, and transr = 'n'
                    if (lower) then
                       ! side  ='l', n is odd, transr = 'n', and uplo = 'l'
                       if (notrans) then
                          ! side  ='l', n is odd, transr = 'n', uplo = 'l', and
                          ! trans = 'n'
                          if (m == 1) then
                             call stdlib_dtrsm('l', 'l', 'n', diag, m1, n, alpha, a, m, b, ldb)
                                       
                          else
                             call stdlib_dtrsm('l', 'l', 'n', diag, m1, n, alpha, a(0), m, b, &
                                       ldb)
                             call stdlib_dgemm('n', 'n', m2, n, m1, -one, a(m1), m, b, ldb, &
                                       alpha, b(m1, 0), ldb)
                             call stdlib_dtrsm('l', 'u', 't', diag, m2, n, one, a(m), m, b(m1, &
                                       0), ldb)
                          end if
                       else
                          ! side  ='l', n is odd, transr = 'n', uplo = 'l', and
                          ! trans = 't'
                          if (m == 1) then
                             call stdlib_dtrsm('l', 'l', 't', diag, m1, n, alpha, a(0), m, b, &
                                       ldb)
                          else
                             call stdlib_dtrsm('l', 'u', 'n', diag, m2, n, alpha, a(m), m, b( &
                                       m1, 0), ldb)
                             call stdlib_dgemm('t', 'n', m1, n, m2, -one, a(m1), m, b(m1, 0), &
                                       ldb, alpha, b, ldb)
                             call stdlib_dtrsm('l', 'l', 't', diag, m1, n, one, a(0), m, b, ldb &
                                       )
                          end if
                       end if
                    else
                       ! side  ='l', n is odd, transr = 'n', and uplo = 'u'
                       if (.not. notrans) then
                          ! side  ='l', n is odd, transr = 'n', uplo = 'u', and
                          ! trans = 'n'
                          call stdlib_dtrsm('l', 'l', 'n', diag, m1, n, alpha, a(m2), m, b, ldb &
                                    )
                          call stdlib_dgemm('t', 'n', m2, n, m1, -one, a(0), m, b, ldb, alpha, &
                                    b(m1, 0), ldb)
                          call stdlib_dtrsm('l', 'u', 't', diag, m2, n, one, a(m1), m, b(m1, 0 &
                                    ), ldb)
                       else
                          ! side  ='l', n is odd, transr = 'n', uplo = 'u', and
                          ! trans = 't'
                          call stdlib_dtrsm('l', 'u', 'n', diag, m2, n, alpha, a(m1), m, b(m1, &
                                    0), ldb)
                          call stdlib_dgemm('n', 'n', m1, n, m2, -one, a(0), m, b(m1, 0), ldb, &
                                     alpha, b, ldb)
                          call stdlib_dtrsm('l', 'l', 't', diag, m1, n, one, a(m2), m, b, ldb)
                                    
                       end if
                    end if
                 else
                    ! side = 'l', n is odd, and transr = 't'
                    if (lower) then
                       ! side  ='l', n is odd, transr = 't', and uplo = 'l'
                       if (notrans) then
                          ! side  ='l', n is odd, transr = 't', uplo = 'l', and
                          ! trans = 'n'
                          if (m == 1) then
                             call stdlib_dtrsm('l', 'u', 't', diag, m1, n, alpha, a(0), m1, b, &
                                       ldb)
                          else
                             call stdlib_dtrsm('l', 'u', 't', diag, m1, n, alpha, a(0), m1, b, &
                                       ldb)
                             call stdlib_dgemm('t', 'n', m2, n, m1, -one, a(m1*m1), m1, b, ldb, &
                                       alpha, b(m1, 0), ldb)
                             call stdlib_dtrsm('l', 'l', 'n', diag, m2, n, one, a(1), m1, b(m1, &
                                        0), ldb)
                          end if
                       else
                          ! side  ='l', n is odd, transr = 't', uplo = 'l', and
                          ! trans = 't'
                          if (m == 1) then
                             call stdlib_dtrsm('l', 'u', 'n', diag, m1, n, alpha, a(0), m1, b, &
                                       ldb)
                          else
                             call stdlib_dtrsm('l', 'l', 't', diag, m2, n, alpha, a(1), m1, b( &
                                       m1, 0), ldb)
                             call stdlib_dgemm('n', 'n', m1, n, m2, -one, a(m1*m1), m1, b(m1, &
                                       0), ldb, alpha, b, ldb)
                             call stdlib_dtrsm('l', 'u', 'n', diag, m1, n, one, a(0), m1, b, &
                                       ldb)
                          end if
                       end if
                    else
                       ! side  ='l', n is odd, transr = 't', and uplo = 'u'
                       if (.not. notrans) then
                          ! side  ='l', n is odd, transr = 't', uplo = 'u', and
                          ! trans = 'n'
                          call stdlib_dtrsm('l', 'u', 't', diag, m1, n, alpha, a(m2*m2), m2, b, &
                                    ldb)
                          call stdlib_dgemm('n', 'n', m2, n, m1, -one, a(0), m2, b, ldb, alpha, &
                                    b(m1, 0), ldb)
                          call stdlib_dtrsm('l', 'l', 'n', diag, m2, n, one, a(m1*m2), m2, b( &
                                    m1, 0), ldb)
                       else
                          ! side  ='l', n is odd, transr = 't', uplo = 'u', and
                          ! trans = 't'
                          call stdlib_dtrsm('l', 'l', 't', diag, m2, n, alpha, a(m1*m2), m2, b( &
                                    m1, 0), ldb)
                          call stdlib_dgemm('t', 'n', m1, n, m2, -one, a(0), m2, b(m1, 0), &
                                    ldb, alpha, b, ldb)
                          call stdlib_dtrsm('l', 'u', 'n', diag, m1, n, one, a(m2*m2), m2, b, &
                                    ldb)
                       end if
                    end if
                 end if
              else
                 ! side = 'l' and n is even
                 if (normaltransr) then
                    ! side = 'l', n is even, and transr = 'n'
                    if (lower) then
                       ! side  ='l', n is even, transr = 'n', and uplo = 'l'
                       if (notrans) then
                          ! side  ='l', n is even, transr = 'n', uplo = 'l',
                          ! and trans = 'n'
                          call stdlib_dtrsm('l', 'l', 'n', diag, k, n, alpha, a(1), m + 1, b, ldb &
                                    )
                          call stdlib_dgemm('n', 'n', k, n, k, -one, a(k + 1), m + 1, b, ldb, alpha, &
                                     b(k, 0), ldb)
                          call stdlib_dtrsm('l', 'u', 't', diag, k, n, one, a(0), m + 1, b(k, 0) &
                                    , ldb)
                       else
                          ! side  ='l', n is even, transr = 'n', uplo = 'l',
                          ! and trans = 't'
                          call stdlib_dtrsm('l', 'u', 'n', diag, k, n, alpha, a(0), m + 1, b(k, &
                                    0), ldb)
                          call stdlib_dgemm('t', 'n', k, n, k, -one, a(k + 1), m + 1, b(k, 0), &
                                    ldb, alpha, b, ldb)
                          call stdlib_dtrsm('l', 'l', 't', diag, k, n, one, a(1), m + 1, b, ldb)
                                    
                       end if
                    else
                       ! side  ='l', n is even, transr = 'n', and uplo = 'u'
                       if (.not. notrans) then
                          ! side  ='l', n is even, transr = 'n', uplo = 'u',
                          ! and trans = 'n'
                          call stdlib_dtrsm('l', 'l', 'n', diag, k, n, alpha, a(k + 1), m + 1, b, &
                                    ldb)
                          call stdlib_dgemm('t', 'n', k, n, k, -one, a(0), m + 1, b, ldb, alpha, &
                                    b(k, 0), ldb)
                          call stdlib_dtrsm('l', 'u', 't', diag, k, n, one, a(k), m + 1, b(k, 0) &
                                    , ldb)
                       else
                          ! side  ='l', n is even, transr = 'n', uplo = 'u',
                          ! and trans = 't'
                          call stdlib_dtrsm('l', 'u', 'n', diag, k, n, alpha, a(k), m + 1, b(k, &
                                    0), ldb)
                          call stdlib_dgemm('n', 'n', k, n, k, -one, a(0), m + 1, b(k, 0), ldb, &
                                    alpha, b, ldb)
                          call stdlib_dtrsm('l', 'l', 't', diag, k, n, one, a(k + 1), m + 1, b, ldb &
                                    )
                       end if
                    end if
                 else
                    ! side = 'l', n is even, and transr = 't'
                    if (lower) then
                       ! side  ='l', n is even, transr = 't', and uplo = 'l'
                       if (notrans) then
                          ! side  ='l', n is even, transr = 't', uplo = 'l',
                          ! and trans = 'n'
                          call stdlib_dtrsm('l', 'u', 't', diag, k, n, alpha, a(k), k, b, ldb)
                                    
                          call stdlib_dgemm('t', 'n', k, n, k, -one, a(k*(k + 1)), k, b, ldb, &
                                    alpha, b(k, 0), ldb)
                          call stdlib_dtrsm('l', 'l', 'n', diag, k, n, one, a(0), k, b(k, 0), &
                                    ldb)
                       else
                          ! side  ='l', n is even, transr = 't', uplo = 'l',
                          ! and trans = 't'
                          call stdlib_dtrsm('l', 'l', 't', diag, k, n, alpha, a(0), k, b(k, 0) &
                                    , ldb)
                          call stdlib_dgemm('n', 'n', k, n, k, -one, a(k*(k + 1)), k, b(k, 0), &
                                     ldb, alpha, b, ldb)
                          call stdlib_dtrsm('l', 'u', 'n', diag, k, n, one, a(k), k, b, ldb)
                                    
                       end if
                    else
                       ! side  ='l', n is even, transr = 't', and uplo = 'u'
                       if (.not. notrans) then
                          ! side  ='l', n is even, transr = 't', uplo = 'u',
                          ! and trans = 'n'
                          call stdlib_dtrsm('l', 'u', 't', diag, k, n, alpha, a(k*(k + 1)), k, &
                                    b, ldb)
                          call stdlib_dgemm('n', 'n', k, n, k, -one, a(0), k, b, ldb, alpha, b( &
                                    k, 0), ldb)
                          call stdlib_dtrsm('l', 'l', 'n', diag, k, n, one, a(k*k), k, b(k, 0) &
                                    , ldb)
                       else
                          ! side  ='l', n is even, transr = 't', uplo = 'u',
                          ! and trans = 't'
                          call stdlib_dtrsm('l', 'l', 't', diag, k, n, alpha, a(k*k), k, b(k, &
                                    0), ldb)
                          call stdlib_dgemm('t', 'n', k, n, k, -one, a(0), k, b(k, 0), ldb, &
                                    alpha, b, ldb)
                          call stdlib_dtrsm('l', 'u', 'n', diag, k, n, one, a(k*(k + 1)), k, b, &
                                    ldb)
                       end if
                    end if
                 end if
              end if
           else
              ! side = 'r'
              ! a is n-by-n.
              ! if n is odd, set nisodd = .true., and n1 and n2.
              ! if n is even, nisodd = .false., and k.
              if (mod(n, 2) == 0) then
                 nisodd = .false.
                 k = n/2
              else
                 nisodd = .true.
                 if (lower) then
                    n2 = n/2
                    n1 = n - n2
                 else
                    n1 = n/2
                    n2 = n - n1
                 end if
              end if
              if (nisodd) then
                 ! side = 'r' and n is odd
                 if (normaltransr) then
                    ! side = 'r', n is odd, and transr = 'n'
                    if (lower) then
                       ! side  ='r', n is odd, transr = 'n', and uplo = 'l'
                       if (notrans) then
                          ! side  ='r', n is odd, transr = 'n', uplo = 'l', and
                          ! trans = 'n'
                          call stdlib_dtrsm('r', 'u', 't', diag, m, n2, alpha, a(n), n, b(0, &
                                    n1), ldb)
                          call stdlib_dgemm('n', 'n', m, n1, n2, -one, b(0, n1), ldb, a(n1), &
                                    n, alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, n1, one, a(0), n, b(0, 0), &
                                     ldb)
                       else
                          ! side  ='r', n is odd, transr = 'n', uplo = 'l', and
                          ! trans = 't'
                          call stdlib_dtrsm('r', 'l', 't', diag, m, n1, alpha, a(0), n, b(0, 0 &
                                    ), ldb)
                          call stdlib_dgemm('n', 't', m, n2, n1, -one, b(0, 0), ldb, a(n1), n, &
                                     alpha, b(0, n1), ldb)
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, n2, one, a(n), n, b(0, n1) &
                                    , ldb)
                       end if
                    else
                       ! side  ='r', n is odd, transr = 'n', and uplo = 'u'
                       if (notrans) then
                          ! side  ='r', n is odd, transr = 'n', uplo = 'u', and
                          ! trans = 'n'
                          call stdlib_dtrsm('r', 'l', 't', diag, m, n1, alpha, a(n2), n, b(0, &
                                    0), ldb)
                          call stdlib_dgemm('n', 'n', m, n2, n1, -one, b(0, 0), ldb, a(0), n, &
                                    alpha, b(0, n1), ldb)
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, n2, one, a(n1), n, b(0, n1 &
                                    ), ldb)
                       else
                          ! side  ='r', n is odd, transr = 'n', uplo = 'u', and
                          ! trans = 't'
                          call stdlib_dtrsm('r', 'u', 't', diag, m, n2, alpha, a(n1), n, b(0, &
                                    n1), ldb)
                          call stdlib_dgemm('n', 't', m, n1, n2, -one, b(0, n1), ldb, a(0), n, &
                                     alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, n1, one, a(n2), n, b(0, 0) &
                                    , ldb)
                       end if
                    end if
                 else
                    ! side = 'r', n is odd, and transr = 't'
                    if (lower) then
                       ! side  ='r', n is odd, transr = 't', and uplo = 'l'
                       if (notrans) then
                          ! side  ='r', n is odd, transr = 't', uplo = 'l', and
                          ! trans = 'n'
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, n2, alpha, a(1), n1, b(0, &
                                    n1), ldb)
                          call stdlib_dgemm('n', 't', m, n1, n2, -one, b(0, n1), ldb, a(n1*n1) &
                                    , n1, alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'u', 't', diag, m, n1, one, a(0), n1, b(0, 0) &
                                    , ldb)
                       else
                          ! side  ='r', n is odd, transr = 't', uplo = 'l', and
                          ! trans = 't'
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, n1, alpha, a(0), n1, b(0, &
                                    0), ldb)
                          call stdlib_dgemm('n', 'n', m, n2, n1, -one, b(0, 0), ldb, a(n1*n1), &
                                     n1, alpha, b(0, n1), ldb)
                          call stdlib_dtrsm('r', 'l', 't', diag, m, n2, one, a(1), n1, b(0, n1 &
                                    ), ldb)
                       end if
                    else
                       ! side  ='r', n is odd, transr = 't', and uplo = 'u'
                       if (notrans) then
                          ! side  ='r', n is odd, transr = 't', uplo = 'u', and
                          ! trans = 'n'
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, n1, alpha, a(n2*n2), n2, b( &
                                    0, 0), ldb)
                          call stdlib_dgemm('n', 't', m, n2, n1, -one, b(0, 0), ldb, a(0), n2, &
                                     alpha, b(0, n1), ldb)
                          call stdlib_dtrsm('r', 'l', 't', diag, m, n2, one, a(n1*n2), n2, b(0, &
                                     n1), ldb)
                       else
                          ! side  ='r', n is odd, transr = 't', uplo = 'u', and
                          ! trans = 't'
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, n2, alpha, a(n1*n2), n2, b( &
                                    0, n1), ldb)
                          call stdlib_dgemm('n', 'n', m, n1, n2, -one, b(0, n1), ldb, a(0), &
                                    n2, alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'u', 't', diag, m, n1, one, a(n2*n2), n2, b(0, &
                                     0), ldb)
                       end if
                    end if
                 end if
              else
                 ! side = 'r' and n is even
                 if (normaltransr) then
                    ! side = 'r', n is even, and transr = 'n'
                    if (lower) then
                       ! side  ='r', n is even, transr = 'n', and uplo = 'l'
                       if (notrans) then
                          ! side  ='r', n is even, transr = 'n', uplo = 'l',
                          ! and trans = 'n'
                          call stdlib_dtrsm('r', 'u', 't', diag, m, k, alpha, a(0), n + 1, b(0, &
                                    k), ldb)
                          call stdlib_dgemm('n', 'n', m, k, k, -one, b(0, k), ldb, a(k + 1), n + &
                                    1, alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, k, one, a(1), n + 1, b(0, 0) &
                                    , ldb)
                       else
                          ! side  ='r', n is even, transr = 'n', uplo = 'l',
                          ! and trans = 't'
                          call stdlib_dtrsm('r', 'l', 't', diag, m, k, alpha, a(1), n + 1, b(0, &
                                    0), ldb)
                          call stdlib_dgemm('n', 't', m, k, k, -one, b(0, 0), ldb, a(k + 1), n + &
                                    1, alpha, b(0, k), ldb)
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, k, one, a(0), n + 1, b(0, k) &
                                    , ldb)
                       end if
                    else
                       ! side  ='r', n is even, transr = 'n', and uplo = 'u'
                       if (notrans) then
                          ! side  ='r', n is even, transr = 'n', uplo = 'u',
                          ! and trans = 'n'
                          call stdlib_dtrsm('r', 'l', 't', diag, m, k, alpha, a(k + 1), n + 1, b(0, &
                                     0), ldb)
                          call stdlib_dgemm('n', 'n', m, k, k, -one, b(0, 0), ldb, a(0), n + 1, &
                                    alpha, b(0, k), ldb)
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, k, one, a(k), n + 1, b(0, k) &
                                    , ldb)
                       else
                          ! side  ='r', n is even, transr = 'n', uplo = 'u',
                          ! and trans = 't'
                          call stdlib_dtrsm('r', 'u', 't', diag, m, k, alpha, a(k), n + 1, b(0, &
                                    k), ldb)
                          call stdlib_dgemm('n', 't', m, k, k, -one, b(0, k), ldb, a(0), n + 1, &
                                    alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, k, one, a(k + 1), n + 1, b(0, &
                                    0), ldb)
                       end if
                    end if
                 else
                    ! side = 'r', n is even, and transr = 't'
                    if (lower) then
                       ! side  ='r', n is even, transr = 't', and uplo = 'l'
                       if (notrans) then
                          ! side  ='r', n is even, transr = 't', uplo = 'l',
                          ! and trans = 'n'
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, k, alpha, a(0), k, b(0, k) &
                                    , ldb)
                          call stdlib_dgemm('n', 't', m, k, k, -one, b(0, k), ldb, a((k + 1)*k &
                                    ), k, alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'u', 't', diag, m, k, one, a(k), k, b(0, 0), &
                                    ldb)
                       else
                          ! side  ='r', n is even, transr = 't', uplo = 'l',
                          ! and trans = 't'
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, k, alpha, a(k), k, b(0, 0) &
                                    , ldb)
                          call stdlib_dgemm('n', 'n', m, k, k, -one, b(0, 0), ldb, a((k + 1)*k &
                                    ), k, alpha, b(0, k), ldb)
                          call stdlib_dtrsm('r', 'l', 't', diag, m, k, one, a(0), k, b(0, k), &
                                    ldb)
                       end if
                    else
                       ! side  ='r', n is even, transr = 't', and uplo = 'u'
                       if (notrans) then
                          ! side  ='r', n is even, transr = 't', uplo = 'u',
                          ! and trans = 'n'
                          call stdlib_dtrsm('r', 'u', 'n', diag, m, k, alpha, a((k + 1)*k), k, &
                                    b(0, 0), ldb)
                          call stdlib_dgemm('n', 't', m, k, k, -one, b(0, 0), ldb, a(0), k, &
                                    alpha, b(0, k), ldb)
                          call stdlib_dtrsm('r', 'l', 't', diag, m, k, one, a(k*k), k, b(0, k) &
                                    , ldb)
                       else
                          ! side  ='r', n is even, transr = 't', uplo = 'u',
                          ! and trans = 't'
                          call stdlib_dtrsm('r', 'l', 'n', diag, m, k, alpha, a(k*k), k, b(0, &
                                    k), ldb)
                          call stdlib_dgemm('n', 'n', m, k, k, -one, b(0, k), ldb, a(0), k, &
                                    alpha, b(0, 0), ldb)
                          call stdlib_dtrsm('r', 'u', 't', diag, m, k, one, a((k + 1)*k), k, b( &
                                    0, 0), ldb)
                       end if
                    end if
                 end if
              end if
           end if
           return
           ! end of stdlib_dtfsm
     end subroutine stdlib_dtfsm

     ! DTFTTP copies a triangular matrix A from rectangular full packed
     ! format (TF) to standard packed format (TP).

     subroutine stdlib_dtfttp(transr, uplo, n, arf, ap, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: transr, uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           real(dp) :: ap(0:*), arf(0:*)
        ! =====================================================================
           ! .. parameters ..
           ! .. local scalars ..
           logical(lk) :: lower, nisodd, normaltransr
           integer(ilp) :: n1, n2, k, nt
           integer(ilp) :: i, j, ij
           integer(ilp) :: ijp, jp, lda, js
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           normaltransr = stdlib_lsame(transr, 'n')
           lower = stdlib_lsame(uplo, 'l')
           if (.not. normaltransr .and. .not. stdlib_lsame(transr, 't')) then
              info = -1
           else if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -2
           else if (n < 0) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtfttp', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (n == 1) then
              if (normaltransr) then
                 ap(0) = arf(0)
              else
                 ap(0) = arf(0)
              end if
              return
           end if
           ! size of array arf(0:nt-1)
           nt = n*(n + 1)/2
           ! set n1 and n2 depending on lower
           if (lower) then
              n2 = n/2
              n1 = n - n2
           else
              n1 = n/2
              n2 = n - n1
           end if
           ! if n is odd, set nisodd = .true.
           ! if n is even, set k = n/2 and nisodd = .false.
           ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe)
           ! where noe = 0 if n is even, noe = 1 if n is odd
           if (mod(n, 2) == 0) then
              k = n/2
              nisodd = .false.
              lda = n + 1
           else
              nisodd = .true.
              lda = n
           end if
           ! arf^c has lda rows and n+1-noe cols
           if (.not. normaltransr) lda = (n + 1)/2
           ! start execution: there are eight cases
           if (nisodd) then
              ! n is odd
              if (normaltransr) then
                 ! n is odd and transr = 'n'
                 if (lower) then
                   ! srpa for lower, normal and n is odd ( a(0:n-1,0:n1-1) )
                   ! t1 -> a(0,0), t2 -> a(0,1), s -> a(n1,0)
                   ! t1 -> a(0), t2 -> a(n), s -> a(n1); lda = n
                    ijp = 0
                    jp = 0
                    do j = 0, n2
                       do i = j, n - 1
                          ij = i + jp
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       jp = jp + lda
                    end do
                    do i = 0, n2 - 1
                       do j = 1 + i, n2
                          ij = i + j*lda
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                    end do
                 else
                   ! srpa for upper, normal and n is odd ( a(0:n-1,0:n2-1)
                   ! t1 -> a(n1+1,0), t2 -> a(n1,0), s -> a(0,0)
                   ! t1 -> a(n2), t2 -> a(n1), s -> a(0)
                    ijp = 0
                    do j = 0, n1 - 1
                       ij = n2 + j
                       do i = 0, j
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                          ij = ij + lda
                       end do
                    end do
                    js = 0
                    do j = n1, n - 1
                       ij = js
                       do ij = js, js + j
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                 end if
              else
                 ! n is odd and transr = 't'
                 if (lower) then
                    ! srpa for lower, transpose and n is odd
                    ! t1 -> a(0,0) , t2 -> a(1,0) , s -> a(0,n1)
                    ! t1 -> a(0+0) , t2 -> a(1+0) , s -> a(0+n1*n1); lda=n1
                    ijp = 0
                    do i = 0, n2
                       do ij = i*(lda + 1), n*lda - 1, lda
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                    end do
                    js = 1
                    do j = 0, n2 - 1
                       do ij = js, js + n2 - j - 1
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       js = js + lda + 1
                    end do
                 else
                    ! srpa for upper, transpose and n is odd
                    ! t1 -> a(0,n1+1), t2 -> a(0,n1), s -> a(0,0)
                    ! t1 -> a(n2*n2), t2 -> a(n1*n2), s -> a(0); lda = n2
                    ijp = 0
                    js = n2*lda
                    do j = 0, n1 - 1
                       do ij = js, js + j
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                    do i = 0, n1
                       do ij = i, i + (n1 + i)*lda, lda
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                    end do
                 end if
              end if
           else
              ! n is even
              if (normaltransr) then
                 ! n is even and transr = 'n'
                 if (lower) then
                    ! srpa for lower, normal, and n is even ( a(0:n,0:k-1) )
                    ! t1 -> a(1,0), t2 -> a(0,0), s -> a(k+1,0)
                    ! t1 -> a(1), t2 -> a(0), s -> a(k+1)
                    ijp = 0
                    jp = 0
                    do j = 0, k - 1
                       do i = j, n - 1
                          ij = 1 + i + jp
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       jp = jp + lda
                    end do
                    do i = 0, k - 1
                       do j = i, k - 1
                          ij = i + j*lda
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                    end do
                 else
                    ! srpa for upper, normal, and n is even ( a(0:n,0:k-1) )
                    ! t1 -> a(k+1,0) ,  t2 -> a(k,0),   s -> a(0,0)
                    ! t1 -> a(k+1), t2 -> a(k), s -> a(0)
                    ijp = 0
                    do j = 0, k - 1
                       ij = k + 1 + j
                       do i = 0, j
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                          ij = ij + lda
                       end do
                    end do
                    js = 0
                    do j = k, n - 1
                       ij = js
                       do ij = js, js + j
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                 end if
              else
                 ! n is even and transr = 't'
                 if (lower) then
                    ! srpa for lower, transpose and n is even (see paper)
                    ! t1 -> b(0,1), t2 -> b(0,0), s -> b(0,k+1)
                    ! t1 -> a(0+k), t2 -> a(0+0), s -> a(0+k*(k+1)); lda=k
                    ijp = 0
                    do i = 0, k - 1
                       do ij = i + (i + 1)*lda, (n + 1)*lda - 1, lda
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                    end do
                    js = 0
                    do j = 0, k - 1
                       do ij = js, js + k - j - 1
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       js = js + lda + 1
                    end do
                 else
                    ! srpa for upper, transpose and n is even (see paper)
                    ! t1 -> b(0,k+1),     t2 -> b(0,k),   s -> b(0,0)
                    ! t1 -> a(0+k*(k+1)), t2 -> a(0+k*k), s -> a(0+0)); lda=k
                    ijp = 0
                    js = (k + 1)*lda
                    do j = 0, k - 1
                       do ij = js, js + j
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                    do i = 0, k - 1
                       do ij = i, i + (k + i)*lda, lda
                          ap(ijp) = arf(ij)
                          ijp = ijp + 1
                       end do
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_dtfttp
     end subroutine stdlib_dtfttp

     ! DTFTTR copies a triangular matrix A from rectangular full packed
     ! format (TF) to standard full format (TR).

     subroutine stdlib_dtfttr(transr, uplo, n, arf, a, lda, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: transr, uplo
           integer(ilp) :: info, n, lda
           ! .. array arguments ..
           real(dp) :: a(0:lda - 1, 0:*), arf(0:*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: lower, nisodd, normaltransr
           integer(ilp) :: n1, n2, k, nt, nx2, np1x2
           integer(ilp) :: i, j, l, ij
     
           ! .. intrinsic functions ..
           intrinsic :: max, mod
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           normaltransr = stdlib_lsame(transr, 'n')
           lower = stdlib_lsame(uplo, 'l')
           if (.not. normaltransr .and. .not. stdlib_lsame(transr, 't')) then
              info = -1
           else if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtfttr', -info)
              return
           end if
           ! quick return if possible
           if (n <= 1) then
              if (n == 1) then
                 a(0, 0) = arf(0)
              end if
              return
           end if
           ! size of array arf(0:nt-1)
           nt = n*(n + 1)/2
           ! set n1 and n2 depending on lower: for n even n1=n2=k
           if (lower) then
              n2 = n/2
              n1 = n - n2
           else
              n1 = n/2
              n2 = n - n1
           end if
           ! if n is odd, set nisodd = .true., lda=n+1 and a is (n+1)--by--k2.
           ! if n is even, set k = n/2 and nisodd = .false., lda=n and a is
           ! n--by--(n+1)/2.
           if (mod(n, 2) == 0) then
              k = n/2
              nisodd = .false.
              if (.not. lower) np1x2 = n + n + 2
           else
              nisodd = .true.
              if (.not. lower) nx2 = n + n
           end if
           if (nisodd) then
              ! n is odd
              if (normaltransr) then
                 ! n is odd and transr = 'n'
                 if (lower) then
                    ! n is odd, transr = 'n', and uplo = 'l'
                    ij = 0
                    do j = 0, n2
                       do i = n1, n2 + j
                          a(n2 + j, i) = arf(ij)
                          ij = ij + 1
                       end do
                       do i = j, n - 1
                          a(i, j) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is odd, transr = 'n', and uplo = 'u'
                    ij = nt - n
                    do j = n - 1, n1, -1
                       do i = 0, j
                          a(i, j) = arf(ij)
                          ij = ij + 1
                       end do
                       do l = j - n1, n1 - 1
                          a(j - n1, l) = arf(ij)
                          ij = ij + 1
                       end do
                       ij = ij - nx2
                    end do
                 end if
              else
                 ! n is odd and transr = 't'
                 if (lower) then
                    ! n is odd, transr = 't', and uplo = 'l'
                    ij = 0
                    do j = 0, n2 - 1
                       do i = 0, j
                          a(j, i) = arf(ij)
                          ij = ij + 1
                       end do
                       do i = n1 + j, n - 1
                          a(i, n1 + j) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                    do j = n2, n - 1
                       do i = 0, n1 - 1
                          a(j, i) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is odd, transr = 't', and uplo = 'u'
                    ij = 0
                    do j = 0, n1
                       do i = n1, n - 1
                          a(j, i) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                    do j = 0, n1 - 1
                       do i = 0, j
                          a(i, j) = arf(ij)
                          ij = ij + 1
                       end do
                       do l = n2 + j, n - 1
                          a(n2 + j, l) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                 end if
              end if
           else
              ! n is even
              if (normaltransr) then
                 ! n is even and transr = 'n'
                 if (lower) then
                    ! n is even, transr = 'n', and uplo = 'l'
                    ij = 0
                    do j = 0, k - 1
                       do i = k, k + j
                          a(k + j, i) = arf(ij)
                          ij = ij + 1
                       end do
                       do i = j, n - 1
                          a(i, j) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is even, transr = 'n', and uplo = 'u'
                    ij = nt - n - 1
                    do j = n - 1, k, -1
                       do i = 0, j
                          a(i, j) = arf(ij)
                          ij = ij + 1
                       end do
                       do l = j - k, k - 1
                          a(j - k, l) = arf(ij)
                          ij = ij + 1
                       end do
                       ij = ij - np1x2
                    end do
                 end if
              else
                 ! n is even and transr = 't'
                 if (lower) then
                    ! n is even, transr = 't', and uplo = 'l'
                    ij = 0
                    j = k
                    do i = k, n - 1
                       a(i, j) = arf(ij)
                       ij = ij + 1
                    end do
                    do j = 0, k - 2
                       do i = 0, j
                          a(j, i) = arf(ij)
                          ij = ij + 1
                       end do
                       do i = k + 1 + j, n - 1
                          a(i, k + 1 + j) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                    do j = k - 1, n - 1
                       do i = 0, k - 1
                          a(j, i) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is even, transr = 't', and uplo = 'u'
                    ij = 0
                    do j = 0, k
                       do i = k, n - 1
                          a(j, i) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                    do j = 0, k - 2
                       do i = 0, j
                          a(i, j) = arf(ij)
                          ij = ij + 1
                       end do
                       do l = k + 1 + j, n - 1
                          a(k + 1 + j, l) = arf(ij)
                          ij = ij + 1
                       end do
                    end do
                    ! note that here, on exit of the loop, j = k-1
                    do i = 0, j
                       a(i, j) = arf(ij)
                       ij = ij + 1
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_dtfttr
     end subroutine stdlib_dtfttr

     ! DTPRFB applies a real "triangular-pentagonal" block reflector H or its
     ! transpose H**T to a real matrix C, which is composed of two
     ! blocks A and B, either from the left or right.

     subroutine stdlib_dtprfb(side, trans, direct, storev, m, n, k, l, v, ldv, t, ldt, a, lda, b, &
               ldb, work, ldwork)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, side, storev, trans
           integer(ilp) :: k, l, lda, ldb, ldt, ldv, ldwork, m, n
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *), t(ldt, *), v(ldv, *), work(ldwork, *)
                     
        ! ==========================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, mp, np, kp
           logical(lk) :: left, forward, column, right, backward, row
     
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0 .or. k <= 0 .or. l < 0) return
           if (stdlib_lsame(storev, 'c')) then
              column = .true.
              row = .false.
           else if (stdlib_lsame(storev, 'r')) then
              column = .false.
              row = .true.
           else
              column = .false.
              row = .false.
           end if
           if (stdlib_lsame(side, 'l')) then
              left = .true.
              right = .false.
           else if (stdlib_lsame(side, 'r')) then
              left = .false.
              right = .true.
           else
              left = .false.
              right = .false.
           end if
           if (stdlib_lsame(direct, 'f')) then
              forward = .true.
              backward = .false.
           else if (stdlib_lsame(direct, 'b')) then
              forward = .false.
              backward = .true.
           else
              forward = .false.
              backward = .false.
           end if
       ! ---------------------------------------------------------------------------
           if (column .and. forward .and. left) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (m-by-k)
              ! form  h c  or  h**t c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a -   t (a + v**t b)  or  a = a -   t**t (a + v**t b)
              ! b = b - v t (a + v**t b)  or  b = b - v t**t (a + v**t b)
       ! ---------------------------------------------------------------------------
              mp = min(m - l + 1, m)
              kp = min(l + 1, k)
              do j = 1, n
                 do i = 1, l
                    work(i, j) = b(m - l + i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'u', 't', 'n', l, n, one, v(mp, 1), ldv, work, ldwork)
                        
              call stdlib_dgemm('t', 'n', l, n, m - l, one, v, ldv, b, ldb, one, work, ldwork)
                        
              call stdlib_dgemm('t', 'n', k - l, n, m, one, v(1, kp), ldv, b, ldb, zero, work(kp, &
                         1), ldwork)
              do j = 1, n
                 do i = 1, k
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'u', trans, 'n', k, n, one, t, ldt, work, ldwork)
              do j = 1, n
                 do i = 1, k
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('n', 'n', m - l, n, k, -one, v, ldv, work, ldwork, one, b, ldb)
                        
              call stdlib_dgemm('n', 'n', l, n, k - l, -one, v(mp, kp), ldv, work(kp, 1), &
                        ldwork, one, b(mp, 1), ldb)
              call stdlib_dtrmm('l', 'u', 'n', 'n', l, n, one, v(mp, 1), ldv, work, ldwork)
                        
              do j = 1, n
                 do i = 1, l
                    b(m - l + i, j) = b(m - l + i, j) - work(i, j)
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if (column .and. forward .and. right) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i ]    (k-by-k)
                        ! [ v ]    (n-by-k)
              ! form  c h or  c h**t  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**t
              ! b = b - (a + b v) t v**t  or  b = b - (a + b v) t**t v**t
       ! ---------------------------------------------------------------------------
              np = min(n - l + 1, n)
              kp = min(l + 1, k)
              do j = 1, l
                 do i = 1, m
                    work(i, j) = b(i, n - l + j)
                 end do
              end do
              call stdlib_dtrmm('r', 'u', 'n', 'n', m, l, one, v(np, 1), ldv, work, ldwork)
                        
              call stdlib_dgemm('n', 'n', m, l, n - l, one, b, ldb, v, ldv, one, work, ldwork)
                        
              call stdlib_dgemm('n', 'n', m, k - l, n, one, b, ldb, v(1, kp), ldv, zero, work(1, &
                        kp), ldwork)
              do j = 1, k
                 do i = 1, m
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('r', 'u', trans, 'n', m, k, one, t, ldt, work, ldwork)
              do j = 1, k
                 do i = 1, m
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('n', 't', m, n - l, k, -one, work, ldwork, v, ldv, one, b, ldb)
                        
              call stdlib_dgemm('n', 't', m, l, k - l, -one, work(1, kp), ldwork, v(np, kp), &
                        ldv, one, b(1, np), ldb)
              call stdlib_dtrmm('r', 'u', 't', 'n', m, l, one, v(np, 1), ldv, work, ldwork)
                        
              do j = 1, l
                 do i = 1, m
                    b(i, n - l + j) = b(i, n - l + j) - work(i, j)
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if (column .and. backward .and. left) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (m-by-k)
                        ! [ i ]    (k-by-k)
              ! form  h c  or  h**t c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a -   t (a + v**t b)  or  a = a -   t**t (a + v**t b)
              ! b = b - v t (a + v**t b)  or  b = b - v t**t (a + v**t b)
       ! ---------------------------------------------------------------------------
              mp = min(l + 1, m)
              kp = min(k - l + 1, k)
              do j = 1, n
                 do i = 1, l
                    work(k - l + i, j) = b(i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'l', 't', 'n', l, n, one, v(1, kp), ldv, work(kp, 1), &
                        ldwork)
              call stdlib_dgemm('t', 'n', l, n, m - l, one, v(mp, kp), ldv, b(mp, 1), ldb, one, &
                        work(kp, 1), ldwork)
              call stdlib_dgemm('t', 'n', k - l, n, m, one, v, ldv, b, ldb, zero, work, ldwork)
                        
              do j = 1, n
                 do i = 1, k
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'l', trans, 'n', k, n, one, t, ldt, work, ldwork)
              do j = 1, n
                 do i = 1, k
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('n', 'n', m - l, n, k, -one, v(mp, 1), ldv, work, ldwork, one, b( &
                        mp, 1), ldb)
              call stdlib_dgemm('n', 'n', l, n, k - l, -one, v, ldv, work, ldwork, one, b, ldb)
                        
              call stdlib_dtrmm('l', 'l', 'n', 'n', l, n, one, v(1, kp), ldv, work(kp, 1), &
                        ldwork)
              do j = 1, n
                 do i = 1, l
                    b(i, j) = b(i, j) - work(k - l + i, j)
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if (column .and. backward .and. right) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v ]    (n-by-k)
                        ! [ i ]    (k-by-k)
              ! form  c h  or  c h**t  where  c = [ b a ] (b is m-by-n, a is m-by-k)
              ! h = i - w t w**t          or  h**t = i - w t**t w**t
              ! a = a - (a + b v) t      or  a = a - (a + b v) t**t
              ! b = b - (a + b v) t v**t  or  b = b - (a + b v) t**t v**t
       ! ---------------------------------------------------------------------------
              np = min(l + 1, n)
              kp = min(k - l + 1, k)
              do j = 1, l
                 do i = 1, m
                    work(i, k - l + j) = b(i, j)
                 end do
              end do
              call stdlib_dtrmm('r', 'l', 'n', 'n', m, l, one, v(1, kp), ldv, work(1, kp), &
                        ldwork)
              call stdlib_dgemm('n', 'n', m, l, n - l, one, b(1, np), ldb, v(np, kp), ldv, one, &
                        work(1, kp), ldwork)
              call stdlib_dgemm('n', 'n', m, k - l, n, one, b, ldb, v, ldv, zero, work, ldwork)
                        
              do j = 1, k
                 do i = 1, m
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('r', 'l', trans, 'n', m, k, one, t, ldt, work, ldwork)
              do j = 1, k
                 do i = 1, m
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('n', 't', m, n - l, k, -one, work, ldwork, v(np, 1), ldv, one, b( &
                        1, np), ldb)
              call stdlib_dgemm('n', 't', m, l, k - l, -one, work, ldwork, v, ldv, one, b, ldb)
                        
              call stdlib_dtrmm('r', 'l', 't', 'n', m, l, one, v(1, kp), ldv, work(1, kp), &
                        ldwork)
              do j = 1, l
                 do i = 1, m
                    b(i, j) = b(i, j) - work(i, k - l + j)
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if (row .and. forward .and. left) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**t c  where  c = [ a ]  (k-by-n)
                                                ! [ b ]  (m-by-n)
              ! h = i - w**t t w          or  h**t = i - w**t t**t w
              ! a = a -     t (a + v b)  or  a = a -     t**t (a + v b)
              ! b = b - v**t t (a + v b)  or  b = b - v**t t**t (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min(m - l + 1, m)
              kp = min(l + 1, k)
              do j = 1, n
                 do i = 1, l
                    work(i, j) = b(m - l + i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'l', 'n', 'n', l, n, one, v(1, mp), ldv, work, ldb)
                        
              call stdlib_dgemm('n', 'n', l, n, m - l, one, v, ldv, b, ldb, one, work, ldwork)
                        
              call stdlib_dgemm('n', 'n', k - l, n, m, one, v(kp, 1), ldv, b, ldb, zero, work(kp, &
                         1), ldwork)
              do j = 1, n
                 do i = 1, k
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'u', trans, 'n', k, n, one, t, ldt, work, ldwork)
              do j = 1, n
                 do i = 1, k
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('t', 'n', m - l, n, k, -one, v, ldv, work, ldwork, one, b, ldb)
                        
              call stdlib_dgemm('t', 'n', l, n, k - l, -one, v(kp, mp), ldv, work(kp, 1), &
                        ldwork, one, b(mp, 1), ldb)
              call stdlib_dtrmm('l', 'l', 't', 'n', l, n, one, v(1, mp), ldv, work, ldwork)
                        
              do j = 1, n
                 do i = 1, l
                    b(m - l + i, j) = b(m - l + i, j) - work(i, j)
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if (row .and. forward .and. right) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ i v ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**t  where  c = [ a b ] (a is m-by-k, b is m-by-n)
              ! h = i - w**t t w            or  h**t = i - w**t t**t w
              ! a = a - (a + b v**t) t      or  a = a - (a + b v**t) t**t
              ! b = b - (a + b v**t) t v    or  b = b - (a + b v**t) t**t v
       ! ---------------------------------------------------------------------------
              np = min(n - l + 1, n)
              kp = min(l + 1, k)
              do j = 1, l
                 do i = 1, m
                    work(i, j) = b(i, n - l + j)
                 end do
              end do
              call stdlib_dtrmm('r', 'l', 't', 'n', m, l, one, v(1, np), ldv, work, ldwork)
                        
              call stdlib_dgemm('n', 't', m, l, n - l, one, b, ldb, v, ldv, one, work, ldwork)
                        
              call stdlib_dgemm('n', 't', m, k - l, n, one, b, ldb, v(kp, 1), ldv, zero, work(1, &
                        kp), ldwork)
              do j = 1, k
                 do i = 1, m
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('r', 'u', trans, 'n', m, k, one, t, ldt, work, ldwork)
              do j = 1, k
                 do i = 1, m
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('n', 'n', m, n - l, k, -one, work, ldwork, v, ldv, one, b, ldb)
                        
              call stdlib_dgemm('n', 'n', m, l, k - l, -one, work(1, kp), ldwork, v(kp, np), &
                        ldv, one, b(1, np), ldb)
              call stdlib_dtrmm('r', 'l', 'n', 'n', m, l, one, v(1, np), ldv, work, ldwork)
                        
              do j = 1, l
                 do i = 1, m
                    b(i, n - l + j) = b(i, n - l + j) - work(i, j)
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if (row .and. backward .and. left) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-m )
              ! form  h c  or  h**t c  where  c = [ b ]  (m-by-n)
                                                ! [ a ]  (k-by-n)
              ! h = i - w**t t w          or  h**t = i - w**t t**t w
              ! a = a -     t (a + v b)  or  a = a -     t**t (a + v b)
              ! b = b - v**t t (a + v b)  or  b = b - v**t t**t (a + v b)
       ! ---------------------------------------------------------------------------
              mp = min(l + 1, m)
              kp = min(k - l + 1, k)
              do j = 1, n
                 do i = 1, l
                    work(k - l + i, j) = b(i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'u', 'n', 'n', l, n, one, v(kp, 1), ldv, work(kp, 1), &
                        ldwork)
              call stdlib_dgemm('n', 'n', l, n, m - l, one, v(kp, mp), ldv, b(mp, 1), ldb, one, &
                        work(kp, 1), ldwork)
              call stdlib_dgemm('n', 'n', k - l, n, m, one, v, ldv, b, ldb, zero, work, ldwork)
                        
              do j = 1, n
                 do i = 1, k
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('l', 'l ', trans, 'n', k, n, one, t, ldt, work, ldwork)
              do j = 1, n
                 do i = 1, k
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('t', 'n', m - l, n, k, -one, v(1, mp), ldv, work, ldwork, one, b( &
                        mp, 1), ldb)
              call stdlib_dgemm('t', 'n', l, n, k - l, -one, v, ldv, work, ldwork, one, b, ldb)
                        
              call stdlib_dtrmm('l', 'u', 't', 'n', l, n, one, v(kp, 1), ldv, work(kp, 1), &
                        ldwork)
              do j = 1, n
                 do i = 1, l
                    b(i, j) = b(i, j) - work(k - l + i, j)
                 end do
              end do
       ! ---------------------------------------------------------------------------
           else if (row .and. backward .and. right) then
       ! ---------------------------------------------------------------------------
              ! let  w =  [ v i ] ( i is k-by-k, v is k-by-n )
              ! form  c h  or  c h**t  where  c = [ b a ] (a is m-by-k, b is m-by-n)
              ! h = i - w**t t w            or  h**t = i - w**t t**t w
              ! a = a - (a + b v**t) t      or  a = a - (a + b v**t) t**t
              ! b = b - (a + b v**t) t v    or  b = b - (a + b v**t) t**t v
       ! ---------------------------------------------------------------------------
              np = min(l + 1, n)
              kp = min(k - l + 1, k)
              do j = 1, l
                 do i = 1, m
                    work(i, k - l + j) = b(i, j)
                 end do
              end do
              call stdlib_dtrmm('r', 'u', 't', 'n', m, l, one, v(kp, 1), ldv, work(1, kp), &
                        ldwork)
              call stdlib_dgemm('n', 't', m, l, n - l, one, b(1, np), ldb, v(kp, np), ldv, one, &
                        work(1, kp), ldwork)
              call stdlib_dgemm('n', 't', m, k - l, n, one, b, ldb, v, ldv, zero, work, ldwork)
                        
              do j = 1, k
                 do i = 1, m
                    work(i, j) = work(i, j) + a(i, j)
                 end do
              end do
              call stdlib_dtrmm('r', 'l', trans, 'n', m, k, one, t, ldt, work, ldwork)
              do j = 1, k
                 do i = 1, m
                    a(i, j) = a(i, j) - work(i, j)
                 end do
              end do
              call stdlib_dgemm('n', 'n', m, n - l, k, -one, work, ldwork, v(1, np), ldv, one, b( &
                        1, np), ldb)
              call stdlib_dgemm('n', 'n', m, l, k - l, -one, work, ldwork, v, ldv, one, b, ldb)
                        
              call stdlib_dtrmm('r', 'u', 'n', 'n', m, l, one, v(kp, 1), ldv, work(1, kp), &
                        ldwork)
              do j = 1, l
                 do i = 1, m
                    b(i, j) = b(i, j) - work(i, k - l + j)
                 end do
              end do
           end if
           return
           ! end of stdlib_dtprfb
     end subroutine stdlib_dtprfb

     ! DTPRFS provides error bounds and backward error estimates for the
     ! solution to a system of linear equations with a triangular packed
     ! coefficient matrix.
     ! The solution matrix X must be computed by DTPTRS or some other
     ! means before entering this routine.  DTPRFS does not do iterative
     ! refinement because doing so cannot improve the backward error.

     subroutine stdlib_dtprfs(uplo, trans, diag, n, nrhs, ap, b, ldb, x, ldx, ferr, berr, work, &
               iwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, trans, uplo
           integer(ilp) :: info, ldb, ldx, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: iwork(*)
           real(dp) :: ap(*), b(ldb, *), berr(*), ferr(*), work(*), x(ldx, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           character :: transt
           integer(ilp) :: i, j, k, kase, kc, nz
           real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk
           ! .. local arrays ..
           integer(ilp) :: isave(3)
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (nrhs < 0) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -8
           else if (ldx < max(1, n)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtprfs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) then
              do j = 1, nrhs
                 ferr(j) = zero
                 berr(j) = zero
              end do
              return
           end if
           if (notran) then
              transt = 't'
           else
              transt = 'n'
           end if
           ! nz = maximum number of nonzero elements in each row of a, plus 1
           nz = n + 1
           eps = stdlib_dlamch('epsilon')
           safmin = stdlib_dlamch('safe minimum')
           safe1 = nz*safmin
           safe2 = safe1/eps
           ! do for each right hand side
           loop_250: do j = 1, nrhs
              ! compute residual r = b - op(a) * x,
              ! where op(a) = a or a**t, depending on trans.
              call stdlib_dcopy(n, x(1, j), 1, work(n + 1), 1)
              call stdlib_dtpmv(uplo, trans, diag, n, ap, work(n + 1), 1)
              call stdlib_daxpy(n, -one, b(1, j), 1, work(n + 1), 1)
              ! compute componentwise relative backward error from formula
              ! max(i) ( abs(r(i)) / ( abs(op(a))*abs(x) + abs(b) )(i) )
              ! where abs(z) is the componentwise absolute value of the matrix
              ! or vector z.  if the i-th component of the denominator is less
              ! than safe2, then safe1 is added to the i-th components of the
              ! numerator and denominator before dividing.
              do i = 1, n
                 work(i) = abs(b(i, j))
              end do
              if (notran) then
                 ! compute abs(a)*abs(x) + abs(b).
                 if (upper) then
                    kc = 1
                    if (nounit) then
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = 1, k
                             work(i) = work(i) + abs(ap(kc + i - 1))*xk
                          end do
                          kc = kc + k
                       end do
                    else
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = 1, k - 1
                             work(i) = work(i) + abs(ap(kc + i - 1))*xk
                          end do
                          work(k) = work(k) + xk
                          kc = kc + k
                       end do
                    end if
                 else
                    kc = 1
                    if (nounit) then
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = k, n
                             work(i) = work(i) + abs(ap(kc + i - k))*xk
                          end do
                          kc = kc + n - k + 1
                       end do
                    else
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = k + 1, n
                             work(i) = work(i) + abs(ap(kc + i - k))*xk
                          end do
                          work(k) = work(k) + xk
                          kc = kc + n - k + 1
                       end do
                    end if
                 end if
              else
                 ! compute abs(a**t)*abs(x) + abs(b).
                 if (upper) then
                    kc = 1
                    if (nounit) then
                       do k = 1, n
                          s = zero
                          do i = 1, k
                             s = s + abs(ap(kc + i - 1))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                          kc = kc + k
                       end do
                    else
                       do k = 1, n
                          s = abs(x(k, j))
                          do i = 1, k - 1
                             s = s + abs(ap(kc + i - 1))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                          kc = kc + k
                       end do
                    end if
                 else
                    kc = 1
                    if (nounit) then
                       do k = 1, n
                          s = zero
                          do i = k, n
                             s = s + abs(ap(kc + i - k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                          kc = kc + n - k + 1
                       end do
                    else
                       do k = 1, n
                          s = abs(x(k, j))
                          do i = k + 1, n
                             s = s + abs(ap(kc + i - k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                          kc = kc + n - k + 1
                       end do
                    end if
                 end if
              end if
              s = zero
              do i = 1, n
                 if (work(i) > safe2) then
                    s = max(s, abs(work(n + i))/work(i))
                 else
                    s = max(s, (abs(work(n + i)) + safe1)/(work(i) + safe1))
                 end if
              end do
              berr(j) = s
              ! bound error from formula
              ! norm(x - xtrue) / norm(x) .le. ferr =
              ! norm( abs(inv(op(a)))*
                 ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x)
              ! where
                ! norm(z) is the magnitude of the largest component of z
                ! inv(op(a)) is the inverse of op(a)
                ! abs(z) is the componentwise absolute value of the matrix or
                   ! vector z
                ! nz is the maximum number of nonzeros in any row of a, plus 1
                ! eps is machine epsilon
              ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b))
              ! is incremented by safe1 if the i-th component of
              ! abs(op(a))*abs(x) + abs(b) is less than safe2.
              ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix
                 ! inv(op(a)) * diag(w),
              ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) )))
              do i = 1, n
                 if (work(i) > safe2) then
                    work(i) = abs(work(n + i)) + nz*eps*work(i)
                 else
                    work(i) = abs(work(n + i)) + nz*eps*work(i) + safe1
                 end if
              end do
              kase = 0
210    continue
              call stdlib_dlacn2(n, work(2*n + 1), work(n + 1), iwork, ferr(j), kase, isave)
                        
              if (kase /= 0) then
                 if (kase == 1) then
                    ! multiply by diag(w)*inv(op(a)**t).
                    call stdlib_dtpsv(uplo, transt, diag, n, ap, work(n + 1), 1)
                    do i = 1, n
                       work(n + i) = work(i)*work(n + i)
                    end do
                 else
                    ! multiply by inv(op(a))*diag(w).
                    do i = 1, n
                       work(n + i) = work(i)*work(n + i)
                    end do
                    call stdlib_dtpsv(uplo, trans, diag, n, ap, work(n + 1), 1)
                 end if
                 go to 210
              end if
              ! normalize error.
              lstres = zero
              do i = 1, n
                 lstres = max(lstres, abs(x(i, j)))
              end do
              if (lstres /= zero) ferr(j) = ferr(j)/lstres
           end do loop_250
           return
           ! end of stdlib_dtprfs
     end subroutine stdlib_dtprfs

     ! DTPTRI computes the inverse of a real upper or lower triangular
     ! matrix A stored in packed format.

     subroutine stdlib_dtptri(uplo, diag, n, ap, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           real(dp) :: ap(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: nounit, upper
           integer(ilp) :: j, jc, jclast, jj
           real(dp) :: ajj
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           nounit = stdlib_lsame(diag, 'n')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -2
           else if (n < 0) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtptri', -info)
              return
           end if
           ! check for singularity if non-unit.
           if (nounit) then
              if (upper) then
                 jj = 0
                 do info = 1, n
                    jj = jj + info
                    if (ap(jj) == zero) return
                 end do
              else
                 jj = 1
                 do info = 1, n
                    if (ap(jj) == zero) return
                    jj = jj + n - info + 1
                 end do
              end if
              info = 0
           end if
           if (upper) then
              ! compute inverse of upper triangular matrix.
              jc = 1
              do j = 1, n
                 if (nounit) then
                    ap(jc + j - 1) = one/ap(jc + j - 1)
                    ajj = -ap(jc + j - 1)
                 else
                    ajj = -one
                 end if
                 ! compute elements 1:j-1 of j-th column.
                 call stdlib_dtpmv('upper', 'no transpose', diag, j - 1, ap, ap(jc), 1)
                 call stdlib_dscal(j - 1, ajj, ap(jc), 1)
                 jc = jc + j
              end do
           else
              ! compute inverse of lower triangular matrix.
              jc = n*(n + 1)/2
              do j = n, 1, -1
                 if (nounit) then
                    ap(jc) = one/ap(jc)
                    ajj = -ap(jc)
                 else
                    ajj = -one
                 end if
                 if (j < n) then
                    ! compute elements j+1:n of j-th column.
                    call stdlib_dtpmv('lower', 'no transpose', diag, n - j, ap(jclast), ap(jc + 1) &
                              , 1)
                    call stdlib_dscal(n - j, ajj, ap(jc + 1), 1)
                 end if
                 jclast = jc
                 jc = jc - n + j - 2
              end do
           end if
           return
           ! end of stdlib_dtptri
     end subroutine stdlib_dtptri

     ! DTPTRS solves a triangular system of the form
     ! A * X = B  or  A**T * X = B,
     ! where A is a triangular matrix of order N stored in packed format,
     ! and B is an N-by-NRHS matrix.  A check is made to verify that A is
     ! nonsingular.

     subroutine stdlib_dtptrs(uplo, trans, diag, n, nrhs, ap, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, trans, uplo
           integer(ilp) :: info, ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: ap(*), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: nounit, upper
           integer(ilp) :: j, jc
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           nounit = stdlib_lsame(diag, 'n')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. stdlib_lsame(trans, 'n') .and. .not. stdlib_lsame(trans, 't') .and. &
                     .not. stdlib_lsame(trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (nrhs < 0) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtptrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check for singularity.
           if (nounit) then
              if (upper) then
                 jc = 1
                 do info = 1, n
                    if (ap(jc + info - 1) == zero) return
                    jc = jc + info
                 end do
              else
                 jc = 1
                 do info = 1, n
                    if (ap(jc) == zero) return
                    jc = jc + n - info + 1
                 end do
              end if
           end if
           info = 0
           ! solve a * x = b  or  a**t * x = b.
           do j = 1, nrhs
              call stdlib_dtpsv(uplo, trans, diag, n, ap, b(1, j), 1)
           end do
           return
           ! end of stdlib_dtptrs
     end subroutine stdlib_dtptrs

     ! DTPTTF copies a triangular matrix A from standard packed format (TP)
     ! to rectangular full packed format (TF).

     subroutine stdlib_dtpttf(transr, uplo, n, ap, arf, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: transr, uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           real(dp) :: ap(0:*), arf(0:*)
        ! =====================================================================
           ! .. parameters ..
           ! .. local scalars ..
           logical(lk) :: lower, nisodd, normaltransr
           integer(ilp) :: n1, n2, k, nt
           integer(ilp) :: i, j, ij
           integer(ilp) :: ijp, jp, lda, js
     
           ! .. intrinsic functions ..
           intrinsic :: mod
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           normaltransr = stdlib_lsame(transr, 'n')
           lower = stdlib_lsame(uplo, 'l')
           if (.not. normaltransr .and. .not. stdlib_lsame(transr, 't')) then
              info = -1
           else if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -2
           else if (n < 0) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtpttf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (n == 1) then
              if (normaltransr) then
                 arf(0) = ap(0)
              else
                 arf(0) = ap(0)
              end if
              return
           end if
           ! size of array arf(0:nt-1)
           nt = n*(n + 1)/2
           ! set n1 and n2 depending on lower
           if (lower) then
              n2 = n/2
              n1 = n - n2
           else
              n1 = n/2
              n2 = n - n1
           end if
           ! if n is odd, set nisodd = .true.
           ! if n is even, set k = n/2 and nisodd = .false.
           ! set lda of arf^c; arf^c is (0:(n+1)/2-1,0:n-noe)
           ! where noe = 0 if n is even, noe = 1 if n is odd
           if (mod(n, 2) == 0) then
              k = n/2
              nisodd = .false.
              lda = n + 1
           else
              nisodd = .true.
              lda = n
           end if
           ! arf^c has lda rows and n+1-noe cols
           if (.not. normaltransr) lda = (n + 1)/2
           ! start execution: there are eight cases
           if (nisodd) then
              ! n is odd
              if (normaltransr) then
                 ! n is odd and transr = 'n'
                 if (lower) then
                    ! n is odd, transr = 'n', and uplo = 'l'
                    ijp = 0
                    jp = 0
                    do j = 0, n2
                       do i = j, n - 1
                          ij = i + jp
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       jp = jp + lda
                    end do
                    do i = 0, n2 - 1
                       do j = 1 + i, n2
                          ij = i + j*lda
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                    end do
                 else
                    ! n is odd, transr = 'n', and uplo = 'u'
                    ijp = 0
                    do j = 0, n1 - 1
                       ij = n2 + j
                       do i = 0, j
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                          ij = ij + lda
                       end do
                    end do
                    js = 0
                    do j = n1, n - 1
                       ij = js
                       do ij = js, js + j
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                 end if
              else
                 ! n is odd and transr = 't'
                 if (lower) then
                    ! n is odd, transr = 't', and uplo = 'l'
                    ijp = 0
                    do i = 0, n2
                       do ij = i*(lda + 1), n*lda - 1, lda
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                    end do
                    js = 1
                    do j = 0, n2 - 1
                       do ij = js, js + n2 - j - 1
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       js = js + lda + 1
                    end do
                 else
                    ! n is odd, transr = 't', and uplo = 'u'
                    ijp = 0
                    js = n2*lda
                    do j = 0, n1 - 1
                       do ij = js, js + j
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                    do i = 0, n1
                       do ij = i, i + (n1 + i)*lda, lda
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                    end do
                 end if
              end if
           else
              ! n is even
              if (normaltransr) then
                 ! n is even and transr = 'n'
                 if (lower) then
                    ! n is even, transr = 'n', and uplo = 'l'
                    ijp = 0
                    jp = 0
                    do j = 0, k - 1
                       do i = j, n - 1
                          ij = 1 + i + jp
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       jp = jp + lda
                    end do
                    do i = 0, k - 1
                       do j = i, k - 1
                          ij = i + j*lda
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                    end do
                 else
                    ! n is even, transr = 'n', and uplo = 'u'
                    ijp = 0
                    do j = 0, k - 1
                       ij = k + 1 + j
                       do i = 0, j
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                          ij = ij + lda
                       end do
                    end do
                    js = 0
                    do j = k, n - 1
                       ij = js
                       do ij = js, js + j
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                 end if
              else
                 ! n is even and transr = 't'
                 if (lower) then
                    ! n is even, transr = 't', and uplo = 'l'
                    ijp = 0
                    do i = 0, k - 1
                       do ij = i + (i + 1)*lda, (n + 1)*lda - 1, lda
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                    end do
                    js = 0
                    do j = 0, k - 1
                       do ij = js, js + k - j - 1
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       js = js + lda + 1
                    end do
                 else
                    ! n is even, transr = 't', and uplo = 'u'
                    ijp = 0
                    js = (k + 1)*lda
                    do j = 0, k - 1
                       do ij = js, js + j
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                       js = js + lda
                    end do
                    do i = 0, k - 1
                       do ij = i, i + (k + i)*lda, lda
                          arf(ij) = ap(ijp)
                          ijp = ijp + 1
                       end do
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_dtpttf
     end subroutine stdlib_dtpttf

     ! DTPTTR copies a triangular matrix A from standard packed format (TP)
     ! to standard full format (TR).

     subroutine stdlib_dtpttr(uplo, n, ap, a, lda, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n, lda
           ! .. array arguments ..
           real(dp) :: a(lda, *), ap(*)
        ! =====================================================================
           ! .. parameters ..
           ! .. local scalars ..
           logical(lk) :: lower
           integer(ilp) :: i, j, k
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           lower = stdlib_lsame(uplo, 'l')
           if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtpttr', -info)
              return
           end if
           if (lower) then
              k = 0
              do j = 1, n
                 do i = j, n
                    k = k + 1
                    a(i, j) = ap(k)
                 end do
              end do
           else
              k = 0
              do j = 1, n
                 do i = 1, j
                    k = k + 1
                    a(i, j) = ap(k)
                 end do
              end do
           end if
           return
           ! end of stdlib_dtpttr
     end subroutine stdlib_dtpttr

     ! DTRRFS provides error bounds and backward error estimates for the
     ! solution to a system of linear equations with a triangular
     ! coefficient matrix.
     ! The solution matrix X must be computed by DTRTRS or some other
     ! means before entering this routine.  DTRRFS does not do iterative
     ! refinement because doing so cannot improve the backward error.

     subroutine stdlib_dtrrfs(uplo, trans, diag, n, nrhs, a, lda, b, ldb, x, ldx, ferr, berr, &
               work, iwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, trans, uplo
           integer(ilp) :: info, lda, ldb, ldx, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: iwork(*)
           real(dp) :: a(lda, *), b(ldb, *), berr(*), ferr(*), work(*), x(ldx, *)
                     
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           character :: transt
           integer(ilp) :: i, j, k, kase, nz
           real(dp) :: eps, lstres, s, safe1, safe2, safmin, xk
           ! .. local arrays ..
           integer(ilp) :: isave(3)
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (nrhs < 0) then
              info = -5
           else if (lda < max(1, n)) then
              info = -7
           else if (ldb < max(1, n)) then
              info = -9
           else if (ldx < max(1, n)) then
              info = -11
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtrrfs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) then
              do j = 1, nrhs
                 ferr(j) = zero
                 berr(j) = zero
              end do
              return
           end if
           if (notran) then
              transt = 't'
           else
              transt = 'n'
           end if
           ! nz = maximum number of nonzero elements in each row of a, plus 1
           nz = n + 1
           eps = stdlib_dlamch('epsilon')
           safmin = stdlib_dlamch('safe minimum')
           safe1 = nz*safmin
           safe2 = safe1/eps
           ! do for each right hand side
           loop_250: do j = 1, nrhs
              ! compute residual r = b - op(a) * x,
              ! where op(a) = a or a**t, depending on trans.
              call stdlib_dcopy(n, x(1, j), 1, work(n + 1), 1)
              call stdlib_dtrmv(uplo, trans, diag, n, a, lda, work(n + 1), 1)
              call stdlib_daxpy(n, -one, b(1, j), 1, work(n + 1), 1)
              ! compute componentwise relative backward error from formula
              ! max(i) ( abs(r(i)) / ( abs(op(a))*abs(x) + abs(b) )(i) )
              ! where abs(z) is the componentwise absolute value of the matrix
              ! or vector z.  if the i-th component of the denominator is less
              ! than safe2, then safe1 is added to the i-th components of the
              ! numerator and denominator before dividing.
              do i = 1, n
                 work(i) = abs(b(i, j))
              end do
              if (notran) then
                 ! compute abs(a)*abs(x) + abs(b).
                 if (upper) then
                    if (nounit) then
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = 1, k
                             work(i) = work(i) + abs(a(i, k))*xk
                          end do
                       end do
                    else
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = 1, k - 1
                             work(i) = work(i) + abs(a(i, k))*xk
                          end do
                          work(k) = work(k) + xk
                       end do
                    end if
                 else
                    if (nounit) then
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = k, n
                             work(i) = work(i) + abs(a(i, k))*xk
                          end do
                       end do
                    else
                       do k = 1, n
                          xk = abs(x(k, j))
                          do i = k + 1, n
                             work(i) = work(i) + abs(a(i, k))*xk
                          end do
                          work(k) = work(k) + xk
                       end do
                    end if
                 end if
              else
                 ! compute abs(a**t)*abs(x) + abs(b).
                 if (upper) then
                    if (nounit) then
                       do k = 1, n
                          s = zero
                          do i = 1, k
                             s = s + abs(a(i, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    else
                       do k = 1, n
                          s = abs(x(k, j))
                          do i = 1, k - 1
                             s = s + abs(a(i, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    end if
                 else
                    if (nounit) then
                       do k = 1, n
                          s = zero
                          do i = k, n
                             s = s + abs(a(i, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    else
                       do k = 1, n
                          s = abs(x(k, j))
                          do i = k + 1, n
                             s = s + abs(a(i, k))*abs(x(i, j))
                          end do
                          work(k) = work(k) + s
                       end do
                    end if
                 end if
              end if
              s = zero
              do i = 1, n
                 if (work(i) > safe2) then
                    s = max(s, abs(work(n + i))/work(i))
                 else
                    s = max(s, (abs(work(n + i)) + safe1)/(work(i) + safe1))
                 end if
              end do
              berr(j) = s
              ! bound error from formula
              ! norm(x - xtrue) / norm(x) .le. ferr =
              ! norm( abs(inv(op(a)))*
                 ! ( abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) ))) / norm(x)
              ! where
                ! norm(z) is the magnitude of the largest component of z
                ! inv(op(a)) is the inverse of op(a)
                ! abs(z) is the componentwise absolute value of the matrix or
                   ! vector z
                ! nz is the maximum number of nonzeros in any row of a, plus 1
                ! eps is machine epsilon
              ! the i-th component of abs(r)+nz*eps*(abs(op(a))*abs(x)+abs(b))
              ! is incremented by safe1 if the i-th component of
              ! abs(op(a))*abs(x) + abs(b) is less than safe2.
              ! use stdlib_dlacn2 to estimate the infinity-norm of the matrix
                 ! inv(op(a)) * diag(w),
              ! where w = abs(r) + nz*eps*( abs(op(a))*abs(x)+abs(b) )))
              do i = 1, n
                 if (work(i) > safe2) then
                    work(i) = abs(work(n + i)) + nz*eps*work(i)
                 else
                    work(i) = abs(work(n + i)) + nz*eps*work(i) + safe1
                 end if
              end do
              kase = 0
210    continue
              call stdlib_dlacn2(n, work(2*n + 1), work(n + 1), iwork, ferr(j), kase, isave)
                        
              if (kase /= 0) then
                 if (kase == 1) then
                    ! multiply by diag(w)*inv(op(a)**t).
                    call stdlib_dtrsv(uplo, transt, diag, n, a, lda, work(n + 1), 1)
                    do i = 1, n
                       work(n + i) = work(i)*work(n + i)
                    end do
                 else
                    ! multiply by inv(op(a))*diag(w).
                    do i = 1, n
                       work(n + i) = work(i)*work(n + i)
                    end do
                    call stdlib_dtrsv(uplo, trans, diag, n, a, lda, work(n + 1), 1)
                 end if
                 go to 210
              end if
              ! normalize error.
              lstres = zero
              do i = 1, n
                 lstres = max(lstres, abs(x(i, j)))
              end do
              if (lstres /= zero) ferr(j) = ferr(j)/lstres
           end do loop_250
           return
           ! end of stdlib_dtrrfs
     end subroutine stdlib_dtrrfs

     ! DTRTI2 computes the inverse of a real upper or lower triangular
     ! matrix.
     ! This is the Level 2 BLAS version of the algorithm.

     subroutine stdlib_dtrti2(uplo, diag, n, a, lda, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           real(dp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: nounit, upper
           integer(ilp) :: j
           real(dp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           nounit = stdlib_lsame(diag, 'n')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtrti2', -info)
              return
           end if
           if (upper) then
              ! compute inverse of upper triangular matrix.
              do j = 1, n
                 if (nounit) then
                    a(j, j) = one/a(j, j)
                    ajj = -a(j, j)
                 else
                    ajj = -one
                 end if
                 ! compute elements 1:j-1 of j-th column.
                 call stdlib_dtrmv('upper', 'no transpose', diag, j - 1, a, lda, a(1, j), 1)
                           
                 call stdlib_dscal(j - 1, ajj, a(1, j), 1)
              end do
           else
              ! compute inverse of lower triangular matrix.
              do j = n, 1, -1
                 if (nounit) then
                    a(j, j) = one/a(j, j)
                    ajj = -a(j, j)
                 else
                    ajj = -one
                 end if
                 if (j < n) then
                    ! compute elements j+1:n of j-th column.
                    call stdlib_dtrmv('lower', 'no transpose', diag, n - j, a(j + 1, j + 1), lda, a( &
                              j + 1, j), 1)
                    call stdlib_dscal(n - j, ajj, a(j + 1, j), 1)
                 end if
              end do
           end if
           return
           ! end of stdlib_dtrti2
     end subroutine stdlib_dtrti2

     ! DTRTRI computes the inverse of a real upper or lower triangular
     ! matrix A.
     ! This is the Level 3 BLAS version of the algorithm.

     subroutine stdlib_dtrtri(uplo, diag, n, a, lda, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           real(dp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: nounit, upper
           integer(ilp) :: j, jb, nb, nn
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           nounit = stdlib_lsame(diag, 'n')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtrtri', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check for singularity if non-unit.
           if (nounit) then
              do info = 1, n
                 if (a(info, info) == zero) return
              end do
              info = 0
           end if
           ! determine the block size for this environment.
           nb = stdlib_ilaenv(1, 'stdlib_dtrtri', uplo//diag, n, -1, -1, -1)
           if (nb <= 1 .or. nb >= n) then
              ! use unblocked code
              call stdlib_dtrti2(uplo, diag, n, a, lda, info)
           else
              ! use blocked code
              if (upper) then
                 ! compute inverse of upper triangular matrix
                 do j = 1, n, nb
                    jb = min(nb, n - j + 1)
                    ! compute rows 1:j-1 of current block column
                    call stdlib_dtrmm('left', 'upper', 'no transpose', diag, j - 1, jb, one, a, lda, &
                               a(1, j), lda)
                    call stdlib_dtrsm('right', 'upper', 'no transpose', diag, j - 1, jb, -one, a(j, &
                               j), lda, a(1, j), lda)
                    ! compute inverse of current diagonal block
                    call stdlib_dtrti2('upper', diag, jb, a(j, j), lda, info)
                 end do
              else
                 ! compute inverse of lower triangular matrix
                 nn = ((n - 1)/nb)*nb + 1
                 do j = nn, 1, -nb
                    jb = min(nb, n - j + 1)
                    if (j + jb <= n) then
                       ! compute rows j+jb:n of current block column
                       call stdlib_dtrmm('left', 'lower', 'no transpose', diag, n - j - jb + 1, jb, one, &
                                  a(j + jb, j + jb), lda, a(j + jb, j), lda)
                       call stdlib_dtrsm('right', 'lower', 'no transpose', diag, n - j - jb + 1, jb, - &
                                 one, a(j, j), lda, a(j + jb, j), lda)
                    end if
                    ! compute inverse of current diagonal block
                    call stdlib_dtrti2('lower', diag, jb, a(j, j), lda, info)
                 end do
              end if
           end if
           return
           ! end of stdlib_dtrtri
     end subroutine stdlib_dtrtri

     ! DTRTRS solves a triangular system of the form
     ! A * X = B  or  A**T * X = B,
     ! where A is a triangular matrix of order N, and B is an N-by-NRHS
     ! matrix.  A check is made to verify that A is nonsingular.

     subroutine stdlib_dtrtrs(uplo, trans, diag, n, nrhs, a, lda, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, trans, uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           real(dp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: nounit
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           nounit = stdlib_lsame(diag, 'n')
           if (.not. stdlib_lsame(uplo, 'u') .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. stdlib_lsame(trans, 'n') .and. .not. stdlib_lsame(trans, 't') .and. &
                     .not. stdlib_lsame(trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (nrhs < 0) then
              info = -5
           else if (lda < max(1, n)) then
              info = -7
           else if (ldb < max(1, n)) then
              info = -9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtrtrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check for singularity.
           if (nounit) then
              do info = 1, n
                 if (a(info, info) == zero) return
              end do
           end if
           info = 0
           ! solve a * x = b  or  a**t * x = b.
           call stdlib_dtrsm('left', uplo, trans, diag, n, nrhs, one, a, lda, b, ldb)
           return
           ! end of stdlib_dtrtrs
     end subroutine stdlib_dtrtrs

     ! DTRTTF copies a triangular matrix A from standard full format (TR)
     ! to rectangular full packed format (TF) .

     subroutine stdlib_dtrttf(transr, uplo, n, a, lda, arf, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: transr, uplo
           integer(ilp) :: info, n, lda
           ! .. array arguments ..
           real(dp) :: a(0:lda - 1, 0:*), arf(0:*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: lower, nisodd, normaltransr
           integer(ilp) :: i, ij, j, k, l, n1, n2, nt, nx2, np1x2
     
           ! .. intrinsic functions ..
           intrinsic :: max, mod
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           normaltransr = stdlib_lsame(transr, 'n')
           lower = stdlib_lsame(uplo, 'l')
           if (.not. normaltransr .and. .not. stdlib_lsame(transr, 't')) then
              info = -1
           else if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_dtrttf', -info)
              return
           end if
           ! quick return if possible
           if (n <= 1) then
              if (n == 1) then
                 arf(0) = a(0, 0)
              end if
              return
           end if
           ! size of array arf(0:nt-1)
           nt = n*(n + 1)/2
           ! set n1 and n2 depending on lower: for n even n1=n2=k
           if (lower) then
              n2 = n/2
              n1 = n - n2
           else
              n1 = n/2
              n2 = n - n1
           end if
           ! if n is odd, set nisodd = .true., lda=n+1 and a is (n+1)--by--k2.
           ! if n is even, set k = n/2 and nisodd = .false., lda=n and a is
           ! n--by--(n+1)/2.
           if (mod(n, 2) == 0) then
              k = n/2
              nisodd = .false.
              if (.not. lower) np1x2 = n + n + 2
           else
              nisodd = .true.
              if (.not. lower) nx2 = n + n
           end if
           if (nisodd) then
              ! n is odd
              if (normaltransr) then
                 ! n is odd and transr = 'n'
                 if (lower) then
                    ! n is odd, transr = 'n', and uplo = 'l'
                    ij = 0
                    do j = 0, n2
                       do i = n1, n2 + j
                          arf(ij) = a(n2 + j, i)
                          ij = ij + 1
                       end do
                       do i = j, n - 1
                          arf(ij) = a(i, j)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is odd, transr = 'n', and uplo = 'u'
                    ij = nt - n
                    do j = n - 1, n1, -1
                       do i = 0, j
                          arf(ij) = a(i, j)
                          ij = ij + 1
                       end do
                       do l = j - n1, n1 - 1
                          arf(ij) = a(j - n1, l)
                          ij = ij + 1
                       end do
                       ij = ij - nx2
                    end do
                 end if
              else
                 ! n is odd and transr = 't'
                 if (lower) then
                    ! n is odd, transr = 't', and uplo = 'l'
                    ij = 0
                    do j = 0, n2 - 1
                       do i = 0, j
                          arf(ij) = a(j, i)
                          ij = ij + 1
                       end do
                       do i = n1 + j, n - 1
                          arf(ij) = a(i, n1 + j)
                          ij = ij + 1
                       end do
                    end do
                    do j = n2, n - 1
                       do i = 0, n1 - 1
                          arf(ij) = a(j, i)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is odd, transr = 't', and uplo = 'u'
                    ij = 0
                    do j = 0, n1
                       do i = n1, n - 1
                          arf(ij) = a(j, i)
                          ij = ij + 1
                       end do
                    end do
                    do j = 0, n1 - 1
                       do i = 0, j
                          arf(ij) = a(i, j)
                          ij = ij + 1
                       end do
                       do l = n2 + j, n - 1
                          arf(ij) = a(n2 + j, l)
                          ij = ij + 1
                       end do
                    end do
                 end if
              end if
           else
              ! n is even
              if (normaltransr) then
                 ! n is even and transr = 'n'
                 if (lower) then
                    ! n is even, transr = 'n', and uplo = 'l'
                    ij = 0
                    do j = 0, k - 1
                       do i = k, k + j
                          arf(ij) = a(k + j, i)
                          ij = ij + 1
                       end do
                       do i = j, n - 1
                          arf(ij) = a(i, j)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is even, transr = 'n', and uplo = 'u'
                    ij = nt - n - 1
                    do j = n - 1, k, -1
                       do i = 0, j
                          arf(ij) = a(i, j)
                          ij = ij + 1
                       end do
                       do l = j - k, k - 1
                          arf(ij) = a(j - k, l)
                          ij = ij + 1
                       end do
                       ij = ij - np1x2
                    end do
                 end if
              else
                 ! n is even and transr = 't'
                 if (lower) then
                    ! n is even, transr = 't', and uplo = 'l'
                    ij = 0
                    j = k
                    do i = k, n - 1
                       arf(ij) = a(i, j)
                       ij = ij + 1
                    end do
                    do j = 0, k - 2
                       do i = 0, j
                          arf(ij) = a(j, i)
                          ij = ij + 1
                       end do
                       do i = k + 1 + j, n - 1
                          arf(ij) = a(i, k + 1 + j)
                          ij = ij + 1
                       end do
                    end do
                    do j = k - 1, n - 1
                       do i = 0, k - 1
                          arf(ij) = a(j, i)
                          ij = ij + 1
                       end do
                    end do
                 else
                    ! n is even, transr = 't', and uplo = 'u'
                    ij = 0
                    do j = 0, k
                       do i = k, n - 1
                          arf(ij) = a(j, i)
                          ij = ij + 1
                       end do
                    end do
                    do j = 0, k - 2
                       do i = 0, j
                          arf(ij) = a(i, j)
                          ij = ij + 1
                       end do
                       do l = k + 1 + j, n - 1
                          arf(ij) = a(k + 1 + j, l)
                          ij = ij + 1
                       end do
                    end do
                    ! note that here, on exit of the loop, j = k-1
                    do i = 0, j
                       arf(ij) = a(i, j)
                       ij = ij + 1
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_dtrttf
     end subroutine stdlib_dtrttf

     ! DTRTTP copies a triangular matrix A from full format (TR) to standard
     ! packed format (TP).

     subroutine stdlib_dtrttp(uplo, n, a, lda, ap, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n, lda
           ! .. array arguments ..
           real(dp) :: a(lda, *), ap(*)
        ! =====================================================================
           ! .. parameters ..
           ! .. local scalars ..
           logical(lk) :: lower
    