module stdlib_linalg_lapack_c
     use stdlib_linalg_constants
     use stdlib_linalg_blas
     use stdlib_linalg_lapack_aux
     use stdlib_linalg_lapack_s
     use stdlib_linalg_lapack_d
     implicit none(type, external)
     private

     public :: sp, dp, lk, ilp
     public :: stdlib_cbbcsd
     public :: stdlib_cbdsqr
     public :: stdlib_cgbbrd
     public :: stdlib_cgbcon
     public :: stdlib_cgbequ
     public :: stdlib_cgbequb
     public :: stdlib_cgbrfs
     public :: stdlib_cgbsv
     public :: stdlib_cgbsvx
     public :: stdlib_cgbtf2
     public :: stdlib_cgbtrf
     public :: stdlib_cgbtrs
     public :: stdlib_cgebak
     public :: stdlib_cgebal
     public :: stdlib_cgebd2
     public :: stdlib_cgebrd
     public :: stdlib_cgecon
     public :: stdlib_cgeequ
     public :: stdlib_cgeequb
     public :: stdlib_cgees
     public :: stdlib_cgeesx
     public :: stdlib_cgeev
     public :: stdlib_cgeevx
     public :: stdlib_cgehd2
     public :: stdlib_cgehrd
     public :: stdlib_cgejsv
     public :: stdlib_cgelq
     public :: stdlib_cgelq2
     public :: stdlib_cgelqf
     public :: stdlib_cgelqt
     public :: stdlib_cgelqt3
     public :: stdlib_cgels
     public :: stdlib_cgelsd
     public :: stdlib_cgelss
     public :: stdlib_cgelsy
     public :: stdlib_cgemlq
     public :: stdlib_cgemlqt
     public :: stdlib_cgemqr
     public :: stdlib_cgemqrt
     public :: stdlib_cgeql2
     public :: stdlib_cgeqlf
     public :: stdlib_cgeqp3
     public :: stdlib_cgeqr
     public :: stdlib_cgeqr2
     public :: stdlib_cgeqr2p
     public :: stdlib_cgeqrf
     public :: stdlib_cgeqrfp
     public :: stdlib_cgeqrt
     public :: stdlib_cgeqrt2
     public :: stdlib_cgeqrt3
     public :: stdlib_cgerfs
     public :: stdlib_cgerq2
     public :: stdlib_cgerqf
     public :: stdlib_cgesc2
     public :: stdlib_cgesdd
     public :: stdlib_cgesv
     public :: stdlib_cgesvd
     public :: stdlib_cgesvdq
     public :: stdlib_cgesvj
     public :: stdlib_cgesvx
     public :: stdlib_cgetc2
     public :: stdlib_cgetf2
     public :: stdlib_cgetrf
     public :: stdlib_cgetrf2
     public :: stdlib_cgetri
     public :: stdlib_cgetrs
     public :: stdlib_cgetsls
     public :: stdlib_cgetsqrhrt
     public :: stdlib_cggbak
     public :: stdlib_cggbal
     public :: stdlib_cgges
     public :: stdlib_cgges3
     public :: stdlib_cggesx
     public :: stdlib_cggev
     public :: stdlib_cggev3
     public :: stdlib_cggevx
     public :: stdlib_cggglm
     public :: stdlib_cgghd3
     public :: stdlib_cgghrd
     public :: stdlib_cgglse
     public :: stdlib_cggqrf
     public :: stdlib_cggrqf
     public :: stdlib_cgsvj0
     public :: stdlib_cgsvj1
     public :: stdlib_cgtcon
     public :: stdlib_cgtrfs
     public :: stdlib_cgtsv
     public :: stdlib_cgtsvx
     public :: stdlib_cgttrf
     public :: stdlib_cgttrs
     public :: stdlib_cgtts2
     public :: stdlib_chb2st_kernels
     public :: stdlib_chbev
     public :: stdlib_chbevd
     public :: stdlib_chbevx
     public :: stdlib_chbgst
     public :: stdlib_chbgv
     public :: stdlib_chbgvd
     public :: stdlib_chbgvx
     public :: stdlib_chbtrd
     public :: stdlib_checon
     public :: stdlib_checon_rook
     public :: stdlib_cheequb
     public :: stdlib_cheev
     public :: stdlib_cheevd
     public :: stdlib_cheevr
     public :: stdlib_cheevx
     public :: stdlib_chegs2
     public :: stdlib_chegst
     public :: stdlib_chegv
     public :: stdlib_chegvd
     public :: stdlib_chegvx
     public :: stdlib_cherfs
     public :: stdlib_chesv
     public :: stdlib_chesv_aa
     public :: stdlib_chesv_rk
     public :: stdlib_chesv_rook
     public :: stdlib_chesvx
     public :: stdlib_cheswapr
     public :: stdlib_chetd2
     public :: stdlib_chetf2
     public :: stdlib_chetf2_rk
     public :: stdlib_chetf2_rook
     public :: stdlib_chetrd
     public :: stdlib_chetrd_hb2st
     public :: stdlib_chetrd_he2hb
     public :: stdlib_chetrf
     public :: stdlib_chetrf_aa
     public :: stdlib_chetrf_rk
     public :: stdlib_chetrf_rook
     public :: stdlib_chetri
     public :: stdlib_chetri_rook
     public :: stdlib_chetrs
     public :: stdlib_chetrs2
     public :: stdlib_chetrs_3
     public :: stdlib_chetrs_aa
     public :: stdlib_chetrs_rook
     public :: stdlib_chfrk
     public :: stdlib_chgeqz
     public :: stdlib_chla_transtype
     public :: stdlib_chpcon
     public :: stdlib_chpev
     public :: stdlib_chpevd
     public :: stdlib_chpevx
     public :: stdlib_chpgst
     public :: stdlib_chpgv
     public :: stdlib_chpgvd
     public :: stdlib_chpgvx
     public :: stdlib_chprfs
     public :: stdlib_chpsv
     public :: stdlib_chpsvx
     public :: stdlib_chptrd
     public :: stdlib_chptrf
     public :: stdlib_chptri
     public :: stdlib_chptrs
     public :: stdlib_chsein
     public :: stdlib_chseqr
     public :: stdlib_cla_gbamv
     public :: stdlib_cla_gbrcond_c
     public :: stdlib_cla_gbrpvgrw
     public :: stdlib_cla_geamv
     public :: stdlib_cla_gercond_c
     public :: stdlib_cla_gerpvgrw
     public :: stdlib_cla_heamv
     public :: stdlib_cla_hercond_c
     public :: stdlib_cla_herpvgrw
     public :: stdlib_cla_lin_berr
     public :: stdlib_cla_porcond_c
     public :: stdlib_cla_porpvgrw
     public :: stdlib_cla_syamv
     public :: stdlib_cla_syrcond_c
     public :: stdlib_cla_syrpvgrw
     public :: stdlib_cla_wwaddw
     public :: stdlib_clabrd
     public :: stdlib_clacgv
     public :: stdlib_clacn2
     public :: stdlib_clacon
     public :: stdlib_clacp2
     public :: stdlib_clacpy
     public :: stdlib_clacrm
     public :: stdlib_clacrt
     public :: stdlib_cladiv
     public :: stdlib_claed0
     public :: stdlib_claed7
     public :: stdlib_claed8
     public :: stdlib_claein
     public :: stdlib_claesy
     public :: stdlib_claev2
     public :: stdlib_clag2z
     public :: stdlib_clags2
     public :: stdlib_clagtm
     public :: stdlib_clahef
     public :: stdlib_clahef_aa
     public :: stdlib_clahef_rk
     public :: stdlib_clahef_rook
     public :: stdlib_clahqr
     public :: stdlib_clahr2
     public :: stdlib_claic1
     public :: stdlib_clals0
     public :: stdlib_clalsa
     public :: stdlib_clalsd
     public :: stdlib_clamswlq
     public :: stdlib_clamtsqr
     public :: stdlib_clangb
     public :: stdlib_clange
     public :: stdlib_clangt
     public :: stdlib_clanhb
     public :: stdlib_clanhe
     public :: stdlib_clanhf
     public :: stdlib_clanhp
     public :: stdlib_clanhs
     public :: stdlib_clanht
     public :: stdlib_clansb
     public :: stdlib_clansp
     public :: stdlib_clansy
     public :: stdlib_clantb
     public :: stdlib_clantp
     public :: stdlib_clantr
     public :: stdlib_clapll
     public :: stdlib_clapmr
     public :: stdlib_clapmt
     public :: stdlib_claqgb
     public :: stdlib_claqge
     public :: stdlib_claqhb
     public :: stdlib_claqhe
     public :: stdlib_claqhp
     public :: stdlib_claqp2
     public :: stdlib_claqps
     public :: stdlib_claqr0
     public :: stdlib_claqr1
     public :: stdlib_claqr2
     public :: stdlib_claqr3
     public :: stdlib_claqr4
     public :: stdlib_claqr5
     public :: stdlib_claqsb
     public :: stdlib_claqsp
     public :: stdlib_claqsy
     public :: stdlib_claqz0
     public :: stdlib_claqz1
     public :: stdlib_claqz2
     public :: stdlib_claqz3
     public :: stdlib_clar1v
     public :: stdlib_clar2v
     public :: stdlib_clarcm
     public :: stdlib_clarf
     public :: stdlib_clarfb
     public :: stdlib_clarfb_gett
     public :: stdlib_clarfg
     public :: stdlib_clarfgp
     public :: stdlib_clarft
     public :: stdlib_clarfx
     public :: stdlib_clarfy
     public :: stdlib_clargv
     public :: stdlib_clarnv
     public :: stdlib_clarrv
     public :: stdlib_clartg
     public :: stdlib_clartv
     public :: stdlib_clarz
     public :: stdlib_clarzb
     public :: stdlib_clarzt
     public :: stdlib_clascl
     public :: stdlib_claset
     public :: stdlib_clasr
     public :: stdlib_classq
     public :: stdlib_claswlq
     public :: stdlib_claswp
     public :: stdlib_clasyf
     public :: stdlib_clasyf_aa
     public :: stdlib_clasyf_rk
     public :: stdlib_clasyf_rook
     public :: stdlib_clatbs
     public :: stdlib_clatdf
     public :: stdlib_clatps
     public :: stdlib_clatrd
     public :: stdlib_clatrs
     public :: stdlib_clatrz
     public :: stdlib_clatsqr
     public :: stdlib_claunhr_col_getrfnp
     public :: stdlib_claunhr_col_getrfnp2
     public :: stdlib_clauu2
     public :: stdlib_clauum
     public :: stdlib_cpbcon
     public :: stdlib_cpbequ
     public :: stdlib_cpbrfs
     public :: stdlib_cpbstf
     public :: stdlib_cpbsv
     public :: stdlib_cpbsvx
     public :: stdlib_cpbtf2
     public :: stdlib_cpbtrf
     public :: stdlib_cpbtrs
     public :: stdlib_cpftrf
     public :: stdlib_cpftri
     public :: stdlib_cpftrs
     public :: stdlib_cpocon
     public :: stdlib_cpoequ
     public :: stdlib_cpoequb
     public :: stdlib_cporfs
     public :: stdlib_cposv
     public :: stdlib_cposvx
     public :: stdlib_cpotf2
     public :: stdlib_cpotrf
     public :: stdlib_cpotrf2
     public :: stdlib_cpotri
     public :: stdlib_cpotrs
     public :: stdlib_cppcon
     public :: stdlib_cppequ
     public :: stdlib_cpprfs
     public :: stdlib_cppsv
     public :: stdlib_cppsvx
     public :: stdlib_cpptrf
     public :: stdlib_cpptri
     public :: stdlib_cpptrs
     public :: stdlib_cpstf2
     public :: stdlib_cpstrf
     public :: stdlib_cptcon
     public :: stdlib_cpteqr
     public :: stdlib_cptrfs
     public :: stdlib_cptsv
     public :: stdlib_cptsvx
     public :: stdlib_cpttrf
     public :: stdlib_cpttrs
     public :: stdlib_cptts2
     public :: stdlib_crot
     public :: stdlib_cspcon
     public :: stdlib_cspmv
     public :: stdlib_cspr
     public :: stdlib_csprfs
     public :: stdlib_cspsv
     public :: stdlib_cspsvx
     public :: stdlib_csptrf
     public :: stdlib_csptri
     public :: stdlib_csptrs
     public :: stdlib_csrscl
     public :: stdlib_cstedc
     public :: stdlib_cstegr
     public :: stdlib_cstein
     public :: stdlib_cstemr
     public :: stdlib_csteqr
     public :: stdlib_csycon
     public :: stdlib_csycon_rook
     public :: stdlib_csyconv
     public :: stdlib_csyconvf
     public :: stdlib_csyconvf_rook
     public :: stdlib_csyequb
     public :: stdlib_csymv
     public :: stdlib_csyr
     public :: stdlib_csyrfs
     public :: stdlib_csysv
     public :: stdlib_csysv_aa
     public :: stdlib_csysv_rk
     public :: stdlib_csysv_rook
     public :: stdlib_csysvx
     public :: stdlib_csyswapr
     public :: stdlib_csytf2
     public :: stdlib_csytf2_rk
     public :: stdlib_csytf2_rook
     public :: stdlib_csytrf
     public :: stdlib_csytrf_aa
     public :: stdlib_csytrf_rk
     public :: stdlib_csytrf_rook
     public :: stdlib_csytri
     public :: stdlib_csytri_rook
     public :: stdlib_csytrs
     public :: stdlib_csytrs2
     public :: stdlib_csytrs_3
     public :: stdlib_csytrs_aa
     public :: stdlib_csytrs_rook
     public :: stdlib_ctbcon
     public :: stdlib_ctbrfs
     public :: stdlib_ctbtrs
     public :: stdlib_ctfsm
     public :: stdlib_ctftri
     public :: stdlib_ctfttp
     public :: stdlib_ctfttr
     public :: stdlib_ctgevc
     public :: stdlib_ctgex2
     public :: stdlib_ctgexc
     public :: stdlib_ctgsen
     public :: stdlib_ctgsja
     public :: stdlib_ctgsna
     public :: stdlib_ctgsy2
     public :: stdlib_ctgsyl
     public :: stdlib_ctpcon
     public :: stdlib_ctplqt
     public :: stdlib_ctplqt2
     public :: stdlib_ctpmlqt
     public :: stdlib_ctpmqrt
     public :: stdlib_ctpqrt
     public :: stdlib_ctpqrt2
     public :: stdlib_ctprfb
     public :: stdlib_ctprfs
     public :: stdlib_ctptri
     public :: stdlib_ctptrs
     public :: stdlib_ctpttf
     public :: stdlib_ctpttr
     public :: stdlib_ctrcon
     public :: stdlib_ctrevc
     public :: stdlib_ctrevc3
     public :: stdlib_ctrexc
     public :: stdlib_ctrrfs
     public :: stdlib_ctrsen
     public :: stdlib_ctrsna
     public :: stdlib_ctrsyl
     public :: stdlib_ctrti2
     public :: stdlib_ctrtri
     public :: stdlib_ctrtrs
     public :: stdlib_ctrttf
     public :: stdlib_ctrttp
     public :: stdlib_ctzrzf
     public :: stdlib_cunbdb
     public :: stdlib_cunbdb1
     public :: stdlib_cunbdb2
     public :: stdlib_cunbdb3
     public :: stdlib_cunbdb4
     public :: stdlib_cunbdb5
     public :: stdlib_cunbdb6
     public :: stdlib_cuncsd
     public :: stdlib_cuncsd2by1
     public :: stdlib_cung2l
     public :: stdlib_cung2r
     public :: stdlib_cungbr
     public :: stdlib_cunghr
     public :: stdlib_cungl2
     public :: stdlib_cunglq
     public :: stdlib_cungql
     public :: stdlib_cungqr
     public :: stdlib_cungr2
     public :: stdlib_cungrq
     public :: stdlib_cungtr
     public :: stdlib_cungtsqr
     public :: stdlib_cungtsqr_row
     public :: stdlib_cunhr_col
     public :: stdlib_cunm22
     public :: stdlib_cunm2l
     public :: stdlib_cunm2r
     public :: stdlib_cunmbr
     public :: stdlib_cunmhr
     public :: stdlib_cunml2
     public :: stdlib_cunmlq
     public :: stdlib_cunmql
     public :: stdlib_cunmqr
     public :: stdlib_cunmr2
     public :: stdlib_cunmr3
     public :: stdlib_cunmrq
     public :: stdlib_cunmrz
     public :: stdlib_cunmtr
     public :: stdlib_cupgtr
     public :: stdlib_cupmtr

     ! 32-bit real constants
     real(sp), parameter, private :: zero = 0.00_sp
     real(sp), parameter, private :: half = 0.50_sp
     real(sp), parameter, private :: one = 1.00_sp
     real(sp), parameter, private :: two = 2.00_sp
     real(sp), parameter, private :: three = 3.00_sp
     real(sp), parameter, private :: four = 4.00_sp
     real(sp), parameter, private :: eight = 8.00_sp
     real(sp), parameter, private :: ten = 10.00_sp

     ! 32-bit complex constants
     complex(sp), parameter, private :: czero = (0.0_sp, 0.0_sp)
     complex(sp), parameter, private :: chalf = (0.5_sp, 0.0_sp)
     complex(sp), parameter, private :: cone = (1.0_sp, 0.0_sp)

     ! 32-bit scaling constants
     integer, parameter, private :: maxexp = maxexponent(zero)
     integer, parameter, private :: minexp = minexponent(zero)
     real(sp), parameter, private :: rradix = real(radix(zero), sp)
     real(sp), parameter, private :: ulp = epsilon(zero)
     real(sp), parameter, private :: eps = ulp*half
     real(sp), parameter, private :: safmin = rradix**max(minexp - 1, 1 - maxexp)
     real(sp), parameter, private :: safmax = one/safmin
     real(sp), parameter, private :: smlnum = safmin/ulp
     real(sp), parameter, private :: bignum = safmax*ulp
     real(sp), parameter, private :: rtmin = sqrt(smlnum)
     real(sp), parameter, private :: rtmax = sqrt(bignum)

     ! 32-bit Blue's scaling constants
     ! ssml>=1/s and sbig==1/S with s,S as defined in https://doi.org/10.1145/355769.355771
     real(sp), parameter, private :: tsml = rradix**ceiling((minexp - 1)*half)
     real(sp), parameter, private :: tbig = rradix**floor((maxexp - digits(zero) + 1)*half)
     real(sp), parameter, private :: ssml = rradix**(-floor((minexp - digits(zero))*half))
     real(sp), parameter, private :: sbig = rradix**(-ceiling((maxexp + digits(zero) - 1)*half))

     contains

     ! CGBEQU computes row and column scalings intended to equilibrate an
     ! M-by-N band matrix A and reduce its condition number.  R returns the
     ! row scale factors and C the column scale factors, chosen to try to
     ! make the largest element in each row and column of the matrix B with
     ! elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
     ! R(i) and C(j) are restricted to be between SMLNUM = smallest safe
     ! number and BIGNUM = largest safe number.  Use of these scaling
     ! factors is not guaranteed to reduce the condition number of A but
     ! works well in practice.

     subroutine stdlib_cgbequ(m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, kl, ku, ldab, m, n
           real(sp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(sp) :: c(*), r(*)
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, kd
           real(sp) :: bignum, rcmax, rcmin, smlnum
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, min, real
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kl < 0) then
              info = -3
           else if (ku < 0) then
              info = -4
           else if (ldab < kl + ku + 1) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgbequ', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) then
              rowcnd = one
              colcnd = one
              amax = zero
              return
           end if
           ! get machine constants.
           smlnum = stdlib_slamch('s')
           bignum = one/smlnum
           ! compute row scale factors.
           do i = 1, m
              r(i) = zero
           end do
           ! find the maximum element in each row.
           kd = ku + 1
           do j = 1, n
              do i = max(j - ku, 1), min(j + kl, m)
                 r(i) = max(r(i), cabs1(ab(kd + i - j, j)))
              end do
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do i = 1, m
              rcmax = max(rcmax, r(i))
              rcmin = min(rcmin, r(i))
           end do
           amax = rcmax
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do i = 1, m
                 if (r(i) == zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do i = 1, m
                 r(i) = one/min(max(r(i), smlnum), bignum)
              end do
              ! compute rowcnd = min(r(i)) / max(r(i))
              rowcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           ! compute column scale factors
           do j = 1, n
              c(j) = zero
           end do
           ! find the maximum element in each column,
           ! assuming the row scaling computed above.
           kd = ku + 1
           do j = 1, n
              do i = max(j - ku, 1), min(j + kl, m)
                 c(j) = max(c(j), cabs1(ab(kd + i - j, j))*r(i))
              end do
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do j = 1, n
              rcmin = min(rcmin, c(j))
              rcmax = max(rcmax, c(j))
           end do
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do j = 1, n
                 if (c(j) == zero) then
                    info = m + j
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do j = 1, n
                 c(j) = one/min(max(c(j), smlnum), bignum)
              end do
              ! compute colcnd = min(c(j)) / max(c(j))
              colcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           return
           ! end of stdlib_cgbequ
     end subroutine stdlib_cgbequ

     ! CGBEQUB computes row and column scalings intended to equilibrate an
     ! M-by-N matrix A and reduce its condition number.  R returns the row
     ! scale factors and C the column scale factors, chosen to try to make
     ! the largest element in each row and column of the matrix B with
     ! elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most
     ! the radix.
     ! R(i) and C(j) are restricted to be a power of the radix between
     ! SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use
     ! of these scaling factors is not guaranteed to reduce the condition
     ! number of A but works well in practice.
     ! This routine differs from CGEEQU by restricting the scaling factors
     ! to a power of the radix.  Barring over- and underflow, scaling by
     ! these factors introduces no additional rounding errors.  However, the
     ! scaled entries' magnitudes are no longer approximately 1 but lie
     ! between sqrt(radix) and 1/sqrt(radix).

     subroutine stdlib_cgbequb(m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, kl, ku, ldab, m, n
           real(sp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(sp) :: c(*), r(*)
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, kd
           real(sp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, log, real, aimag
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kl < 0) then
              info = -3
           else if (ku < 0) then
              info = -4
           else if (ldab < kl + ku + 1) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgbequb', -info)
              return
           end if
           ! quick return if possible.
           if (m == 0 .or. n == 0) then
              rowcnd = one
              colcnd = one
              amax = zero
              return
           end if
           ! get machine constants.  assume smlnum is a power of the radix.
           smlnum = stdlib_slamch('s')
           bignum = one/smlnum
           radix = stdlib_slamch('b')
           logrdx = log(radix)
           ! compute row scale factors.
           do i = 1, m
              r(i) = zero
           end do
           ! find the maximum element in each row.
           kd = ku + 1
           do j = 1, n
              do i = max(j - ku, 1), min(j + kl, m)
                 r(i) = max(r(i), cabs1(ab(kd + i - j, j)))
              end do
           end do
           do i = 1, m
              if (r(i) > zero) then
                 r(i) = radix**int(log(r(i))/logrdx)
              end if
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do i = 1, m
              rcmax = max(rcmax, r(i))
              rcmin = min(rcmin, r(i))
           end do
           amax = rcmax
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do i = 1, m
                 if (r(i) == zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do i = 1, m
                 r(i) = one/min(max(r(i), smlnum), bignum)
              end do
              ! compute rowcnd = min(r(i)) / max(r(i)).
              rowcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           ! compute column scale factors.
           do j = 1, n
              c(j) = zero
           end do
           ! find the maximum element in each column,
           ! assuming the row scaling computed above.
           do j = 1, n
              do i = max(j - ku, 1), min(j + kl, m)
                 c(j) = max(c(j), cabs1(ab(kd + i - j, j))*r(i))
              end do
              if (c(j) > zero) then
                 c(j) = radix**int(log(c(j))/logrdx)
              end if
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do j = 1, n
              rcmin = min(rcmin, c(j))
              rcmax = max(rcmax, c(j))
           end do
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do j = 1, n
                 if (c(j) == zero) then
                    info = m + j
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do j = 1, n
                 c(j) = one/min(max(c(j), smlnum), bignum)
              end do
              ! compute colcnd = min(c(j)) / max(c(j)).
              colcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           return
           ! end of stdlib_cgbequb
     end subroutine stdlib_cgbequb

     ! CGBTF2 computes an LU factorization of a complex m-by-n band matrix
     ! A using partial pivoting with row interchanges.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_cgbtf2(m, n, kl, ku, ab, ldab, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, kl, ku, ldab, m, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, jp, ju, km, kv
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! kv is the number of superdiagonals in the factor u, allowing for
           ! fill-in.
           kv = ku + kl
           ! test the input parameters.
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kl < 0) then
              info = -3
           else if (ku < 0) then
              info = -4
           else if (ldab < kl + kv + 1) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgbtf2', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) return
           ! gaussian elimination with partial pivoting
           ! set fill-in elements in columns ku+2 to kv to czero.
           do j = ku + 2, min(kv, n)
              do i = kv - j + 2, kl
                 ab(i, j) = czero
              end do
           end do
           ! ju is the index of the last column affected by the current stage
           ! of the factorization.
           ju = 1
           loop_40: do j = 1, min(m, n)
              ! set fill-in elements in column j+kv to czero.
              if (j + kv <= n) then
                 do i = 1, kl
                    ab(i, j + kv) = czero
                 end do
              end if
              ! find pivot and test for singularity. km is the number of
              ! subdiagonal elements in the current column.
              km = min(kl, m - j)
              jp = stdlib_icamax(km + 1, ab(kv + 1, j), 1)
              ipiv(j) = jp + j - 1
              if (ab(kv + jp, j) /= czero) then
                 ju = max(ju, min(j + ku + jp - 1, n))
                 ! apply interchange to columns j to ju.
                 if (jp /= 1) call stdlib_cswap(ju - j + 1, ab(kv + jp, j), ldab - 1, ab(kv + 1, j), ldab - &
                           1)
                 if (km > 0) then
                    ! compute multipliers.
                    call stdlib_cscal(km, cone/ab(kv + 1, j), ab(kv + 2, j), 1)
                    ! update trailing submatrix within the band.
                    if (ju > j) call stdlib_cgeru(km, ju - j, -cone, ab(kv + 2, j), 1, ab(kv, j + 1), &
                              ldab - 1, ab(kv + 1, j + 1), ldab - 1)
                 end if
              else
                 ! if pivot is czero, set info to the index of the pivot
                 ! unless a czero pivot has already been found.
                 if (info == 0) info = j
              end if
           end do loop_40
           return
           ! end of stdlib_cgbtf2
     end subroutine stdlib_cgbtf2

     ! CGEBAK forms the right or left eigenvectors of a complex general
     ! matrix by backward transformation on the computed eigenvectors of the
     ! balanced matrix output by CGEBAL.

     subroutine stdlib_cgebak(job, side, n, ilo, ihi, scale, m, v, ldv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: job, side
           integer(ilp) :: ihi, ilo, info, ldv, m, n
           ! .. array arguments ..
           real(sp) :: scale(*)
           complex(sp) :: v(ldv, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: leftv, rightv
           integer(ilp) :: i, ii, k
           real(sp) :: s
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! decode and test the input parameters
           rightv = stdlib_lsame(side, 'r')
           leftv = stdlib_lsame(side, 'l')
           info = 0
           if (.not. stdlib_lsame(job, 'n') .and. .not. stdlib_lsame(job, 'p') &
                     .and. .not. stdlib_lsame(job, 's') .and. .not. stdlib_lsame(job, 'b')) then
              info = -1
           else if (.not. rightv .and. .not. leftv) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (ilo < 1 .or. ilo > max(1, n)) then
              info = -4
           else if (ihi < min(ilo, n) .or. ihi > n) then
              info = -5
           else if (m < 0) then
              info = -7
           else if (ldv < max(1, n)) then
              info = -9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgebak', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (m == 0) return
           if (stdlib_lsame(job, 'n')) return
           if (ilo == ihi) go to 30
           ! backward balance
           if (stdlib_lsame(job, 's') .or. stdlib_lsame(job, 'b')) then
              if (rightv) then
                 do i = ilo, ihi
                    s = scale(i)
                    call stdlib_csscal(m, s, v(i, 1), ldv)
                 end do
              end if
              if (leftv) then
                 do i = ilo, ihi
                    s = one/scale(i)
                    call stdlib_csscal(m, s, v(i, 1), ldv)
                 end do
              end if
           end if
           ! backward permutation
           ! for  i = ilo-1 step -1 until 1,
                    ! ihi+1 step 1 until n do --
30      continue
           if (stdlib_lsame(job, 'p') .or. stdlib_lsame(job, 'b')) then
              if (rightv) then
                 loop_40: do ii = 1, n
                    i = ii
                    if (i >= ilo .and. i <= ihi) cycle loop_40
                    if (i < ilo) i = ilo - ii
                    k = scale(i)
                    if (k == i) cycle loop_40
                    call stdlib_cswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_40
              end if
              if (leftv) then
                 loop_50: do ii = 1, n
                    i = ii
                    if (i >= ilo .and. i <= ihi) cycle loop_50
                    if (i < ilo) i = ilo - ii
                    k = scale(i)
                    if (k == i) cycle loop_50
                    call stdlib_cswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_50
              end if
           end if
           return
           ! end of stdlib_cgebak
     end subroutine stdlib_cgebak

     ! CGEBAL balances a general complex matrix A.  This involves, first,
     ! permuting A by a similarity transformation to isolate eigenvalues
     ! in the first 1 to ILO-1 and last IHI+1 to N elements on the
     ! diagonal; and second, applying a diagonal similarity transformation
     ! to rows and columns ILO to IHI to make the rows and columns as
     ! close in norm as possible.  Both steps are optional.
     ! Balancing may reduce the 1-norm of the matrix, and improve the
     ! accuracy of the computed eigenvalues and/or eigenvectors.

     subroutine stdlib_cgebal(job, n, a, lda, ilo, ihi, scale, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: job
           integer(ilp) :: ihi, ilo, info, lda, n
           ! .. array arguments ..
           real(sp) :: scale(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sclfac = 2.0_sp
           real(sp), parameter :: factor = 0.95_sp
           
           ! .. local scalars ..
           logical(lk) :: noconv
           integer(ilp) :: i, ica, iexc, ira, j, k, l, m
           real(sp) :: c, ca, f, g, r, ra, s, sfmax1, sfmax2, sfmin1, sfmin2
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, min, real
           ! test the input parameters
           info = 0
           if (.not. stdlib_lsame(job, 'n') .and. .not. stdlib_lsame(job, 'p') &
                     .and. .not. stdlib_lsame(job, 's') .and. .not. stdlib_lsame(job, 'b')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgebal', -info)
              return
           end if
           k = 1
           l = n
           if (n == 0) go to 210
           if (stdlib_lsame(job, 'n')) then
              do i = 1, n
                 scale(i) = one
              end do
              go to 210
           end if
           if (stdlib_lsame(job, 's')) go to 120
           ! permutation to isolate eigenvalues if possible
           go to 50
           ! row and column exchange.
20      continue
           scale(m) = j
           if (j == m) go to 30
           call stdlib_cswap(l, a(1, j), 1, a(1, m), 1)
           call stdlib_cswap(n - k + 1, a(j, k), lda, a(m, k), lda)
30      continue
           go to(40, 80) iexc
           ! search for rows isolating an eigenvalue and push them down.
40      continue
           if (l == 1) go to 210
           l = l - 1
50      continue
           loop_70: do j = l, 1, -1
              loop_60: do i = 1, l
                 if (i == j) cycle loop_60
                 if (real(a(j, i)) /= zero .or. aimag(a(j, i)) /= zero) cycle loop_70
              end do loop_60
              m = l
              iexc = 1
              go to 20
           end do loop_70
           go to 90
           ! search for columns isolating an eigenvalue and push them left.
80      continue
           k = k + 1
90      continue
           loop_110: do j = k, l
              loop_100: do i = k, l
                 if (i == j) cycle loop_100
                 if (real(a(i, j)) /= zero .or. aimag(a(i, j)) /= zero) cycle loop_110
              end do loop_100
              m = k
              iexc = 2
              go to 20
           end do loop_110
120    continue
           do i = k, l
              scale(i) = one
           end do
           if (stdlib_lsame(job, 'p')) go to 210
           ! balance the submatrix in rows k to l.
           ! iterative loop for norm reduction
           sfmin1 = stdlib_slamch('s')/stdlib_slamch('p')
           sfmax1 = one/sfmin1
           sfmin2 = sfmin1*sclfac
           sfmax2 = one/sfmin2
140    continue
           noconv = .false.
           loop_200: do i = k, l
              c = stdlib_scnrm2(l - k + 1, a(k, i), 1)
              r = stdlib_scnrm2(l - k + 1, a(i, k), lda)
              ica = stdlib_icamax(l, a(1, i), 1)
              ca = abs(a(ica, i))
              ira = stdlib_icamax(n - k + 1, a(i, k), lda)
              ra = abs(a(i, ira + k - 1))
              ! guard against zero c or r due to underflow.
              if (c == zero .or. r == zero) cycle loop_200
              g = r/sclfac
              f = one
              s = c + r
160    continue
              if (c >= g .or. max(f, c, ca) >= sfmax2 .or. min(r, g, ra) <= sfmin2) go to 170
                 if (stdlib_sisnan(c + f + ca + r + g + ra)) then
                 ! exit if nan to avoid infinite loop
                 info = -3
                 call stdlib_xerbla('stdlib_cgebal', -info)
                 return
              end if
              f = f*sclfac
              c = c*sclfac
              ca = ca*sclfac
              r = r/sclfac
              g = g/sclfac
              ra = ra/sclfac
              go to 160
170    continue
              g = c/sclfac
180    continue
              if (g < r .or. max(r, ra) >= sfmax2 .or. min(f, c, g, ca) <= sfmin2) go to 190
              f = f/sclfac
              c = c/sclfac
              g = g/sclfac
              ca = ca/sclfac
              r = r*sclfac
              ra = ra*sclfac
              go to 180
              ! now balance.
190    continue
              if ((c + r) >= factor*s) cycle loop_200
              if (f < one .and. scale(i) < one) then
                 if (f*scale(i) <= sfmin1) cycle loop_200
              end if
              if (f > one .and. scale(i) > one) then
                 if (scale(i) >= sfmax1/f) cycle loop_200
              end if
              g = one/f
              scale(i) = scale(i)*f
              noconv = .true.
              call stdlib_csscal(n - k + 1, g, a(i, k), lda)
              call stdlib_csscal(l, f, a(1, i), 1)
           end do loop_200
           if (noconv) go to 140
210    continue
           ilo = k
           ihi = l
           return
           ! end of stdlib_cgebal
     end subroutine stdlib_cgebal

     ! CGEEQU computes row and column scalings intended to equilibrate an
     ! M-by-N matrix A and reduce its condition number.  R returns the row
     ! scale factors and C the column scale factors, chosen to try to make
     ! the largest element in each row and column of the matrix B with
     ! elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
     ! R(i) and C(j) are restricted to be between SMLNUM = smallest safe
     ! number and BIGNUM = largest safe number.  Use of these scaling
     ! factors is not guaranteed to reduce the condition number of A but
     ! works well in practice.

     subroutine stdlib_cgeequ(m, n, a, lda, r, c, rowcnd, colcnd, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, m, n
           real(sp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(sp) :: c(*), r(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: bignum, rcmax, rcmin, smlnum
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, min, real
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, m)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgeequ', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) then
              rowcnd = one
              colcnd = one
              amax = zero
              return
           end if
           ! get machine constants.
           smlnum = stdlib_slamch('s')
           bignum = one/smlnum
           ! compute row scale factors.
           do i = 1, m
              r(i) = zero
           end do
           ! find the maximum element in each row.
           do j = 1, n
              do i = 1, m
                 r(i) = max(r(i), cabs1(a(i, j)))
              end do
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do i = 1, m
              rcmax = max(rcmax, r(i))
              rcmin = min(rcmin, r(i))
           end do
           amax = rcmax
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do i = 1, m
                 if (r(i) == zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do i = 1, m
                 r(i) = one/min(max(r(i), smlnum), bignum)
              end do
              ! compute rowcnd = min(r(i)) / max(r(i))
              rowcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           ! compute column scale factors
           do j = 1, n
              c(j) = zero
           end do
           ! find the maximum element in each column,
           ! assuming the row scaling computed above.
           do j = 1, n
              do i = 1, m
                 c(j) = max(c(j), cabs1(a(i, j))*r(i))
              end do
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do j = 1, n
              rcmin = min(rcmin, c(j))
              rcmax = max(rcmax, c(j))
           end do
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do j = 1, n
                 if (c(j) == zero) then
                    info = m + j
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do j = 1, n
                 c(j) = one/min(max(c(j), smlnum), bignum)
              end do
              ! compute colcnd = min(c(j)) / max(c(j))
              colcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           return
           ! end of stdlib_cgeequ
     end subroutine stdlib_cgeequ

     ! CGEEQUB computes row and column scalings intended to equilibrate an
     ! M-by-N matrix A and reduce its condition number.  R returns the row
     ! scale factors and C the column scale factors, chosen to try to make
     ! the largest element in each row and column of the matrix B with
     ! elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most
     ! the radix.
     ! R(i) and C(j) are restricted to be a power of the radix between
     ! SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use
     ! of these scaling factors is not guaranteed to reduce the condition
     ! number of A but works well in practice.
     ! This routine differs from CGEEQU by restricting the scaling factors
     ! to a power of the radix.  Barring over- and underflow, scaling by
     ! these factors introduces no additional rounding errors.  However, the
     ! scaled entries' magnitudes are no longer approximately 1 but lie
     ! between sqrt(radix) and 1/sqrt(radix).

     subroutine stdlib_cgeequb(m, n, a, lda, r, c, rowcnd, colcnd, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, m, n
           real(sp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(sp) :: c(*), r(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: bignum, rcmax, rcmin, smlnum, radix, logrdx
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, log, real, aimag
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, m)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgeequb', -info)
              return
           end if
           ! quick return if possible.
           if (m == 0 .or. n == 0) then
              rowcnd = one
              colcnd = one
              amax = zero
              return
           end if
           ! get machine constants.  assume smlnum is a power of the radix.
           smlnum = stdlib_slamch('s')
           bignum = one/smlnum
           radix = stdlib_slamch('b')
           logrdx = log(radix)
           ! compute row scale factors.
           do i = 1, m
              r(i) = zero
           end do
           ! find the maximum element in each row.
           do j = 1, n
              do i = 1, m
                 r(i) = max(r(i), cabs1(a(i, j)))
              end do
           end do
           do i = 1, m
              if (r(i) > zero) then
                 r(i) = radix**int(log(r(i))/logrdx)
              end if
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do i = 1, m
              rcmax = max(rcmax, r(i))
              rcmin = min(rcmin, r(i))
           end do
           amax = rcmax
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do i = 1, m
                 if (r(i) == zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do i = 1, m
                 r(i) = one/min(max(r(i), smlnum), bignum)
              end do
              ! compute rowcnd = min(r(i)) / max(r(i)).
              rowcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           ! compute column scale factors.
           do j = 1, n
              c(j) = zero
           end do
           ! find the maximum element in each column,
           ! assuming the row scaling computed above.
           do j = 1, n
              do i = 1, m
                 c(j) = max(c(j), cabs1(a(i, j))*r(i))
              end do
              if (c(j) > zero) then
                 c(j) = radix**int(log(c(j))/logrdx)
              end if
           end do
           ! find the maximum and minimum scale factors.
           rcmin = bignum
           rcmax = zero
           do j = 1, n
              rcmin = min(rcmin, c(j))
              rcmax = max(rcmax, c(j))
           end do
           if (rcmin == zero) then
              ! find the first zero scale factor and return an error code.
              do j = 1, n
                 if (c(j) == zero) then
                    info = m + j
                    return
                 end if
              end do
           else
              ! invert the scale factors.
              do j = 1, n
                 c(j) = one/min(max(c(j), smlnum), bignum)
              end do
              ! compute colcnd = min(c(j)) / max(c(j)).
              colcnd = max(rcmin, smlnum)/min(rcmax, bignum)
           end if
           return
           ! end of stdlib_cgeequb
     end subroutine stdlib_cgeequb

     ! CGETC2 computes an LU factorization, using complete pivoting, of the
     ! n-by-n matrix A. The factorization has the form A = P * L * U * Q,
     ! where P and Q are permutation matrices, L is lower triangular with
     ! unit diagonal elements and U is upper triangular.
     ! This is a level 1 BLAS version of the algorithm.

     subroutine stdlib_cgetc2(n, a, lda, ipiv, jpiv, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*), jpiv(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, ip, ipv, j, jp, jpv
           real(sp) :: bignum, eps, smin, smlnum, xmax
     
           ! .. intrinsic functions ..
           intrinsic :: abs, cmplx, max
           ! .. executable statements ..
           info = 0
           ! quick return if possible
           if (n == 0) return
           ! set constants to control overflow
           eps = stdlib_slamch('p')
           smlnum = stdlib_slamch('s')/eps
           bignum = one/smlnum
           call stdlib_slabad(smlnum, bignum)
           ! handle the case n=1 by itself
           if (n == 1) then
              ipiv(1) = 1
              jpiv(1) = 1
              if (abs(a(1, 1)) < smlnum) then
                 info = 1
                 a(1, 1) = cmplx(smlnum, zero, KIND=sp)
              end if
              return
           end if
           ! factorize a using complete pivoting.
           ! set pivots less than smin to smin
           loop_40: do i = 1, n - 1
              ! find max element in matrix a
              xmax = zero
              do ip = i, n
                 do jp = i, n
                    if (abs(a(ip, jp)) >= xmax) then
                       xmax = abs(a(ip, jp))
                       ipv = ip
                       jpv = jp
                    end if
                 end do
              end do
              if (i == 1) smin = max(eps*xmax, smlnum)
              ! swap rows
              if (ipv /= i) call stdlib_cswap(n, a(ipv, 1), lda, a(i, 1), lda)
              ipiv(i) = ipv
              ! swap columns
              if (jpv /= i) call stdlib_cswap(n, a(1, jpv), 1, a(1, i), 1)
              jpiv(i) = jpv
              ! check for singularity
              if (abs(a(i, i)) < smin) then
                 info = i
                 a(i, i) = cmplx(smin, zero, KIND=sp)
              end if
              do j = i + 1, n
                 a(j, i) = a(j, i)/a(i, i)
              end do
              call stdlib_cgeru(n - i, n - i, -cmplx(one, KIND=sp), a(i + 1, i), 1, a(i, i + 1), lda, &
                         a(i + 1, i + 1), lda)
           end do loop_40
           if (abs(a(n, n)) < smin) then
              info = n
              a(n, n) = cmplx(smin, zero, KIND=sp)
           end if
           ! set last pivots to n
           ipiv(n) = n
           jpiv(n) = n
           return
           ! end of stdlib_cgetc2
     end subroutine stdlib_cgetc2

     ! CGETF2 computes an LU factorization of a general m-by-n matrix A
     ! using partial pivoting with row interchanges.
     ! The factorization has the form
     ! A = P * L * U
     ! where P is a permutation matrix, L is lower triangular with unit
     ! diagonal elements (lower trapezoidal if m > n), and U is upper
     ! triangular (upper trapezoidal if m < n).
     ! This is the right-looking Level 2 BLAS version of the algorithm.

     subroutine stdlib_cgetf2(m, n, a, lda, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, m, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           real(sp) :: sfmin
           integer(ilp) :: i, j, jp
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, m)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgetf2', -info)
              return
           end if
           ! quick return if possible
           if (m == 0 .or. n == 0) return
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           do j = 1, min(m, n)
              ! find pivot and test for singularity.
              jp = j - 1 + stdlib_icamax(m - j + 1, a(j, j), 1)
              ipiv(j) = jp
              if (a(jp, j) /= czero) then
                 ! apply the interchange to columns 1:n.
                 if (jp /= j) call stdlib_cswap(n, a(j, 1), lda, a(jp, 1), lda)
                 ! compute elements j+1:m of j-th column.
                 if (j < m) then
                    if (abs(a(j, j)) >= sfmin) then
                       call stdlib_cscal(m - j, cone/a(j, j), a(j + 1, j), 1)
                    else
                       do i = 1, m - j
                          a(j + i, j) = a(j + i, j)/a(j, j)
                       end do
                    end if
                 end if
              else if (info == 0) then
                 info = j
              end if
              if (j < min(m, n)) then
                 ! update trailing submatrix.
                 call stdlib_cgeru(m - j, n - j, -cone, a(j + 1, j), 1, a(j, j + 1), lda, a(j + 1, j + 1 &
                           ), lda)
              end if
           end do
           return
           ! end of stdlib_cgetf2
     end subroutine stdlib_cgetf2

     ! CGGBAK forms the right or left eigenvectors of a complex generalized
     ! eigenvalue problem A*x = lambda*B*x, by backward transformation on
     ! the computed eigenvectors of the balanced pair of matrices output by
     ! CGGBAL.

     subroutine stdlib_cggbak(job, side, n, ilo, ihi, lscale, rscale, m, v, ldv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: job, side
           integer(ilp) :: ihi, ilo, info, ldv, m, n
           ! .. array arguments ..
           real(sp) :: lscale(*), rscale(*)
           complex(sp) :: v(ldv, *)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: leftv, rightv
           integer(ilp) :: i, k
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters
           rightv = stdlib_lsame(side, 'r')
           leftv = stdlib_lsame(side, 'l')
           info = 0
           if (.not. stdlib_lsame(job, 'n') .and. .not. stdlib_lsame(job, 'p') &
                     .and. .not. stdlib_lsame(job, 's') .and. .not. stdlib_lsame(job, 'b')) then
              info = -1
           else if (.not. rightv .and. .not. leftv) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (ilo < 1) then
              info = -4
           else if (n == 0 .and. ihi == 0 .and. ilo /= 1) then
              info = -4
           else if (n > 0 .and. (ihi < ilo .or. ihi > max(1, n))) then
              info = -5
           else if (n == 0 .and. ilo == 1 .and. ihi /= 0) then
              info = -5
           else if (m < 0) then
              info = -8
           else if (ldv < max(1, n)) then
              info = -10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cggbak', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (m == 0) return
           if (stdlib_lsame(job, 'n')) return
           if (ilo == ihi) go to 30
           ! backward balance
           if (stdlib_lsame(job, 's') .or. stdlib_lsame(job, 'b')) then
              ! backward transformation on right eigenvectors
              if (rightv) then
                 do i = ilo, ihi
                    call stdlib_csscal(m, rscale(i), v(i, 1), ldv)
                 end do
              end if
              ! backward transformation on left eigenvectors
              if (leftv) then
                 do i = ilo, ihi
                    call stdlib_csscal(m, lscale(i), v(i, 1), ldv)
                 end do
              end if
           end if
           ! backward permutation
30      continue
           if (stdlib_lsame(job, 'p') .or. stdlib_lsame(job, 'b')) then
              ! backward permutation on right eigenvectors
              if (rightv) then
                 if (ilo == 1) go to 50
                 loop_40: do i = ilo - 1, 1, -1
                    k = rscale(i)
                    if (k == i) cycle loop_40
                    call stdlib_cswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_40
50      continue
                 if (ihi == n) go to 70
                 loop_60: do i = ihi + 1, n
                    k = rscale(i)
                    if (k == i) cycle loop_60
                    call stdlib_cswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_60
              end if
              ! backward permutation on left eigenvectors
70      continue
              if (leftv) then
                 if (ilo == 1) go to 90
                 loop_80: do i = ilo - 1, 1, -1
                    k = lscale(i)
                    if (k == i) cycle loop_80
                    call stdlib_cswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_80
90      continue
                 if (ihi == n) go to 110
                 loop_100: do i = ihi + 1, n
                    k = lscale(i)
                    if (k == i) cycle loop_100
                    call stdlib_cswap(m, v(i, 1), ldv, v(k, 1), ldv)
                 end do loop_100
              end if
           end if
110    continue
           return
           ! end of stdlib_cggbak
     end subroutine stdlib_cggbak

     ! CGGBAL balances a pair of general complex matrices (A,B).  This
     ! involves, first, permuting A and B by similarity transformations to
     ! isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
     ! elements on the diagonal; and second, applying a diagonal similarity
     ! transformation to rows and columns ILO to IHI to make the rows
     ! and columns as close in norm as possible. Both steps are optional.
     ! Balancing may reduce the 1-norm of the matrices, and improve the
     ! accuracy of the computed eigenvalues and/or eigenvectors in the
     ! generalized eigenvalue problem A*x = lambda*B*x.

     subroutine stdlib_cggbal(job, n, a, lda, b, ldb, ilo, ihi, lscale, rscale, work, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: job
           integer(ilp) :: ihi, ilo, info, lda, ldb, n
           ! .. array arguments ..
           real(sp) :: lscale(*), rscale(*), work(*)
           complex(sp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sclfac = 1.0e+1
           
           ! .. local scalars ..
           integer(ilp) :: i, icab, iflow, ip1, ir, irab, it, j, jc, jp1, k, kount, l, lcab, lm1, &
                     lrab, lsfmax, lsfmin, m, nr, nrp2
           real(sp) :: alpha, basl, beta, cab, cmax, coef, coef2, coef5, cor, ew, ewc, gamma, &
                     pgamma, rab, sfmax, sfmin, sum, t, ta, tb, tc
           complex(sp) :: cdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, int, log10, max, min, real, sign
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(cdum) = abs(real(cdum)) + abs(aimag(cdum))
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           if (.not. stdlib_lsame(job, 'n') .and. .not. stdlib_lsame(job, 'p') &
                     .and. .not. stdlib_lsame(job, 's') .and. .not. stdlib_lsame(job, 'b')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           else if (ldb < max(1, n)) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cggbal', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) then
              ilo = 1
              ihi = n
              return
           end if
           if (n == 1) then
              ilo = 1
              ihi = n
              lscale(1) = one
              rscale(1) = one
              return
           end if
           if (stdlib_lsame(job, 'n')) then
              ilo = 1
              ihi = n
              do i = 1, n
                 lscale(i) = one
                 rscale(i) = one
              end do
              return
           end if
           k = 1
           l = n
           if (stdlib_lsame(job, 's')) go to 190
           go to 30
           ! permute the matrices a and b to isolate the eigenvalues.
           ! find row with one nonzero in columns 1 through l
20      continue
           l = lm1
           if (l /= 1) go to 30
           rscale(1) = one
           lscale(1) = one
           go to 190
30      continue
           lm1 = l - 1
           loop_80: do i = l, 1, -1
              do j = 1, lm1
                 jp1 = j + 1
                 if (a(i, j) /= czero .or. b(i, j) /= czero) go to 50
              end do
              j = l
              go to 70
50      continue
              do j = jp1, l
                 if (a(i, j) /= czero .or. b(i, j) /= czero) cycle loop_80
              end do
              j = jp1 - 1
70      continue
              m = l
              iflow = 1
              go to 160
           end do loop_80
           go to 100
           ! find column with one nonzero in rows k through n
90      continue
           k = k + 1
100    continue
           loop_150: do j = k, l
              do i = k, lm1
                 ip1 = i + 1
                 if (a(i, j) /= czero .or. b(i, j) /= czero) go to 120
              end do
              i = l
              go to 140
120    continue
              do i = ip1, l
                 if (a(i, j) /= czero .or. b(i, j) /= czero) cycle loop_150
              end do
              i = ip1 - 1
140    continue
              m = k
              iflow = 2
              go to 160
           end do loop_150
           go to 190
           ! permute rows m and i
160    continue
           lscale(m) = i
           if (i == m) go to 170
           call stdlib_cswap(n - k + 1, a(i, k), lda, a(m, k), lda)
           call stdlib_cswap(n - k + 1, b(i, k), ldb, b(m, k), ldb)
           ! permute columns m and j
170    continue
           rscale(m) = j
           if (j == m) go to 180
           call stdlib_cswap(l, a(1, j), 1, a(1, m), 1)
           call stdlib_cswap(l, b(1, j), 1, b(1, m), 1)
180    continue
           go to(20, 90) iflow
190    continue
           ilo = k
           ihi = l
           if (stdlib_lsame(job, 'p')) then
              do i = ilo, ihi
                 lscale(i) = one
                 rscale(i) = one
              end do
              return
           end if
           if (ilo == ihi) return
           ! balance the submatrix in rows ilo to ihi.
           nr = ihi - ilo + 1
           do i = ilo, ihi
              rscale(i) = zero
              lscale(i) = zero
              work(i) = zero
              work(i + n) = zero
              work(i + 2*n) = zero
              work(i + 3*n) = zero
              work(i + 4*n) = zero
              work(i + 5*n) = zero
           end do
           ! compute right side vector in resulting linear equations
           basl = log10(sclfac)
           do i = ilo, ihi
              do j = ilo, ihi
                 if (a(i, j) == czero) then
                    ta = zero
                    go to 210
                 end if
                 ta = log10(cabs1(a(i, j)))/basl
210    continue
                 if (b(i, j) == czero) then
                    tb = zero
                    go to 220
                 end if
                 tb = log10(cabs1(b(i, j)))/basl
220    continue
                 work(i + 4*n) = work(i + 4*n) - ta - tb
                 work(j + 5*n) = work(j + 5*n) - ta - tb
              end do
           end do
           coef = one/real(2*nr)
           coef2 = coef*coef
           coef5 = half*coef2
           nrp2 = nr + 2
           beta = zero
           it = 1
           ! start generalized conjugate gradient iteration
250    continue
           gamma = stdlib_sdot(nr, work(ilo + 4*n), 1, work(ilo + 4*n), 1) + stdlib_sdot(nr, &
                     work(ilo + 5*n), 1, work(ilo + 5*n), 1)
           ew = zero
           ewc = zero
           do i = ilo, ihi
              ew = ew + work(i + 4*n)
              ewc = ewc + work(i + 5*n)
           end do
           gamma = coef*gamma - coef2*(ew**2 + ewc**2) - coef5*(ew - ewc)**2
           if (gamma == zero) go to 350
           if (it /= 1) beta = gamma/pgamma
           t = coef5*(ewc - three*ew)
           tc = coef5*(ew - three*ewc)
           call stdlib_sscal(nr, beta, work(ilo), 1)
           call stdlib_sscal(nr, beta, work(ilo + n), 1)
           call stdlib_saxpy(nr, coef, work(ilo + 4*n), 1, work(ilo + n), 1)
           call stdlib_saxpy(nr, coef, work(ilo + 5*n), 1, work(ilo), 1)
           do i = ilo, ihi
              work(i) = work(i) + tc
              work(i + n) = work(i + n) + t
           end do
           ! apply matrix to vector
           do i = ilo, ihi
              kount = 0
              sum = zero
              loop_290: do j = ilo, ihi
                 if (a(i, j) == czero) go to 280
                 kount = kount + 1
                 sum = sum + work(j)
280    continue
                 if (b(i, j) == czero) cycle loop_290
                 kount = kount + 1
                 sum = sum + work(j)
              end do loop_290
              work(i + 2*n) = real(kount)*work(i + n) + sum
           end do
           do j = ilo, ihi
              kount = 0
              sum = zero
              loop_320: do i = ilo, ihi
                 if (a(i, j) == czero) go to 310
                 kount = kount + 1
                 sum = sum + work(i + n)
310    continue
                 if (b(i, j) == czero) cycle loop_320
                 kount = kount + 1
                 sum = sum + work(i + n)
              end do loop_320
              work(j + 3*n) = real(kount)*work(j) + sum
           end do
           sum = stdlib_sdot(nr, work(ilo + n), 1, work(ilo + 2*n), 1) + stdlib_sdot(nr, work( &
                     ilo), 1, work(ilo + 3*n), 1)
           alpha = gamma/sum
           ! determine correction to current iteration
           cmax = zero
           do i = ilo, ihi
              cor = alpha*work(i + n)
              if (abs(cor) > cmax) cmax = abs(cor)
              lscale(i) = lscale(i) + cor
              cor = alpha*work(i)
              if (abs(cor) > cmax) cmax = abs(cor)
              rscale(i) = rscale(i) + cor
           end do
           if (cmax < half) go to 350
           call stdlib_saxpy(nr, -alpha, work(ilo + 2*n), 1, work(ilo + 4*n), 1)
           call stdlib_saxpy(nr, -alpha, work(ilo + 3*n), 1, work(ilo + 5*n), 1)
           pgamma = gamma
           it = it + 1
           if (it <= nrp2) go to 250
           ! end generalized conjugate gradient iteration
350    continue
           sfmin = stdlib_slamch('s')
           sfmax = one/sfmin
           lsfmin = int(log10(sfmin)/basl + one, KIND=ilp)
           lsfmax = int(log10(sfmax)/basl, KIND=ilp)
           do i = ilo, ihi
              irab = stdlib_icamax(n - ilo + 1, a(i, ilo), lda)
              rab = abs(a(i, irab + ilo - 1))
              irab = stdlib_icamax(n - ilo + 1, b(i, ilo), ldb)
              rab = max(rab, abs(b(i, irab + ilo - 1)))
              lrab = int(log10(rab + sfmin)/basl + one, KIND=ilp)
              ir = lscale(i) + sign(half, lscale(i))
              ir = min(max(ir, lsfmin), lsfmax, lsfmax - lrab)
              lscale(i) = sclfac**ir
              icab = stdlib_icamax(ihi, a(1, i), 1)
              cab = abs(a(icab, i))
              icab = stdlib_icamax(ihi, b(1, i), 1)
              cab = max(cab, abs(b(icab, i)))
              lcab = int(log10(cab + sfmin)/basl + one, KIND=ilp)
              jc = rscale(i) + sign(half, rscale(i))
              jc = min(max(jc, lsfmin), lsfmax, lsfmax - lcab)
              rscale(i) = sclfac**jc
           end do
           ! row scaling of matrices a and b
           do i = ilo, ihi
              call stdlib_csscal(n - ilo + 1, lscale(i), a(i, ilo), lda)
              call stdlib_csscal(n - ilo + 1, lscale(i), b(i, ilo), ldb)
           end do
           ! column scaling of matrices a and b
           do j = ilo, ihi
              call stdlib_csscal(ihi, rscale(j), a(1, j), 1)
              call stdlib_csscal(ihi, rscale(j), b(1, j), 1)
           end do
           return
           ! end of stdlib_cggbal
     end subroutine stdlib_cggbal

     ! CGTSV  solves the equation
     ! A*X = B,
     ! where A is an N-by-N tridiagonal matrix, by Gaussian elimination with
     ! partial pivoting.
     ! Note that the equation  A**T *X = B  may be solved by interchanging the
     ! order of the arguments DU and DL.

     subroutine stdlib_cgtsv(n, nrhs, dl, d, du, b, ldb, info)
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, ldb, n, nrhs
           ! .. array arguments ..
           complex(sp) :: b(ldb, *), d(*), dl(*), du(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: j, k
           complex(sp) :: mult, temp, zdum
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, real
     
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           info = 0
           if (n < 0) then
              info = -1
           else if (nrhs < 0) then
              info = -2
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cgtsv ', -info)
              return
           end if
           if (n == 0) return
           loop_30: do k = 1, n - 1
              if (dl(k) == czero) then
                 ! subdiagonal is czero, no elimination is required.
                 if (d(k) == czero) then
                    ! diagonal is czero: set info = k and return; a unique
                    ! solution can not be found.
                    info = k
                    return
                 end if
              else if (cabs1(d(k)) >= cabs1(dl(k))) then
                 ! no row interchange required
                 mult = dl(k)/d(k)
                 d(k + 1) = d(k + 1) - mult*du(k)
                 do j = 1, nrhs
                    b(k + 1, j) = b(k + 1, j) - mult*b(k, j)
                 end do
                 if (k < (n - 1)) dl(k) = czero
              else
                 ! interchange rows k and k+1
                 mult = d(k)/dl(k)
                 d(k) = dl(k)
                 temp = d(k + 1)
                 d(k + 1) = du(k) - mult*temp
                 if (k < (n - 1)) then
                    dl(k) = du(k + 1)
                    du(k + 1) = -mult*dl(k)
                 end if
                 du(k) = temp
                 do j = 1, nrhs
                    temp = b(k, j)
                    b(k, j) = b(k + 1, j)
                    b(k + 1, j) = temp - mult*b(k + 1, j)
                 end do
              end if
           end do loop_30
           if (d(n) == czero) then
              info = n
              return
           end if
           ! back solve with the matrix u from the factorization.
           do j = 1, nrhs
              b(n, j) = b(n, j)/d(n)
              if (n > 1) b(n - 1, j) = (b(n - 1, j) - du(n - 1)*b(n, j))/d(n - 1)
              do k = n - 2, 1, -1
                 b(k, j) = (b(k, j) - du(k)*b(k + 1, j) - dl(k)*b(k + 2, j))/d(k)
                           
              end do
           end do
           return
           ! end of stdlib_cgtsv
     end subroutine stdlib_cgtsv

     ! CGTTRF computes an LU factorization of a complex tridiagonal matrix A
     ! using elimination with partial pivoting and row interchanges.
     ! The factorization has the form
     ! A = L * U
     ! where L is a product of permutation and unit lower bidiagonal
     ! matrices and U is upper triangular with nonzeros in only the main
     ! diagonal and first two superdiagonals.

     subroutine stdlib_cgttrf(n, dl, d, du, du2, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: d(*), dl(*), du(*), du2(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           complex(sp) :: fact, temp, zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, real
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           info = 0
           if (n < 0) then
              info = -1
              call stdlib_xerbla('stdlib_cgttrf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! initialize ipiv(i) = i and du2(i) = 0
           do i = 1, n
              ipiv(i) = i
           end do
           do i = 1, n - 2
              du2(i) = zero
           end do
           do i = 1, n - 2
              if (cabs1(d(i)) >= cabs1(dl(i))) then
                 ! no row interchange required, eliminate dl(i)
                 if (cabs1(d(i)) /= zero) then
                    fact = dl(i)/d(i)
                    dl(i) = fact
                    d(i + 1) = d(i + 1) - fact*du(i)
                 end if
              else
                 ! interchange rows i and i+1, eliminate dl(i)
                 fact = d(i)/dl(i)
                 d(i) = dl(i)
                 dl(i) = fact
                 temp = du(i)
                 du(i) = d(i + 1)
                 d(i + 1) = temp - fact*d(i + 1)
                 du2(i) = du(i + 1)
                 du(i + 1) = -fact*du(i + 1)
                 ipiv(i) = i + 1
              end if
           end do
           if (n > 1) then
              i = n - 1
              if (cabs1(d(i)) >= cabs1(dl(i))) then
                 if (cabs1(d(i)) /= zero) then
                    fact = dl(i)/d(i)
                    dl(i) = fact
                    d(i + 1) = d(i + 1) - fact*du(i)
                 end if
              else
                 fact = d(i)/dl(i)
                 d(i) = dl(i)
                 dl(i) = fact
                 temp = du(i)
                 du(i) = d(i + 1)
                 d(i + 1) = temp - fact*d(i + 1)
                 ipiv(i) = i + 1
              end if
           end if
           ! check for a zero on the diagonal of u.
           do i = 1, n
              if (cabs1(d(i)) == zero) then
                 info = i
                 go to 50
              end if
           end do
50      continue
           return
           ! end of stdlib_cgttrf
     end subroutine stdlib_cgttrf

     ! CGTTS2 solves one of the systems of equations
     ! A * X = B,  A**T * X = B,  or  A**H * X = B,
     ! with a tridiagonal matrix A using the LU factorization computed
     ! by CGTTRF.

     subroutine stdlib_cgtts2(itrans, n, nrhs, dl, d, du, du2, ipiv, b, ldb)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: itrans, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: b(ldb, *), d(*), dl(*), du(*), du2(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
           complex(sp) :: temp
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (itrans == 0) then
              ! solve a*x = b using the lu factorization of a,
              ! overwriting each right hand side vector with its solution.
              if (nrhs <= 1) then
                 j = 1
10      continue
                 ! solve l*x = b.
                 do i = 1, n - 1
                    if (ipiv(i) == i) then
                       b(i + 1, j) = b(i + 1, j) - dl(i)*b(i, j)
                    else
                       temp = b(i, j)
                       b(i, j) = b(i + 1, j)
                       b(i + 1, j) = temp - dl(i)*b(i, j)
                    end if
                 end do
                 ! solve u*x = b.
                 b(n, j) = b(n, j)/d(n)
                 if (n > 1) b(n - 1, j) = (b(n - 1, j) - du(n - 1)*b(n, j))/d(n - 1)
                 do i = n - 2, 1, -1
                    b(i, j) = (b(i, j) - du(i)*b(i + 1, j) - du2(i)*b(i + 2, j))/d(i)
                              
                 end do
                 if (j < nrhs) then
                    j = j + 1
                    go to 10
                 end if
              else
                 do j = 1, nrhs
                 ! solve l*x = b.
                    do i = 1, n - 1
                       if (ipiv(i) == i) then
                          b(i + 1, j) = b(i + 1, j) - dl(i)*b(i, j)
                       else
                          temp = b(i, j)
                          b(i, j) = b(i + 1, j)
                          b(i + 1, j) = temp - dl(i)*b(i, j)
                       end if
                    end do
                 ! solve u*x = b.
                    b(n, j) = b(n, j)/d(n)
                    if (n > 1) b(n - 1, j) = (b(n - 1, j) - du(n - 1)*b(n, j))/d(n - 1)
                    do i = n - 2, 1, -1
                       b(i, j) = (b(i, j) - du(i)*b(i + 1, j) - du2(i)*b(i + 2, j))/d(i)
                                 
                    end do
                 end do
              end if
           else if (itrans == 1) then
              ! solve a**t * x = b.
              if (nrhs <= 1) then
                 j = 1
70      continue
                 ! solve u**t * x = b.
                 b(1, j) = b(1, j)/d(1)
                 if (n > 1) b(2, j) = (b(2, j) - du(1)*b(1, j))/d(2)
                 do i = 3, n
                    b(i, j) = (b(i, j) - du(i - 1)*b(i - 1, j) - du2(i - 2)*b(i - 2, j))/d(i &
                              )
                 end do
                 ! solve l**t * x = b.
                 do i = n - 1, 1, -1
                    if (ipiv(i) == i) then
                       b(i, j) = b(i, j) - dl(i)*b(i + 1, j)
                    else
                       temp = b(i + 1, j)
                       b(i + 1, j) = b(i, j) - dl(i)*temp
                       b(i, j) = temp
                    end if
                 end do
                 if (j < nrhs) then
                    j = j + 1
                    go to 70
                 end if
              else
                 do j = 1, nrhs
                 ! solve u**t * x = b.
                    b(1, j) = b(1, j)/d(1)
                    if (n > 1) b(2, j) = (b(2, j) - du(1)*b(1, j))/d(2)
                    do i = 3, n
                       b(i, j) = (b(i, j) - du(i - 1)*b(i - 1, j) - du2(i - 2)*b(i - 2, j))/d( &
                                  i)
                    end do
                 ! solve l**t * x = b.
                    do i = n - 1, 1, -1
                       if (ipiv(i) == i) then
                          b(i, j) = b(i, j) - dl(i)*b(i + 1, j)
                       else
                          temp = b(i + 1, j)
                          b(i + 1, j) = b(i, j) - dl(i)*temp
                          b(i, j) = temp
                       end if
                    end do
                 end do
              end if
           else
              ! solve a**h * x = b.
              if (nrhs <= 1) then
                 j = 1
130    continue
                 ! solve u**h * x = b.
                 b(1, j) = b(1, j)/conjg(d(1))
                 if (n > 1) b(2, j) = (b(2, j) - conjg(du(1))*b(1, j))/conjg(d(2))
                           
                 do i = 3, n
                    b(i, j) = (b(i, j) - conjg(du(i - 1))*b(i - 1, j) - conjg(du2(i - 2))*b( &
                              i - 2, j))/conjg(d(i))
                 end do
                 ! solve l**h * x = b.
                 do i = n - 1, 1, -1
                    if (ipiv(i) == i) then
                       b(i, j) = b(i, j) - conjg(dl(i))*b(i + 1, j)
                    else
                       temp = b(i + 1, j)
                       b(i + 1, j) = b(i, j) - conjg(dl(i))*temp
                       b(i, j) = temp
                    end if
                 end do
                 if (j < nrhs) then
                    j = j + 1
                    go to 130
                 end if
              else
                 do j = 1, nrhs
                 ! solve u**h * x = b.
                    b(1, j) = b(1, j)/conjg(d(1))
                    if (n > 1) b(2, j) = (b(2, j) - conjg(du(1))*b(1, j))/conjg(d(2))
                              
                    do i = 3, n
                       b(i, j) = (b(i, j) - conjg(du(i - 1))*b(i - 1, j) - conjg(du2(i - 2)) &
                                 *b(i - 2, j))/conjg(d(i))
                    end do
                 ! solve l**h * x = b.
                    do i = n - 1, 1, -1
                       if (ipiv(i) == i) then
                          b(i, j) = b(i, j) - conjg(dl(i))*b(i + 1, j)
                       else
                          temp = b(i + 1, j)
                          b(i + 1, j) = b(i, j) - conjg(dl(i))*temp
                          b(i, j) = temp
                       end if
                    end do
                 end do
              end if
           end if
           ! end of stdlib_cgtts2
     end subroutine stdlib_cgtts2

     ! CHESWAPR applies an elementary permutation on the rows and the columns of
     ! a hermitian matrix.

     subroutine stdlib_cheswapr(uplo, n, a, lda, i1, i2)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: i1, i2, lda, n
           ! .. array arguments ..
           complex(sp) :: a(lda, n)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i
           complex(sp) :: tmp
     
           ! .. executable statements ..
           upper = stdlib_lsame(uplo, 'u')
           if (upper) then
               ! upper
               ! first swap
                ! - swap column i1 and i2 from i1 to i1-1
              call stdlib_cswap(i1 - 1, a(1, i1), 1, a(1, i2), 1)
                ! second swap :
                ! - swap a(i1,i1) and a(i2,i2)
                ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1
                ! - swap a(i2,i1) and a(i1,i2)
              tmp = a(i1, i1)
              a(i1, i1) = a(i2, i2)
              a(i2, i2) = tmp
              do i = 1, i2 - i1 - 1
                 tmp = a(i1, i1 + i)
                 a(i1, i1 + i) = conjg(a(i1 + i, i2))
                 a(i1 + i, i2) = conjg(tmp)
              end do
               a(i1, i2) = conjg(a(i1, i2))
                ! third swap
                ! - swap row i1 and i2 from i2+1 to n
              do i = i2 + 1, n
                 tmp = a(i1, i)
                 a(i1, i) = a(i2, i)
                 a(i2, i) = tmp
              end do
             else
               ! lower
               ! first swap
                ! - swap row i1 and i2 from 1 to i1-1
              call stdlib_cswap(i1 - 1, a(i1, 1), lda, a(i2, 1), lda)
               ! second swap :
                ! - swap a(i1,i1) and a(i2,i2)
                ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1
                ! - swap a(i2,i1) and a(i1,i2)
               tmp = a(i1, i1)
               a(i1, i1) = a(i2, i2)
               a(i2, i2) = tmp
               do i = 1, i2 - i1 - 1
                  tmp = a(i1 + i, i1)
                  a(i1 + i, i1) = conjg(a(i2, i1 + i))
                  a(i2, i1 + i) = conjg(tmp)
               end do
               a(i2, i1) = conjg(a(i2, i1))
               ! third swap
                ! - swap col i1 and i2 from i2+1 to n
               do i = i2 + 1, n
                  tmp = a(i, i1)
                  a(i, i1) = a(i, i2)
                  a(i, i2) = tmp
               end do
           end if
     end subroutine stdlib_cheswapr

     ! CHETF2 computes the factorization of a complex Hermitian matrix A
     ! using the Bunch-Kaufman diagonal pivoting method:
     ! A = U*D*U**H  or  A = L*D*L**H
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, U**H is the conjugate transpose of U, and D is
     ! Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_chetf2(uplo, n, a, lda, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep
           real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt
           complex(sp) :: d12, d21, t, wk, wkm1, wkp1, zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, conjg, max, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chetf2', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (upper) then
              ! factorize a as u*d*u**h using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 90
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(a(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, a(1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero) .or. stdlib_sisnan(absakk)) then
                 ! column k is or underflow, or contains a nan:
                 ! set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = imax + stdlib_icamax(k - imax, a(imax, imax + 1), lda)
                    rowmax = cabs1(a(imax, jmax))
                    if (imax > 1) then
                       jmax = stdlib_icamax(imax - 1, a(1, imax), 1)
                       rowmax = max(rowmax, cabs1(a(jmax, imax)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(real(a(imax, imax))) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k - kstep + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    call stdlib_cswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    do j = kp + 1, kk - 1
                       t = conjg(a(j, kk))
                       a(j, kk) = conjg(a(kp, j))
                       a(kp, j) = t
                    end do
                    a(kp, kk) = conjg(a(kp, kk))
                    r1 = real(a(kk, kk))
                    a(kk, kk) = real(a(kp, kp))
                    a(kp, kp) = r1
                    if (kstep == 2) then
                       a(k, k) = real(a(k, k))
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 else
                    a(k, k) = real(a(k, k))
                    if (kstep == 2) a(k - 1, k - 1) = real(a(k - 1, k - 1))
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! perform a rank-1 update of a(1:k-1,1:k-1) as
                    ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h
                    r1 = one/real(a(k, k))
                    call stdlib_cher(uplo, k - 1, -r1, a(1, k), 1, a, lda)
                    ! store u(k) in column k
                    call stdlib_csscal(k - 1, r1, a(1, k), 1)
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h
                       ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h
                    if (k > 2) then
                       d = stdlib_slapy2(real(a(k - 1, k)), aimag(a(k - 1, k)))
                       d22 = real(a(k - 1, k - 1))/d
                       d11 = real(a(k, k))/d
                       tt = one/(d11*d22 - one)
                       d12 = a(k - 1, k)/d
                       d = tt/d
                       do j = k - 2, 1, -1
                          wkm1 = d*(d11*a(j, k - 1) - conjg(d12)*a(j, k))
                          wk = d*(d22*a(j, k) - d12*a(j, k - 1))
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - a(i, k)*conjg(wk) - a(i, k - 1)*conjg( &
                                       wkm1)
                          end do
                          a(j, k) = wk
                          a(j, k - 1) = wkm1
                          a(j, j) = cmplx(real(a(j, j)), 0.0_sp)
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
           else
              ! factorize a as l*d*l**h using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
50      continue
              ! if k > n, exit from loop
              if (k > n) go to 90
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(a(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, a(k + 1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero) .or. stdlib_sisnan(absakk)) then
                 ! column k is zero or underflow, contains a nan:
                 ! set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = k - 1 + stdlib_icamax(imax - k, a(imax, k), lda)
                    rowmax = cabs1(a(imax, jmax))
                    if (imax < n) then
                       jmax = imax + stdlib_icamax(n - imax, a(imax + 1, imax), 1)
                       rowmax = max(rowmax, cabs1(a(jmax, imax)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(real(a(imax, imax))) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k + kstep - 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    do j = kk + 1, kp - 1
                       t = conjg(a(j, kk))
                       a(j, kk) = conjg(a(kp, j))
                       a(kp, j) = t
                    end do
                    a(kp, kk) = conjg(a(kp, kk))
                    r1 = real(a(kk, kk))
                    a(kk, kk) = real(a(kp, kp))
                    a(kp, kp) = r1
                    if (kstep == 2) then
                       a(k, k) = real(a(k, k))
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 else
                    a(k, k) = real(a(k, k))
                    if (kstep == 2) a(k + 1, k + 1) = real(a(k + 1, k + 1))
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                       ! perform a rank-1 update of a(k+1:n,k+1:n) as
                       ! a := a - l(k)*d(k)*l(k)**h = a - w(k)*(1/d(k))*w(k)**h
                       r1 = one/real(a(k, k))
                       call stdlib_cher(uplo, n - k, -r1, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                 
                       ! store l(k) in column k
                       call stdlib_csscal(n - k, r1, a(k + 1, k), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k)
                    if (k < n - 1) then
                       ! perform a rank-2 update of a(k+2:n,k+2:n) as
                       ! a := a - ( l(k) l(k+1) )*d(k)*( l(k) l(k+1) )**h
                          ! = a - ( w(k) w(k+1) )*inv(d(k))*( w(k) w(k+1) )**h
                       ! where l(k) and l(k+1) are the k-th and (k+1)-th
                       ! columns of l
                       d = stdlib_slapy2(real(a(k + 1, k)), aimag(a(k + 1, k)))
                       d11 = real(a(k + 1, k + 1))/d
                       d22 = real(a(k, k))/d
                       tt = one/(d11*d22 - one)
                       d21 = a(k + 1, k)/d
                       d = tt/d
                       do j = k + 2, n
                          wk = d*(d11*a(j, k) - d21*a(j, k + 1))
                          wkp1 = d*(d22*a(j, k + 1) - conjg(d21)*a(j, k))
                          do i = j, n
                             a(i, j) = a(i, j) - a(i, k)*conjg(wk) - a(i, k + 1)*conjg( &
                                       wkp1)
                          end do
                          a(j, k) = wk
                          a(j, k + 1) = wkp1
                          a(j, j) = cmplx(real(a(j, j)), 0.0_sp)
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 50
           end if
90      continue
           return
           ! end of stdlib_chetf2
     end subroutine stdlib_chetf2

     ! CHETF2_RK computes the factorization of a complex Hermitian matrix A
     ! using the bounded Bunch-Kaufman (rook) diagonal pivoting method:
     ! A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T),
     ! where U (or L) is unit upper (or lower) triangular matrix,
     ! U**H (or L**H) is the conjugate of U (or L), P is a permutation
     ! matrix, P**T is the transpose of P, and D is Hermitian and block
     ! diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.
     ! For more information see Further Details section.

     subroutine stdlib_chetf2_rk(uplo, n, a, lda, e, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), e(*)
        ! ======================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: done, upper
           integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p
           real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, stemp, rowmax, tt, sfmin
           complex(sp) :: d12, d21, t, wk, wkm1, wkp1, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, conjg, max, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chetf2_rk', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (upper) then
              ! factorize a as u*d*u**h using the upper triangle of a
              ! initialize the first entry of array e, where superdiagonal
              ! elements of d are stored
              e(1) = czero
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 34
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(a(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, a(1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
                 ! set e( k ) to zero
                 if (k > 1) e(k) = czero
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, a(imax, imax + 1), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, a(1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,kw-1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(a(imax, imax))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k - kstep + 1
                 ! for only a 2x2 pivot, interchange rows and columns k and p
                 ! in the leading submatrix a(1:k,1:k)
                 if ((kstep == 2) .and. (p /= k)) then
                    ! (1) swap columnar parts
                    if (p > 1) call stdlib_cswap(p - 1, a(1, k), 1, a(1, p), 1)
                    ! (2) swap and conjugate middle parts
                    do j = p + 1, k - 1
                       t = conjg(a(j, k))
                       a(j, k) = conjg(a(p, j))
                       a(p, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(p, k) = conjg(a(p, k))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(k, k))
                    a(k, k) = real(a(p, p))
                    a(p, p) = r1
                    ! convert upper triangle of a into u form by applying
                    ! the interchanges in columns k+1:n.
                    if (k < n) call stdlib_cswap(n - k, a(k, k + 1), lda, a(p, k + 1), lda)
                 end if
                 ! for both 1x1 and 2x2 pivots, interchange rows and
                 ! columns kk and kp in the leading submatrix a(1:k,1:k)
                 if (kp /= kk) then
                    ! (1) swap columnar parts
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    ! (2) swap and conjugate middle parts
                    do j = kp + 1, kk - 1
                       t = conjg(a(j, kk))
                       a(j, kk) = conjg(a(kp, j))
                       a(kp, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(kp, kk) = conjg(a(kp, kk))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(kk, kk))
                    a(kk, kk) = real(a(kp, kp))
                    a(kp, kp) = r1
                    if (kstep == 2) then
                       ! (*) make sure that diagonal element of pivot is real
                       a(k, k) = real(a(k, k))
                       ! (5) swap row elements
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                    ! convert upper triangle of a into u form by applying
                    ! the interchanges in columns k+1:n.
                    if (k < n) call stdlib_cswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                 else
                    ! (*) make sure that diagonal element of pivot is real
                    a(k, k) = real(a(k, k))
                    if (kstep == 2) a(k - 1, k - 1) = real(a(k - 1, k - 1))
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    if (k > 1) then
                       ! perform a rank-1 update of a(1:k-1,1:k-1) and
                       ! store u(k) in column k
                       if (abs(real(a(k, k))) >= sfmin) then
                          ! perform a rank-1 update of a(1:k-1,1:k-1) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*1/d(k)*w(k)**t
                          d11 = one/real(a(k, k))
                          call stdlib_cher(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                          ! store u(k) in column k
                          call stdlib_csscal(k - 1, d11, a(1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = real(a(k, k))
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_cher(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                       end if
                       ! store the superdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k > 2) then
                       ! d = |a12|
                       d = stdlib_slapy2(real(a(k - 1, k)), aimag(a(k - 1, k)))
                       d11 = real(a(k, k)/d)
                       d22 = real(a(k - 1, k - 1)/d)
                       d12 = a(k - 1, k)/d
                       tt = one/(d11*d22 - one)
                       do j = k - 2, 1, -1
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wkm1 = tt*(d11*a(j, k - 1) - conjg(d12)*a(j, k))
                          wk = tt*(d22*a(j, k) - d12*a(j, k - 1))
                          ! perform a rank-2 update of a(1:k-2,1:k-2)
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - (a(i, k)/d)*conjg(wk) - (a(i, k - 1) &
                                       /d)*conjg(wkm1)
                          end do
                          ! store u(k) and u(k-1) in cols k and k-1 for row j
                          a(j, k) = wk/d
                          a(j, k - 1) = wkm1/d
                          ! (*) make sure that diagonal element of pivot is real
                          a(j, j) = cmplx(real(a(j, j)), zero)
                       end do
                    end if
                    ! copy superdiagonal elements of d(k) to e(k) and
                    ! zero out superdiagonal entry of a
                    e(k) = a(k - 1, k)
                    e(k - 1) = czero
                    a(k - 1, k) = czero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
34      continue
           else
              ! factorize a as l*d*l**h using the lower triangle of a
              ! initialize the unused last entry of the subdiagonal array e.
              e(n) = czero
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
40      continue
              ! if k > n, exit from loop
              if (k > n) go to 64
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(a(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, a(k + 1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
                 ! set e( k ) to zero
                 if (k < n) e(k) = czero
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
42      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, a(imax, k), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, a(imax + 1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,kw-1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(a(imax, imax))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 42
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k + kstep - 1
                 ! for only a 2x2 pivot, interchange rows and columns k and p
                 ! in the trailing submatrix a(k:n,k:n)
                 if ((kstep == 2) .and. (p /= k)) then
                    ! (1) swap columnar parts
                    if (p < n) call stdlib_cswap(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    ! (2) swap and conjugate middle parts
                    do j = k + 1, p - 1
                       t = conjg(a(j, k))
                       a(j, k) = conjg(a(p, j))
                       a(p, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(p, k) = conjg(a(p, k))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(k, k))
                    a(k, k) = real(a(p, p))
                    a(p, p) = r1
                    ! convert lower triangle of a into l form by applying
                    ! the interchanges in columns 1:k-1.
                    if (k > 1) call stdlib_cswap(k - 1, a(k, 1), lda, a(p, 1), lda)
                 end if
                 ! for both 1x1 and 2x2 pivots, interchange rows and
                 ! columns kk and kp in the trailing submatrix a(k:n,k:n)
                 if (kp /= kk) then
                    ! (1) swap columnar parts
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    ! (2) swap and conjugate middle parts
                    do j = kk + 1, kp - 1
                       t = conjg(a(j, kk))
                       a(j, kk) = conjg(a(kp, j))
                       a(kp, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(kp, kk) = conjg(a(kp, kk))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(kk, kk))
                    a(kk, kk) = real(a(kp, kp))
                    a(kp, kp) = r1
                    if (kstep == 2) then
                       ! (*) make sure that diagonal element of pivot is real
                       a(k, k) = real(a(k, k))
                       ! (5) swap row elements
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                    ! convert lower triangle of a into l form by applying
                    ! the interchanges in columns 1:k-1.
                    if (k > 1) call stdlib_cswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                 else
                    ! (*) make sure that diagonal element of pivot is real
                    a(k, k) = real(a(k, k))
                    if (kstep == 2) a(k + 1, k + 1) = real(a(k + 1, k + 1))
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of a now holds
                    ! w(k) = l(k)*d(k),
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                       ! perform a rank-1 update of a(k+1:n,k+1:n) and
                       ! store l(k) in column k
                       ! handle division by a small number
                       if (abs(real(a(k, k))) >= sfmin) then
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                          d11 = one/real(a(k, k))
                          call stdlib_cher(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                          ! store l(k) in column k
                          call stdlib_csscal(n - k, d11, a(k + 1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = real(a(k, k))
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_cher(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                       end if
                       ! store the subdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! perform a rank-2 update of a(k+2:n,k+2:n) as
                    ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t
                       ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k < n - 1) then
                       ! d = |a21|
                       d = stdlib_slapy2(real(a(k + 1, k)), aimag(a(k + 1, k)))
                       d11 = real(a(k + 1, k + 1))/d
                       d22 = real(a(k, k))/d
                       d21 = a(k + 1, k)/d
                       tt = one/(d11*d22 - one)
                       do j = k + 2, n
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wk = tt*(d11*a(j, k) - d21*a(j, k + 1))
                          wkp1 = tt*(d22*a(j, k + 1) - conjg(d21)*a(j, k))
                          ! perform a rank-2 update of a(k+2:n,k+2:n)
                          do i = j, n
                             a(i, j) = a(i, j) - (a(i, k)/d)*conjg(wk) - (a(i, k + 1) &
                                       /d)*conjg(wkp1)
                          end do
                          ! store l(k) and l(k+1) in cols k and k+1 for row j
                          a(j, k) = wk/d
                          a(j, k + 1) = wkp1/d
                          ! (*) make sure that diagonal element of pivot is real
                          a(j, j) = cmplx(real(a(j, j)), zero)
                       end do
                    end if
                    ! copy subdiagonal elements of d(k) to e(k) and
                    ! zero out subdiagonal entry of a
                    e(k) = a(k + 1, k)
                    e(k + 1) = czero
                    a(k + 1, k) = czero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 40
64      continue
           end if
           return
           ! end of stdlib_chetf2_rk
     end subroutine stdlib_chetf2_rk

     ! CHETF2_ROOK computes the factorization of a complex Hermitian matrix A
     ! using the bounded Bunch-Kaufman ("rook") diagonal pivoting method:
     ! A = U*D*U**H  or  A = L*D*L**H
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, U**H is the conjugate transpose of U, and D is
     ! Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_chetf2_rook(uplo, n, a, lda, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *)
        ! ======================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: done, upper
           integer(ilp) :: i, ii, imax, itemp, j, jmax, k, kk, kp, kstep, p
           real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, stemp, rowmax, tt, sfmin
           complex(sp) :: d12, d21, t, wk, wkm1, wkp1, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, conjg, max, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chetf2_rook', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (upper) then
              ! factorize a as u*d*u**h using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 70
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(a(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, a(1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, a(imax, imax + 1), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, a(1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,kw-1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(a(imax, imax))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k - kstep + 1
                 ! for only a 2x2 pivot, interchange rows and columns k and p
                 ! in the leading submatrix a(1:k,1:k)
                 if ((kstep == 2) .and. (p /= k)) then
                    ! (1) swap columnar parts
                    if (p > 1) call stdlib_cswap(p - 1, a(1, k), 1, a(1, p), 1)
                    ! (2) swap and conjugate middle parts
                    do j = p + 1, k - 1
                       t = conjg(a(j, k))
                       a(j, k) = conjg(a(p, j))
                       a(p, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(p, k) = conjg(a(p, k))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(k, k))
                    a(k, k) = real(a(p, p))
                    a(p, p) = r1
                 end if
                 ! for both 1x1 and 2x2 pivots, interchange rows and
                 ! columns kk and kp in the leading submatrix a(1:k,1:k)
                 if (kp /= kk) then
                    ! (1) swap columnar parts
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    ! (2) swap and conjugate middle parts
                    do j = kp + 1, kk - 1
                       t = conjg(a(j, kk))
                       a(j, kk) = conjg(a(kp, j))
                       a(kp, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(kp, kk) = conjg(a(kp, kk))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(kk, kk))
                    a(kk, kk) = real(a(kp, kp))
                    a(kp, kp) = r1
                    if (kstep == 2) then
                       ! (*) make sure that diagonal element of pivot is real
                       a(k, k) = real(a(k, k))
                       ! (5) swap row elements
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 else
                    ! (*) make sure that diagonal element of pivot is real
                    a(k, k) = real(a(k, k))
                    if (kstep == 2) a(k - 1, k - 1) = real(a(k - 1, k - 1))
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    if (k > 1) then
                       ! perform a rank-1 update of a(1:k-1,1:k-1) and
                       ! store u(k) in column k
                       if (abs(real(a(k, k))) >= sfmin) then
                          ! perform a rank-1 update of a(1:k-1,1:k-1) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*1/d(k)*w(k)**t
                          d11 = one/real(a(k, k))
                          call stdlib_cher(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                          ! store u(k) in column k
                          call stdlib_csscal(k - 1, d11, a(1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = real(a(k, k))
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_cher(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k > 2) then
                       ! d = |a12|
                       d = stdlib_slapy2(real(a(k - 1, k)), aimag(a(k - 1, k)))
                       d11 = real(a(k, k)/d)
                       d22 = real(a(k - 1, k - 1)/d)
                       d12 = a(k - 1, k)/d
                       tt = one/(d11*d22 - one)
                       do j = k - 2, 1, -1
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wkm1 = tt*(d11*a(j, k - 1) - conjg(d12)*a(j, k))
                          wk = tt*(d22*a(j, k) - d12*a(j, k - 1))
                          ! perform a rank-2 update of a(1:k-2,1:k-2)
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - (a(i, k)/d)*conjg(wk) - (a(i, k - 1) &
                                       /d)*conjg(wkm1)
                          end do
                          ! store u(k) and u(k-1) in cols k and k-1 for row j
                          a(j, k) = wk/d
                          a(j, k - 1) = wkm1/d
                          ! (*) make sure that diagonal element of pivot is real
                          a(j, j) = cmplx(real(a(j, j)), zero)
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
           else
              ! factorize a as l*d*l**h using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
40      continue
              ! if k > n, exit from loop
              if (k > n) go to 70
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(a(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, a(k + 1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
42      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, a(imax, k), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, a(imax + 1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,kw-1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(a(imax, imax))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 42
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k + kstep - 1
                 ! for only a 2x2 pivot, interchange rows and columns k and p
                 ! in the trailing submatrix a(k:n,k:n)
                 if ((kstep == 2) .and. (p /= k)) then
                    ! (1) swap columnar parts
                    if (p < n) call stdlib_cswap(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    ! (2) swap and conjugate middle parts
                    do j = k + 1, p - 1
                       t = conjg(a(j, k))
                       a(j, k) = conjg(a(p, j))
                       a(p, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(p, k) = conjg(a(p, k))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(k, k))
                    a(k, k) = real(a(p, p))
                    a(p, p) = r1
                 end if
                 ! for both 1x1 and 2x2 pivots, interchange rows and
                 ! columns kk and kp in the trailing submatrix a(k:n,k:n)
                 if (kp /= kk) then
                    ! (1) swap columnar parts
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    ! (2) swap and conjugate middle parts
                    do j = kk + 1, kp - 1
                       t = conjg(a(j, kk))
                       a(j, kk) = conjg(a(kp, j))
                       a(kp, j) = t
                    end do
                    ! (3) swap and conjugate corner elements at row-col interserction
                    a(kp, kk) = conjg(a(kp, kk))
                    ! (4) swap diagonal elements at row-col intersection
                    r1 = real(a(kk, kk))
                    a(kk, kk) = real(a(kp, kp))
                    a(kp, kp) = r1
                    if (kstep == 2) then
                       ! (*) make sure that diagonal element of pivot is real
                       a(k, k) = real(a(k, k))
                       ! (5) swap row elements
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 else
                    ! (*) make sure that diagonal element of pivot is real
                    a(k, k) = real(a(k, k))
                    if (kstep == 2) a(k + 1, k + 1) = real(a(k + 1, k + 1))
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of a now holds
                    ! w(k) = l(k)*d(k),
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                       ! perform a rank-1 update of a(k+1:n,k+1:n) and
                       ! store l(k) in column k
                       ! handle division by a small number
                       if (abs(real(a(k, k))) >= sfmin) then
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                          d11 = one/real(a(k, k))
                          call stdlib_cher(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                          ! store l(k) in column k
                          call stdlib_csscal(n - k, d11, a(k + 1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = real(a(k, k))
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_cher(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! perform a rank-2 update of a(k+2:n,k+2:n) as
                    ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t
                       ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k < n - 1) then
                       ! d = |a21|
                       d = stdlib_slapy2(real(a(k + 1, k)), aimag(a(k + 1, k)))
                       d11 = real(a(k + 1, k + 1))/d
                       d22 = real(a(k, k))/d
                       d21 = a(k + 1, k)/d
                       tt = one/(d11*d22 - one)
                       do j = k + 2, n
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wk = tt*(d11*a(j, k) - d21*a(j, k + 1))
                          wkp1 = tt*(d22*a(j, k + 1) - conjg(d21)*a(j, k))
                          ! perform a rank-2 update of a(k+2:n,k+2:n)
                          do i = j, n
                             a(i, j) = a(i, j) - (a(i, k)/d)*conjg(wk) - (a(i, k + 1) &
                                       /d)*conjg(wkp1)
                          end do
                          ! store l(k) and l(k+1) in cols k and k+1 for row j
                          a(j, k) = wk/d
                          a(j, k + 1) = wkp1/d
                          ! (*) make sure that diagonal element of pivot is real
                          a(j, j) = cmplx(real(a(j, j)), zero)
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 40
           end if
70      continue
           return
           ! end of stdlib_chetf2_rook
     end subroutine stdlib_chetf2_rook

     ! CHETRI computes the inverse of a complex Hermitian indefinite matrix
     ! A using the factorization A = U*D*U**H or A = L*D*L**H computed by
     ! CHETRF.

     subroutine stdlib_chetri(uplo, n, a, lda, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kp, kstep
           real(sp) :: ak, akp1, d, t
           complex(sp) :: akkp1, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, max, real
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chetri', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              do info = 1, n
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**h.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/real(a(k, k))
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_chemv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - real(stdlib_cdotc(k - 1, work, 1, a(1, k), 1))
                              
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k + 1))
                 ak = real(a(k, k))/t
                 akp1 = real(a(k + 1, k + 1))/t
                 akkp1 = a(k, k + 1)/t
                 d = t*(ak*akp1 - one)
                 a(k, k) = akp1/d
                 a(k + 1, k + 1) = ak/d
                 a(k, k + 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_chemv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - real(stdlib_cdotc(k - 1, work, 1, a(1, k), 1))
                              
                    a(k, k + 1) = a(k, k + 1) - stdlib_cdotc(k - 1, a(1, k), 1, a(1, k + 1), 1)
                              
                    call stdlib_ccopy(k - 1, a(1, k + 1), 1, work, 1)
                    call stdlib_chemv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k + 1), 1)
                              
                    a(k + 1, k + 1) = a(k + 1, k + 1) - real(stdlib_cdotc(k - 1, work, 1, a(1, k + 1), &
                              1))
                 end if
                 kstep = 2
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                 do j = kp + 1, k - 1
                    temp = conjg(a(j, k))
                    a(j, k) = conjg(a(kp, j))
                    a(kp, j) = temp
                 end do
                 a(kp, k) = conjg(a(kp, k))
                 temp = a(k, k)
                 a(k, k) = a(kp, kp)
                 a(kp, kp) = temp
                 if (kstep == 2) then
                    temp = a(k, k + 1)
                    a(k, k + 1) = a(kp, k + 1)
                    a(kp, k + 1) = temp
                 end if
              end if
              k = k + kstep
              go to 30
50      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**h.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
60      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 80
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/real(a(k, k))
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_chemv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k), 1)
                    a(k, k) = a(k, k) - real(stdlib_cdotc(n - k, work, 1, a(k + 1, k), 1))
                              
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k - 1))
                 ak = real(a(k - 1, k - 1))/t
                 akp1 = real(a(k, k))/t
                 akkp1 = a(k, k - 1)/t
                 d = t*(ak*akp1 - one)
                 a(k - 1, k - 1) = akp1/d
                 a(k, k) = ak/d
                 a(k, k - 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_chemv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k), 1)
                    a(k, k) = a(k, k) - real(stdlib_cdotc(n - k, work, 1, a(k + 1, k), 1))
                              
                    a(k, k - 1) = a(k, k - 1) - stdlib_cdotc(n - k, a(k + 1, k), 1, a(k + 1, k - 1), 1 &
                              )
                    call stdlib_ccopy(n - k, a(k + 1, k - 1), 1, work, 1)
                    call stdlib_chemv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k - 1), 1)
                    a(k - 1, k - 1) = a(k - 1, k - 1) - real(stdlib_cdotc(n - k, work, 1, a(k + 1, k - 1) &
                              , 1))
                 end if
                 kstep = 2
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                 do j = k + 1, kp - 1
                    temp = conjg(a(j, k))
                    a(j, k) = conjg(a(kp, j))
                    a(kp, j) = temp
                 end do
                 a(kp, k) = conjg(a(kp, k))
                 temp = a(k, k)
                 a(k, k) = a(kp, kp)
                 a(kp, kp) = temp
                 if (kstep == 2) then
                    temp = a(k, k - 1)
                    a(k, k - 1) = a(kp, k - 1)
                    a(kp, k - 1) = temp
                 end if
              end if
              k = k - kstep
              go to 60
80      continue
           end if
           return
           ! end of stdlib_chetri
     end subroutine stdlib_chetri

     ! CHETRI_ROOK computes the inverse of a complex Hermitian indefinite matrix
     ! A using the factorization A = U*D*U**H or A = L*D*L**H computed by
     ! CHETRF_ROOK.

     subroutine stdlib_chetri_rook(uplo, n, a, lda, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kp, kstep
           real(sp) :: ak, akp1, d, t
           complex(sp) :: akkp1, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, max, real
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chetri_rook', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              do info = 1, n
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**h.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 70
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/real(a(k, k))
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_chemv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - real(stdlib_cdotc(k - 1, work, 1, a(1, k), 1))
                              
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k + 1))
                 ak = real(a(k, k))/t
                 akp1 = real(a(k + 1, k + 1))/t
                 akkp1 = a(k, k + 1)/t
                 d = t*(ak*akp1 - one)
                 a(k, k) = akp1/d
                 a(k + 1, k + 1) = ak/d
                 a(k, k + 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_chemv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - real(stdlib_cdotc(k - 1, work, 1, a(1, k), 1))
                              
                    a(k, k + 1) = a(k, k + 1) - stdlib_cdotc(k - 1, a(1, k), 1, a(1, k + 1), 1)
                              
                    call stdlib_ccopy(k - 1, a(1, k + 1), 1, work, 1)
                    call stdlib_chemv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k + 1), 1)
                              
                    a(k + 1, k + 1) = a(k + 1, k + 1) - real(stdlib_cdotc(k - 1, work, 1, a(1, k + 1), &
                              1))
                 end if
                 kstep = 2
              end if
              if (kstep == 1) then
                 ! interchange rows and columns k and ipiv(k) in the leading
                 ! submatrix a(1:k,1:k)
                 kp = ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    do j = kp + 1, k - 1
                       temp = conjg(a(j, k))
                       a(j, k) = conjg(a(kp, j))
                       a(kp, j) = temp
                    end do
                    a(kp, k) = conjg(a(kp, k))
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              else
                 ! interchange rows and columns k and k+1 with -ipiv(k) and
                 ! -ipiv(k+1) in the leading submatrix a(k+1:n,k+1:n)
                 ! (1) interchange rows and columns k and -ipiv(k)
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    do j = kp + 1, k - 1
                       temp = conjg(a(j, k))
                       a(j, k) = conjg(a(kp, j))
                       a(kp, j) = temp
                    end do
                    a(kp, k) = conjg(a(kp, k))
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                    temp = a(k, k + 1)
                    a(k, k + 1) = a(kp, k + 1)
                    a(kp, k + 1) = temp
                 end if
                 ! (2) interchange rows and columns k+1 and -ipiv(k+1)
                 k = k + 1
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    do j = kp + 1, k - 1
                       temp = conjg(a(j, k))
                       a(j, k) = conjg(a(kp, j))
                       a(kp, j) = temp
                    end do
                    a(kp, k) = conjg(a(kp, k))
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              end if
              k = k + 1
              go to 30
70      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**h.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
80      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 120
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = one/real(a(k, k))
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_chemv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k), 1)
                    a(k, k) = a(k, k) - real(stdlib_cdotc(n - k, work, 1, a(k + 1, k), 1))
                              
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(a(k, k - 1))
                 ak = real(a(k - 1, k - 1))/t
                 akp1 = real(a(k, k))/t
                 akkp1 = a(k, k - 1)/t
                 d = t*(ak*akp1 - one)
                 a(k - 1, k - 1) = akp1/d
                 a(k, k) = ak/d
                 a(k, k - 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_chemv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k), 1)
                    a(k, k) = a(k, k) - real(stdlib_cdotc(n - k, work, 1, a(k + 1, k), 1))
                              
                    a(k, k - 1) = a(k, k - 1) - stdlib_cdotc(n - k, a(k + 1, k), 1, a(k + 1, k - 1), 1 &
                              )
                    call stdlib_ccopy(n - k, a(k + 1, k - 1), 1, work, 1)
                    call stdlib_chemv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k - 1), 1)
                    a(k - 1, k - 1) = a(k - 1, k - 1) - real(stdlib_cdotc(n - k, work, 1, a(k + 1, k - 1) &
                              , 1))
                 end if
                 kstep = 2
              end if
              if (kstep == 1) then
                 ! interchange rows and columns k and ipiv(k) in the trailing
                 ! submatrix a(k:n,k:n)
                 kp = ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    do j = k + 1, kp - 1
                       temp = conjg(a(j, k))
                       a(j, k) = conjg(a(kp, j))
                       a(kp, j) = temp
                    end do
                    a(kp, k) = conjg(a(kp, k))
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              else
                 ! interchange rows and columns k and k-1 with -ipiv(k) and
                 ! -ipiv(k-1) in the trailing submatrix a(k-1:n,k-1:n)
                 ! (1) interchange rows and columns k and -ipiv(k)
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    do j = k + 1, kp - 1
                       temp = conjg(a(j, k))
                       a(j, k) = conjg(a(kp, j))
                       a(kp, j) = temp
                    end do
                    a(kp, k) = conjg(a(kp, k))
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                    temp = a(k, k - 1)
                    a(k, k - 1) = a(kp, k - 1)
                    a(kp, k - 1) = temp
                 end if
                 ! (2) interchange rows and columns k-1 and -ipiv(k-1)
                 k = k - 1
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    do j = k + 1, kp - 1
                       temp = conjg(a(j, k))
                       a(j, k) = conjg(a(kp, j))
                       a(kp, j) = temp
                    end do
                    a(kp, k) = conjg(a(kp, k))
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              end if
              k = k - 1
              go to 80
120    continue
           end if
           return
           ! end of stdlib_chetri_rook
     end subroutine stdlib_chetri_rook

     ! CHETRS_3 solves a system of linear equations A * X = B with a complex
     ! Hermitian matrix A using the factorization computed
     ! by CHETRF_RK or CHETRF_BK:
     ! A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T),
     ! where U (or L) is unit upper (or lower) triangular matrix,
     ! U**H (or L**H) is the conjugate of U (or L), P is a permutation
     ! matrix, P**T is the transpose of P, and D is Hermitian and block
     ! diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This algorithm is using Level 3 BLAS.

     subroutine stdlib_chetrs_3(uplo, n, nrhs, a, lda, e, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), b(ldb, *), e(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, j, k, kp
           real(sp) :: s
           complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, max, real
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chetrs_3', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! begin upper
              ! solve a*x = b, where a = u*d*u**h.
              ! p**t * b
              ! interchange rows k and ipiv(k) of matrix b in the same order
              ! that the formation order of ipiv(i) vector for upper case.
              ! (we can do the simple loop over ipiv with decrement -1,
              ! since the abs value of ipiv(i) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = n, 1, -1
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
              ! compute (u \p**t * b) -> b    [ (u \p**t * b) ]
              call stdlib_ctrsm('l', 'u', 'n', 'u', n, nrhs, cone, a, lda, b, ldb)
              ! compute d \ b -> b   [ d \ (u \p**t * b) ]
              i = n
              do while (i >= 1)
                 if (ipiv(i) > 0) then
                    s = real(cone)/real(a(i, i))
                    call stdlib_csscal(nrhs, s, b(i, 1), ldb)
                 else if (i > 1) then
                    akm1k = e(i)
                    akm1 = a(i - 1, i - 1)/akm1k
                    ak = a(i, i)/conjg(akm1k)
                    denom = akm1*ak - cone
                    do j = 1, nrhs
                       bkm1 = b(i - 1, j)/akm1k
                       bk = b(i, j)/conjg(akm1k)
                       b(i - 1, j) = (ak*bkm1 - bk)/denom
                       b(i, j) = (akm1*bk - bkm1)/denom
                    end do
                    i = i - 1
                 end if
                 i = i - 1
              end do
              ! compute (u**h \ b) -> b   [ u**h \ (d \ (u \p**t * b) ) ]
              call stdlib_ctrsm('l', 'u', 'c', 'u', n, nrhs, cone, a, lda, b, ldb)
              ! p * b  [ p * (u**h \ (d \ (u \p**t * b) )) ]
              ! interchange rows k and ipiv(k) of matrix b in reverse order
              ! from the formation order of ipiv(i) vector for upper case.
              ! (we can do the simple loop over ipiv with increment 1,
              ! since the abs value of ipiv(i) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = 1, n, 1
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
           else
              ! begin lower
              ! solve a*x = b, where a = l*d*l**h.
              ! p**t * b
              ! interchange rows k and ipiv(k) of matrix b in the same order
              ! that the formation order of ipiv(i) vector for lower case.
              ! (we can do the simple loop over ipiv with increment 1,
              ! since the abs value of ipiv(i) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = 1, n, 1
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
              ! compute (l \p**t * b) -> b    [ (l \p**t * b) ]
              call stdlib_ctrsm('l', 'l', 'n', 'u', n, nrhs, cone, a, lda, b, ldb)
              ! compute d \ b -> b   [ d \ (l \p**t * b) ]
              i = 1
              do while (i <= n)
                 if (ipiv(i) > 0) then
                    s = real(cone)/real(a(i, i))
                    call stdlib_csscal(nrhs, s, b(i, 1), ldb)
                 else if (i < n) then
                    akm1k = e(i)
                    akm1 = a(i, i)/conjg(akm1k)
                    ak = a(i + 1, i + 1)/akm1k
                    denom = akm1*ak - cone
                    do j = 1, nrhs
                       bkm1 = b(i, j)/conjg(akm1k)
                       bk = b(i + 1, j)/akm1k
                       b(i, j) = (ak*bkm1 - bk)/denom
                       b(i + 1, j) = (akm1*bk - bkm1)/denom
                    end do
                    i = i + 1
                 end if
                 i = i + 1
              end do
              ! compute (l**h \ b) -> b   [ l**h \ (d \ (l \p**t * b) ) ]
              call stdlib_ctrsm('l', 'l', 'c', 'u', n, nrhs, cone, a, lda, b, ldb)
              ! p * b  [ p * (l**h \ (d \ (l \p**t * b) )) ]
              ! interchange rows k and ipiv(k) of matrix b in reverse order
              ! from the formation order of ipiv(i) vector for lower case.
              ! (we can do the simple loop over ipiv with decrement -1,
              ! since the abs value of ipiv(i) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              do k = n, 1, -1
                 kp = abs(ipiv(k))
                 if (kp /= k) then
                    call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 end if
              end do
              ! end lower
           end if
           return
           ! end of stdlib_chetrs_3
     end subroutine stdlib_chetrs_3

     ! Level 3 BLAS like routine for C in RFP Format.
     ! CHFRK performs one of the Hermitian rank--k operations
     ! C := alpha*A*A**H + beta*C,
     ! or
     ! C := alpha*A**H*A + beta*C,
     ! where alpha and beta are real scalars, C is an n--by--n Hermitian
     ! matrix and A is an n--by--k matrix in the first case and a k--by--n
     ! matrix in the second case.

     subroutine stdlib_chfrk(transr, uplo, trans, n, k, alpha, a, lda, beta, c)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(sp) :: alpha, beta
           integer(ilp) :: k, lda, n
           character :: trans, transr, uplo
           ! .. array arguments ..
           complex(sp) :: a(lda, *), c(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lower, normaltransr, nisodd, notrans
           integer(ilp) :: info, nrowa, j, nk, n1, n2
           complex(sp) :: calpha, cbeta
     
           ! .. intrinsic functions ..
           intrinsic :: max, cmplx
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           normaltransr = stdlib_lsame(transr, 'n')
           lower = stdlib_lsame(uplo, 'l')
           notrans = stdlib_lsame(trans, 'n')
           if (notrans) then
              nrowa = n
           else
              nrowa = k
           end if
           if (.not. normaltransr .and. .not. stdlib_lsame(transr, 'c')) then
              info = -1
           else if (.not. lower .and. .not. stdlib_lsame(uplo, 'u')) then
              info = -2
           else if (.not. notrans .and. .not. stdlib_lsame(trans, 'c')) then
              info = -3
           else if (n < 0) then
              info = -4
           else if (k < 0) then
              info = -5
           else if (lda < max(1, nrowa)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chfrk ', -info)
              return
           end if
           ! quick return if possible.
           ! the quick return case: ((alpha==0).and.(beta/=zero)) is not
           ! done (it is in stdlib_cherk for example) and left in the general case.
           if ((n == 0) .or. (((alpha == zero) .or. (k == 0)) .and. (beta == one))) &
                     return
           if ((alpha == zero) .and. (beta == zero)) then
              do j = 1, ((n*(n + 1))/2)
                 c(j) = czero
              end do
              return
           end if
           calpha = cmplx(alpha, zero, KIND=sp)
           cbeta = cmplx(beta, zero, KIND=sp)
           ! c is n-by-n.
           ! if n is odd, set nisodd = .true., and n1 and n2.
           ! if n is even, nisodd = .false., and nk.
           if (mod(n, 2) == 0) then
              nisodd = .false.
              nk = n/2
           else
              nisodd = .true.
              if (lower) then
                 n2 = n/2
                 n1 = n - n2
              else
                 n1 = n/2
                 n2 = n - n1
              end if
           end if
           if (nisodd) then
              ! n is odd
              if (normaltransr) then
                 ! n is odd and transr = 'n'
                 if (lower) then
                    ! n is odd, transr = 'n', and uplo = 'l'
                    if (notrans) then
                       ! n is odd, transr = 'n', uplo = 'l', and trans = 'n'
                       call stdlib_cherk('l', 'n', n1, k, alpha, a(1, 1), lda, beta, c(1), n)
                                 
                       call stdlib_cherk('u', 'n', n2, k, alpha, a(n1 + 1, 1), lda, beta, c(n + 1) &
                                 , n)
                       call stdlib_cgemm('n', 'c', n2, n1, k, calpha, a(n1 + 1, 1), lda, a(1, 1) &
                                 , lda, cbeta, c(n1 + 1), n)
                    else
                       ! n is odd, transr = 'n', uplo = 'l', and trans = 'c'
                       call stdlib_cherk('l', 'c', n1, k, alpha, a(1, 1), lda, beta, c(1), n)
                                 
                       call stdlib_cherk('u', 'c', n2, k, alpha, a(1, n1 + 1), lda, beta, c(n + 1) &
                                 , n)
                       call stdlib_cgemm('c', 'n', n2, n1, k, calpha, a(1, n1 + 1), lda, a(1, 1) &
                                 , lda, cbeta, c(n1 + 1), n)
                    end if
                 else
                    ! n is odd, transr = 'n', and uplo = 'u'
                    if (notrans) then
                       ! n is odd, transr = 'n', uplo = 'u', and trans = 'n'
                       call stdlib_cherk('l', 'n', n1, k, alpha, a(1, 1), lda, beta, c(n2 + 1), &
                                 n)
                       call stdlib_cherk('u', 'n', n2, k, alpha, a(n2, 1), lda, beta, c(n1 + 1), &
                                  n)
                       call stdlib_cgemm('n', 'c', n1, n2, k, calpha, a(1, 1), lda, a(n2, 1), &
                                 lda, cbeta, c(1), n)
                    else
                       ! n is odd, transr = 'n', uplo = 'u', and trans = 'c'
                       call stdlib_cherk('l', 'c', n1, k, alpha, a(1, 1), lda, beta, c(n2 + 1), &
                                 n)
                       call stdlib_cherk('u', 'c', n2, k, alpha, a(1, n2), lda, beta, c(n1 + 1), &
                                  n)
                       call stdlib_cgemm('c', 'n', n1, n2, k, calpha, a(1, 1), lda, a(1, n2), &
                                 lda, cbeta, c(1), n)
                    end if
                 end if
              else
                 ! n is odd, and transr = 'c'
                 if (lower) then
                    ! n is odd, transr = 'c', and uplo = 'l'
                    if (notrans) then
                       ! n is odd, transr = 'c', uplo = 'l', and trans = 'n'
                       call stdlib_cherk('u', 'n', n1, k, alpha, a(1, 1), lda, beta, c(1), n1 &
                                 )
                       call stdlib_cherk('l', 'n', n2, k, alpha, a(n1 + 1, 1), lda, beta, c(2), &
                                 n1)
                       call stdlib_cgemm('n', 'c', n1, n2, k, calpha, a(1, 1), lda, a(n1 + 1, 1) &
                                 , lda, cbeta, c(n1*n1 + 1), n1)
                    else
                       ! n is odd, transr = 'c', uplo = 'l', and trans = 'c'
                       call stdlib_cherk('u', 'c', n1, k, alpha, a(1, 1), lda, beta, c(1), n1 &
                                 )
                       call stdlib_cherk('l', 'c', n2, k, alpha, a(1, n1 + 1), lda, beta, c(2), &
                                 n1)
                       call stdlib_cgemm('c', 'n', n1, n2, k, calpha, a(1, 1), lda, a(1, n1 + 1) &
                                 , lda, cbeta, c(n1*n1 + 1), n1)
                    end if
                 else
                    ! n is odd, transr = 'c', and uplo = 'u'
                    if (notrans) then
                       ! n is odd, transr = 'c', uplo = 'u', and trans = 'n'
                       call stdlib_cherk('u', 'n', n1, k, alpha, a(1, 1), lda, beta, c(n2*n2 + 1 &
                                 ), n2)
                       call stdlib_cherk('l', 'n', n2, k, alpha, a(n1 + 1, 1), lda, beta, c( &
                                 n1*n2 + 1), n2)
                       call stdlib_cgemm('n', 'c', n2, n1, k, calpha, a(n1 + 1, 1), lda, a(1, 1) &
                                 , lda, cbeta, c(1), n2)
                    else
                       ! n is odd, transr = 'c', uplo = 'u', and trans = 'c'
                       call stdlib_cherk('u', 'c', n1, k, alpha, a(1, 1), lda, beta, c(n2*n2 + 1 &
                                 ), n2)
                       call stdlib_cherk('l', 'c', n2, k, alpha, a(1, n1 + 1), lda, beta, c( &
                                 n1*n2 + 1), n2)
                       call stdlib_cgemm('c', 'n', n2, n1, k, calpha, a(1, n1 + 1), lda, a(1, 1) &
                                 , lda, cbeta, c(1), n2)
                    end if
                 end if
              end if
           else
              ! n is even
              if (normaltransr) then
                 ! n is even and transr = 'n'
                 if (lower) then
                    ! n is even, transr = 'n', and uplo = 'l'
                    if (notrans) then
                       ! n is even, transr = 'n', uplo = 'l', and trans = 'n'
                       call stdlib_cherk('l', 'n', nk, k, alpha, a(1, 1), lda, beta, c(2), n + &
                                 1)
                       call stdlib_cherk('u', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c(1), &
                                 n + 1)
                       call stdlib_cgemm('n', 'c', nk, nk, k, calpha, a(nk + 1, 1), lda, a(1, 1) &
                                 , lda, cbeta, c(nk + 2), n + 1)
                    else
                       ! n is even, transr = 'n', uplo = 'l', and trans = 'c'
                       call stdlib_cherk('l', 'c', nk, k, alpha, a(1, 1), lda, beta, c(2), n + &
                                 1)
                       call stdlib_cherk('u', 'c', nk, k, alpha, a(1, nk + 1), lda, beta, c(1), &
                                 n + 1)
                       call stdlib_cgemm('c', 'n', nk, nk, k, calpha, a(1, nk + 1), lda, a(1, 1) &
                                 , lda, cbeta, c(nk + 2), n + 1)
                    end if
                 else
                    ! n is even, transr = 'n', and uplo = 'u'
                    if (notrans) then
                       ! n is even, transr = 'n', uplo = 'u', and trans = 'n'
                       call stdlib_cherk('l', 'n', nk, k, alpha, a(1, 1), lda, beta, c(nk + 2), &
                                 n + 1)
                       call stdlib_cherk('u', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c(nk + 1 &
                                 ), n + 1)
                       call stdlib_cgemm('n', 'c', nk, nk, k, calpha, a(1, 1), lda, a(nk + 1, 1) &
                                 , lda, cbeta, c(1), n + 1)
                    else
                       ! n is even, transr = 'n', uplo = 'u', and trans = 'c'
                       call stdlib_cherk('l', 'c', nk, k, alpha, a(1, 1), lda, beta, c(nk + 2), &
                                 n + 1)
                       call stdlib_cherk('u', 'c', nk, k, alpha, a(1, nk + 1), lda, beta, c(nk + 1 &
                                 ), n + 1)
                       call stdlib_cgemm('c', 'n', nk, nk, k, calpha, a(1, 1), lda, a(1, nk + 1) &
                                 , lda, cbeta, c(1), n + 1)
                    end if
                 end if
              else
                 ! n is even, and transr = 'c'
                 if (lower) then
                    ! n is even, transr = 'c', and uplo = 'l'
                    if (notrans) then
                       ! n is even, transr = 'c', uplo = 'l', and trans = 'n'
                       call stdlib_cherk('u', 'n', nk, k, alpha, a(1, 1), lda, beta, c(nk + 1), &
                                 nk)
                       call stdlib_cherk('l', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c(1), &
                                 nk)
                       call stdlib_cgemm('n', 'c', nk, nk, k, calpha, a(1, 1), lda, a(nk + 1, 1) &
                                 , lda, cbeta, c(((nk + 1)*nk) + 1), nk)
                    else
                       ! n is even, transr = 'c', uplo = 'l', and trans = 'c'
                       call stdlib_cherk('u', 'c', nk, k, alpha, a(1, 1), lda, beta, c(nk + 1), &
                                 nk)
                       call stdlib_cherk('l', 'c', nk, k, alpha, a(1, nk + 1), lda, beta, c(1), &
                                 nk)
                       call stdlib_cgemm('c', 'n', nk, nk, k, calpha, a(1, 1), lda, a(1, nk + 1) &
                                 , lda, cbeta, c(((nk + 1)*nk) + 1), nk)
                    end if
                 else
                    ! n is even, transr = 'c', and uplo = 'u'
                    if (notrans) then
                       ! n is even, transr = 'c', uplo = 'u', and trans = 'n'
                       call stdlib_cherk('u', 'n', nk, k, alpha, a(1, 1), lda, beta, c(nk*(nk + &
                                 1) + 1), nk)
                       call stdlib_cherk('l', 'n', nk, k, alpha, a(nk + 1, 1), lda, beta, c( &
                                 nk*nk + 1), nk)
                       call stdlib_cgemm('n', 'c', nk, nk, k, calpha, a(nk + 1, 1), lda, a(1, 1) &
                                 , lda, cbeta, c(1), nk)
                    else
                       ! n is even, transr = 'c', uplo = 'u', and trans = 'c'
                       call stdlib_cherk('u', 'c', nk, k, alpha, a(1, 1), lda, beta, c(nk*(nk + &
                                 1) + 1), nk)
                       call stdlib_cherk('l', 'c', nk, k, alpha, a(1, nk + 1), lda, beta, c( &
                                 nk*nk + 1), nk)
                       call stdlib_cgemm('c', 'n', nk, nk, k, calpha, a(1, nk + 1), lda, a(1, 1) &
                                 , lda, cbeta, c(1), nk)
                    end if
                 end if
              end if
           end if
           return
           ! end of stdlib_chfrk
     end subroutine stdlib_chfrk

     ! This subroutine translates from a BLAST-specified integer constant to
     ! the character string specifying a transposition operation.
     ! CHLA_TRANSTYPE returns an CHARACTER*1.  If CHLA_TRANSTYPE is 'X',
     ! then input is not an integer indicating a transposition operator.
     ! Otherwise CHLA_TRANSTYPE returns the constant value corresponding to
     ! TRANS.

     character*1 function stdlib_chla_transtype(trans)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: trans
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: blas_no_trans = 111
           integer(ilp), parameter :: blas_trans = 112
           integer(ilp), parameter :: blas_conj_trans = 113
           
           ! .. executable statements ..
           if (trans == blas_no_trans) then
              stdlib_chla_transtype = 'n'
           else if (trans == blas_trans) then
              stdlib_chla_transtype = 't'
           else if (trans == blas_conj_trans) then
              stdlib_chla_transtype = 'c'
           else
              stdlib_chla_transtype = 'x'
           end if
           return
           ! end of stdlib_chla_transtype
     end function stdlib_chla_transtype

     ! CHPGST reduces a complex Hermitian-definite generalized
     ! eigenproblem to standard form, using packed storage.
     ! If ITYPE = 1, the problem is A*x = lambda*B*x,
     ! and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
     ! If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
     ! B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
     ! B must have been previously factorized as U**H*U or L*L**H by CPPTRF.

     subroutine stdlib_chpgst(itype, uplo, n, ap, bp, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, itype, n
           ! .. array arguments ..
           complex(sp) :: ap(*), bp(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, j1, j1j1, jj, k, k1, k1k1, kk
           real(sp) :: ajj, akk, bjj, bkk
           complex(sp) :: ct
     
           ! .. intrinsic functions ..
           intrinsic :: real
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (itype < 1 .or. itype > 3) then
              info = -1
           else if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -2
           else if (n < 0) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chpgst', -info)
              return
           end if
           if (itype == 1) then
              if (upper) then
                 ! compute inv(u**h)*a*inv(u)
                 ! j1 and jj are the indices of a(1,j) and a(j,j)
                 jj = 0
                 do j = 1, n
                    j1 = jj + 1
                    jj = jj + j
                    ! compute the j-th column of the upper triangle of a
                    ap(jj) = real(ap(jj))
                    bjj = real(bp(jj))
                    call stdlib_ctpsv(uplo, 'conjugate transpose', 'non-unit', j, bp, ap(j1), 1 &
                              )
                    call stdlib_chpmv(uplo, j - 1, -cone, ap, bp(j1), 1, cone, ap(j1), 1)
                              
                    call stdlib_csscal(j - 1, one/bjj, ap(j1), 1)
                    ap(jj) = (ap(jj) - stdlib_cdotc(j - 1, ap(j1), 1, bp(j1), 1))/ &
                              bjj
                 end do
              else
                 ! compute inv(l)*a*inv(l**h)
                 ! kk and k1k1 are the indices of a(k,k) and a(k+1,k+1)
                 kk = 1
                 do k = 1, n
                    k1k1 = kk + n - k + 1
                    ! update the lower triangle of a(k:n,k:n)
                    akk = real(ap(kk))
                    bkk = real(bp(kk))
                    akk = akk/bkk**2
                    ap(kk) = akk
                    if (k < n) then
                       call stdlib_csscal(n - k, one/bkk, ap(kk + 1), 1)
                       ct = -half*akk
                       call stdlib_caxpy(n - k, ct, bp(kk + 1), 1, ap(kk + 1), 1)
                       call stdlib_chpr2(uplo, n - k, -cone, ap(kk + 1), 1, bp(kk + 1), 1, ap(k1k1 &
                                 ))
                       call stdlib_caxpy(n - k, ct, bp(kk + 1), 1, ap(kk + 1), 1)
                       call stdlib_ctpsv(uplo, 'no transpose', 'non-unit', n - k, bp(k1k1), ap( &
                                 kk + 1), 1)
                    end if
                    kk = k1k1
                 end do
              end if
           else
              if (upper) then
                 ! compute u*a*u**h
                 ! k1 and kk are the indices of a(1,k) and a(k,k)
                 kk = 0
                 do k = 1, n
                    k1 = kk + 1
                    kk = kk + k
                    ! update the upper triangle of a(1:k,1:k)
                    akk = real(ap(kk))
                    bkk = real(bp(kk))
                    call stdlib_ctpmv(uplo, 'no transpose', 'non-unit', k - 1, bp, ap(k1), 1)
                              
                    ct = half*akk
                    call stdlib_caxpy(k - 1, ct, bp(k1), 1, ap(k1), 1)
                    call stdlib_chpr2(uplo, k - 1, cone, ap(k1), 1, bp(k1), 1, ap)
                    call stdlib_caxpy(k - 1, ct, bp(k1), 1, ap(k1), 1)
                    call stdlib_csscal(k - 1, bkk, ap(k1), 1)
                    ap(kk) = akk*bkk**2
                 end do
              else
                 ! compute l**h *a*l
                 ! jj and j1j1 are the indices of a(j,j) and a(j+1,j+1)
                 jj = 1
                 do j = 1, n
                    j1j1 = jj + n - j + 1
                    ! compute the j-th column of the lower triangle of a
                    ajj = real(ap(jj))
                    bjj = real(bp(jj))
                    ap(jj) = ajj*bjj + stdlib_cdotc(n - j, ap(jj + 1), 1, bp(jj + 1), 1)
                    call stdlib_csscal(n - j, bjj, ap(jj + 1), 1)
                    call stdlib_chpmv(uplo, n - j, cone, ap(j1j1), bp(jj + 1), 1, cone, ap(jj + 1) &
                              , 1)
                    call stdlib_ctpmv(uplo, 'conjugate transpose', 'non-unit', n - j + 1, bp(jj), &
                              ap(jj), 1)
                    jj = j1j1
                 end do
              end if
           end if
           return
           ! end of stdlib_chpgst
     end subroutine stdlib_chpgst

     ! CHPTRF computes the factorization of a complex Hermitian packed
     ! matrix A using the Bunch-Kaufman diagonal pivoting method:
     ! A = U*D*U**H  or  A = L*D*L**H
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, and D is Hermitian and block diagonal with
     ! 1-by-1 and 2-by-2 diagonal blocks.

     subroutine stdlib_chptrf(uplo, n, ap, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: ap(*)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp
           real(sp) :: absakk, alpha, colmax, d, d11, d22, r1, rowmax, tt
           complex(sp) :: d12, d21, t, wk, wkm1, wkp1, zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, conjg, max, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chptrf', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (upper) then
              ! factorize a as u*d*u**h using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
              kc = (n - 1)*n/2 + 1
10      continue
              knc = kc
              ! if k < 1, exit from loop
              if (k < 1) go to 110
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(ap(kc + k - 1)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, ap(kc), 1)
                 colmax = cabs1(ap(kc + imax - 1))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero: set info and continue
                 if (info == 0) info = k
                 kp = k
                 ap(kc + k - 1) = real(ap(kc + k - 1))
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    rowmax = zero
                    jmax = imax
                    kx = imax*(imax + 1)/2 + imax
                    do j = imax + 1, k
                       if (cabs1(ap(kx)) > rowmax) then
                          rowmax = cabs1(ap(kx))
                          jmax = j
                       end if
                       kx = kx + j
                    end do
                    kpc = (imax - 1)*imax/2 + 1
                    if (imax > 1) then
                       jmax = stdlib_icamax(imax - 1, ap(kpc), 1)
                       rowmax = max(rowmax, cabs1(ap(kpc + jmax - 1)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(real(ap(kpc + imax - 1))) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k - kstep + 1
                 if (kstep == 2) knc = knc - k + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    call stdlib_cswap(kp - 1, ap(knc), 1, ap(kpc), 1)
                    kx = kpc + kp - 1
                    do j = kp + 1, kk - 1
                       kx = kx + j - 1
                       t = conjg(ap(knc + j - 1))
                       ap(knc + j - 1) = conjg(ap(kx))
                       ap(kx) = t
                    end do
                    ap(kx + kk - 1) = conjg(ap(kx + kk - 1))
                    r1 = real(ap(knc + kk - 1))
                    ap(knc + kk - 1) = real(ap(kpc + kp - 1))
                    ap(kpc + kp - 1) = r1
                    if (kstep == 2) then
                       ap(kc + k - 1) = real(ap(kc + k - 1))
                       t = ap(kc + k - 2)
                       ap(kc + k - 2) = ap(kc + kp - 1)
                       ap(kc + kp - 1) = t
                    end if
                 else
                    ap(kc + k - 1) = real(ap(kc + k - 1))
                    if (kstep == 2) ap(kc - 1) = real(ap(kc - 1))
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! perform a rank-1 update of a(1:k-1,1:k-1) as
                    ! a := a - u(k)*d(k)*u(k)**h = a - w(k)*1/d(k)*w(k)**h
                    r1 = one/real(ap(kc + k - 1))
                    call stdlib_chpr(uplo, k - 1, -r1, ap(kc), 1, ap)
                    ! store u(k) in column k
                    call stdlib_csscal(k - 1, r1, ap(kc), 1)
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**h
                       ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**h
                    if (k > 2) then
                       d = stdlib_slapy2(real(ap(k - 1 + (k - 1)*k/2)), aimag(ap(k - 1 + (k - 1) &
                                 *k/2)))
                       d22 = real(ap(k - 1 + (k - 2)*(k - 1)/2))/d
                       d11 = real(ap(k + (k - 1)*k/2))/d
                       tt = one/(d11*d22 - one)
                       d12 = ap(k - 1 + (k - 1)*k/2)/d
                       d = tt/d
                       do j = k - 2, 1, -1
                          wkm1 = d*(d11*ap(j + (k - 2)*(k - 1)/2) - conjg(d12)*ap(j + (k - 1)*k &
                                    /2))
                          wk = d*(d22*ap(j + (k - 1)*k/2) - d12*ap(j + (k - 2)*(k - 1)/2))
                                    
                          do i = j, 1, -1
                             ap(i + (j - 1)*j/2) = ap(i + (j - 1)*j/2) - ap(i + (k - 1)*k/2) &
                                       *conjg(wk) - ap(i + (k - 2)*(k - 1)/2)*conjg(wkm1)
                          end do
                          ap(j + (k - 1)*k/2) = wk
                          ap(j + (k - 2)*(k - 1)/2) = wkm1
                          ap(j + (j - 1)*j/2) = cmplx(real(ap(j + (j - 1)*j/2)), 0.0_sp)
                                    
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              kc = knc - k
              go to 10
           else
              ! factorize a as l*d*l**h using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
              kc = 1
              npp = n*(n + 1)/2
60      continue
              knc = kc
              ! if k > n, exit from loop
              if (k > n) go to 110
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(ap(kc)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, ap(kc + 1), 1)
                 colmax = cabs1(ap(kc + imax - k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero: set info and continue
                 if (info == 0) info = k
                 kp = k
                 ap(kc) = real(ap(kc))
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    rowmax = zero
                    kx = kc + imax - k
                    do j = k, imax - 1
                       if (cabs1(ap(kx)) > rowmax) then
                          rowmax = cabs1(ap(kx))
                          jmax = j
                       end if
                       kx = kx + n - j
                    end do
                    kpc = npp - (n - imax + 1)*(n - imax + 2)/2 + 1
                    if (imax < n) then
                       jmax = imax + stdlib_icamax(n - imax, ap(kpc + 1), 1)
                       rowmax = max(rowmax, cabs1(ap(kpc + jmax - imax)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (abs(real(ap(kpc))) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k + kstep - 1
                 if (kstep == 2) knc = knc + n - k + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_cswap(n - kp, ap(knc + kp - kk + 1), 1, ap(kpc + 1), 1)
                              
                    kx = knc + kp - kk
                    do j = kk + 1, kp - 1
                       kx = kx + n - j + 1
                       t = conjg(ap(knc + j - kk))
                       ap(knc + j - kk) = conjg(ap(kx))
                       ap(kx) = t
                    end do
                    ap(knc + kp - kk) = conjg(ap(knc + kp - kk))
                    r1 = real(ap(knc))
                    ap(knc) = real(ap(kpc))
                    ap(kpc) = r1
                    if (kstep == 2) then
                       ap(kc) = real(ap(kc))
                       t = ap(kc + 1)
                       ap(kc + 1) = ap(kc + kp - k)
                       ap(kc + kp - k) = t
                    end if
                 else
                    ap(kc) = real(ap(kc))
                    if (kstep == 2) ap(knc) = real(ap(knc))
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                       ! perform a rank-1 update of a(k+1:n,k+1:n) as
                       ! a := a - l(k)*d(k)*l(k)**h = a - w(k)*(1/d(k))*w(k)**h
                       r1 = one/real(ap(kc))
                       call stdlib_chpr(uplo, n - k, -r1, ap(kc + 1), 1, ap(kc + n - k + 1))
                       ! store l(k) in column k
                       call stdlib_csscal(n - k, r1, ap(kc + 1), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    if (k < n - 1) then
                       ! perform a rank-2 update of a(k+2:n,k+2:n) as
                       ! a := a - ( l(k) l(k+1) )*d(k)*( l(k) l(k+1) )**h
                          ! = a - ( w(k) w(k+1) )*inv(d(k))*( w(k) w(k+1) )**h
                       ! where l(k) and l(k+1) are the k-th and (k+1)-th
                       ! columns of l
                       d = stdlib_slapy2(real(ap(k + 1 + (k - 1)*(2*n - k)/2)), aimag(ap(k + 1 + ( &
                                  k - 1)*(2*n - k)/2)))
                       d11 = real(ap(k + 1 + k*(2*n - k - 1)/2))/d
                       d22 = real(ap(k + (k - 1)*(2*n - k)/2))/d
                       tt = one/(d11*d22 - one)
                       d21 = ap(k + 1 + (k - 1)*(2*n - k)/2)/d
                       d = tt/d
                       do j = k + 2, n
                          wk = d*(d11*ap(j + (k - 1)*(2*n - k)/2) - d21*ap(j + k*(2*n - k - 1)/2) &
                                     )
                          wkp1 = d*(d22*ap(j + k*(2*n - k - 1)/2) - conjg(d21)*ap(j + (k - 1)*( &
                                    2*n - k)/2))
                          do i = j, n
                             ap(i + (j - 1)*(2*n - j)/2) = ap(i + (j - 1)*(2*n - j)/2) - ap( &
                             i + (k - 1)*(2*n - k)/2)*conjg(wk) - ap(i + k*(2*n - k - 1)/2) &
                                       *conjg(wkp1)
                          end do
                          ap(j + (k - 1)*(2*n - k)/2) = wk
                          ap(j + k*(2*n - k - 1)/2) = wkp1
                          ap(j + (j - 1)*(2*n - j)/2) = cmplx(real(ap(j + (j - 1)*(2*n - j)/2 &
                                    )), 0.0_sp)
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              kc = knc + n - k + 2
              go to 60
           end if
110    continue
           return
           ! end of stdlib_chptrf
     end subroutine stdlib_chptrf

     ! CHPTRI computes the inverse of a complex Hermitian indefinite matrix
     ! A in packed storage using the factorization A = U*D*U**H or
     ! A = L*D*L**H computed by CHPTRF.

     subroutine stdlib_chptri(uplo, n, ap, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: ap(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp
           real(sp) :: ak, akp1, d, t
           complex(sp) :: akkp1, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, real
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chptri', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              kp = n*(n + 1)/2
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. ap(kp) == czero) return
                 kp = kp - info
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              kp = 1
              do info = 1, n
                 if (ipiv(info) > 0 .and. ap(kp) == czero) return
                 kp = kp + n - info + 1
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**h.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
              kc = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              kcnext = kc + k
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 ap(kc + k - 1) = one/real(ap(kc + k - 1))
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, ap(kc), 1, work, 1)
                    call stdlib_chpmv(uplo, k - 1, -cone, ap, work, 1, czero, ap(kc), 1)
                    ap(kc + k - 1) = ap(kc + k - 1) - real(stdlib_cdotc(k - 1, work, 1, ap(kc), 1))
                              
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(ap(kcnext + k - 1))
                 ak = real(ap(kc + k - 1))/t
                 akp1 = real(ap(kcnext + k))/t
                 akkp1 = ap(kcnext + k - 1)/t
                 d = t*(ak*akp1 - one)
                 ap(kc + k - 1) = akp1/d
                 ap(kcnext + k) = ak/d
                 ap(kcnext + k - 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, ap(kc), 1, work, 1)
                    call stdlib_chpmv(uplo, k - 1, -cone, ap, work, 1, czero, ap(kc), 1)
                    ap(kc + k - 1) = ap(kc + k - 1) - real(stdlib_cdotc(k - 1, work, 1, ap(kc), 1))
                              
                    ap(kcnext + k - 1) = ap(kcnext + k - 1) - stdlib_cdotc(k - 1, ap(kc), 1, ap( &
                              kcnext), 1)
                    call stdlib_ccopy(k - 1, ap(kcnext), 1, work, 1)
                    call stdlib_chpmv(uplo, k - 1, -cone, ap, work, 1, czero, ap(kcnext), 1)
                              
                    ap(kcnext + k) = ap(kcnext + k) - real(stdlib_cdotc(k - 1, work, 1, ap(kcnext &
                              ), 1))
                 end if
                 kstep = 2
                 kcnext = kcnext + k + 1
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 kpc = (kp - 1)*kp/2 + 1
                 call stdlib_cswap(kp - 1, ap(kc), 1, ap(kpc), 1)
                 kx = kpc + kp - 1
                 do j = kp + 1, k - 1
                    kx = kx + j - 1
                    temp = conjg(ap(kc + j - 1))
                    ap(kc + j - 1) = conjg(ap(kx))
                    ap(kx) = temp
                 end do
                 ap(kc + kp - 1) = conjg(ap(kc + kp - 1))
                 temp = ap(kc + k - 1)
                 ap(kc + k - 1) = ap(kpc + kp - 1)
                 ap(kpc + kp - 1) = temp
                 if (kstep == 2) then
                    temp = ap(kc + k + k - 1)
                    ap(kc + k + k - 1) = ap(kc + k + kp - 1)
                    ap(kc + k + kp - 1) = temp
                 end if
              end if
              k = k + kstep
              kc = kcnext
              go to 30
50      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**h.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              npp = n*(n + 1)/2
              k = n
              kc = npp
60      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 80
              kcnext = kc - (n - k + 2)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 ap(kc) = one/real(ap(kc))
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, ap(kc + 1), 1, work, 1)
                    call stdlib_chpmv(uplo, n - k, -cone, ap(kc + n - k + 1), work, 1, czero, ap(kc + 1) &
                              , 1)
                    ap(kc) = ap(kc) - real(stdlib_cdotc(n - k, work, 1, ap(kc + 1), 1))
                              
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = abs(ap(kcnext + 1))
                 ak = real(ap(kcnext))/t
                 akp1 = real(ap(kc))/t
                 akkp1 = ap(kcnext + 1)/t
                 d = t*(ak*akp1 - one)
                 ap(kcnext) = akp1/d
                 ap(kc) = ak/d
                 ap(kcnext + 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, ap(kc + 1), 1, work, 1)
                    call stdlib_chpmv(uplo, n - k, -cone, ap(kc + (n - k + 1)), work, 1, czero, ap( &
                              kc + 1), 1)
                    ap(kc) = ap(kc) - real(stdlib_cdotc(n - k, work, 1, ap(kc + 1), 1))
                              
                    ap(kcnext + 1) = ap(kcnext + 1) - stdlib_cdotc(n - k, ap(kc + 1), 1, ap(kcnext + &
                              2), 1)
                    call stdlib_ccopy(n - k, ap(kcnext + 2), 1, work, 1)
                    call stdlib_chpmv(uplo, n - k, -cone, ap(kc + (n - k + 1)), work, 1, czero, ap( &
                              kcnext + 2), 1)
                    ap(kcnext) = ap(kcnext) - real(stdlib_cdotc(n - k, work, 1, ap(kcnext + 2), &
                              1))
                 end if
                 kstep = 2
                 kcnext = kcnext - (n - k + 3)
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 kpc = npp - (n - kp + 1)*(n - kp + 2)/2 + 1
                 if (kp < n) call stdlib_cswap(n - kp, ap(kc + kp - k + 1), 1, ap(kpc + 1), 1)
                 kx = kc + kp - k
                 do j = k + 1, kp - 1
                    kx = kx + n - j + 1
                    temp = conjg(ap(kc + j - k))
                    ap(kc + j - k) = conjg(ap(kx))
                    ap(kx) = temp
                 end do
                 ap(kc + kp - k) = conjg(ap(kc + kp - k))
                 temp = ap(kc)
                 ap(kc) = ap(kpc)
                 ap(kpc) = temp
                 if (kstep == 2) then
                    temp = ap(kc - n + k - 1)
                    ap(kc - n + k - 1) = ap(kc - n + kp - 1)
                    ap(kc - n + kp - 1) = temp
                 end if
              end if
              k = k - kstep
              kc = kcnext
              go to 60
80      continue
           end if
           return
           ! end of stdlib_chptri
     end subroutine stdlib_chptri

     ! CLA_GBAMV  performs one of the matrix-vector operations
     ! y := alpha*abs(A)*abs(x) + beta*abs(y),
     ! or   y := alpha*abs(A)**T*abs(x) + beta*abs(y),
     ! where alpha and beta are scalars, x and y are vectors and A is an
     ! m by n matrix.
     ! This function is primarily used in calculating error bounds.
     ! To protect against underflow during evaluation, components in
     ! the resulting vector are perturbed away from zero by (N+1)
     ! times the underflow threshold.  To prevent unnecessarily large
     ! errors for block-structure embedded in general matrices,
     ! "symbolically" zero components are not perturbed.  A zero
     ! entry is considered "symbolic" if all multiplications involved
     ! in computing that entry have at least one zero multiplicand.

     subroutine stdlib_cla_gbamv(trans, m, n, kl, ku, alpha, ab, ldab, x, incx, beta, y, incy)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(sp) :: alpha, beta
           integer(ilp) :: incx, incy, ldab, m, n, kl, ku, trans
           ! .. array arguments ..
           complex(sp) :: ab(ldab, *), x(*)
           real(sp) :: y(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: symb_zero
           real(sp) :: temp, safe1
           integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny, kd, ke
           complex(sp) :: cdum
     
           ! .. intrinsic functions ..
           intrinsic :: max, abs, real, aimag, sign
           ! .. statement functions
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(cdum) = abs(real(cdum)) + abs(aimag(cdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. ((trans == stdlib_ilatrans('n')) .or. (trans == stdlib_ilatrans('t')) &
                     .or. (trans == stdlib_ilatrans('c')))) then
              info = 1
           else if (m < 0) then
              info = 2
           else if (n < 0) then
              info = 3
           else if (kl < 0 .or. kl > m - 1) then
              info = 4
           else if (ku < 0 .or. ku > n - 1) then
              info = 5
           else if (ldab < kl + ku + 1) then
              info = 6
           else if (incx == 0) then
              info = 8
           else if (incy == 0) then
              info = 11
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cla_gbamv ', info)
              return
           end if
           ! quick return if possible.
           if ((m == 0) .or. (n == 0) .or. ((alpha == czero) .and. (beta == cone))) return
           ! set  lenx  and  leny, the lengths of the vectors x and y, and set
           ! up the start points in  x  and  y.
           if (trans == stdlib_ilatrans('n')) then
              lenx = n
              leny = m
           else
              lenx = m
              leny = n
           end if
           if (incx > 0) then
              kx = 1
           else
              kx = 1 - (lenx - 1)*incx
           end if
           if (incy > 0) then
              ky = 1
           else
              ky = 1 - (leny - 1)*incy
           end if
           ! set safe1 essentially to be the underflow threshold times the
           ! number of additions in each row.
           safe1 = stdlib_slamch('safe minimum')
           safe1 = (n + 1)*safe1
           ! form  y := alpha*abs(a)*abs(x) + beta*abs(y).
           ! the o(m*n) symb_zero tests could be replaced by o(n) queries to
           ! the inexact flag.  still doesn't help change the iteration order
           ! to per-column.
           kd = ku + 1
           ke = kl + 1
           iy = ky
           if (incx == 1) then
              if (trans == stdlib_ilatrans('n')) then
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       do j = max(i - kl, 1), min(i + ku, lenx)
                          temp = cabs1(ab(kd + i - j, j))
                          symb_zero = symb_zero .and. (x(j) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       do j = max(i - kl, 1), min(i + ku, lenx)
                          temp = cabs1(ab(ke - i + j, i))
                          symb_zero = symb_zero .and. (x(j) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           else
              if (trans == stdlib_ilatrans('n')) then
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       jx = kx
                       do j = max(i - kl, 1), min(i + ku, lenx)
                          temp = cabs1(ab(kd + i - j, j))
                          symb_zero = symb_zero .and. (x(jx) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       jx = kx
                       do j = max(i - kl, 1), min(i + ku, lenx)
                          temp = cabs1(ab(ke - i + j, i))
                          symb_zero = symb_zero .and. (x(jx) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           end if
           return
           ! end of stdlib_cla_gbamv
     end subroutine stdlib_cla_gbamv

     ! CLA_GBRPVGRW computes the reciprocal pivot growth factor
     ! norm(A)/norm(U). The "max absolute element" norm is used. If this is
     ! much less than 1, the stability of the LU factorization of the
     ! (equilibrated) matrix A could be poor. This also means that the
     ! solution X, estimated condition numbers, and error bounds could be
     ! unreliable.

     real(sp) function stdlib_cla_gbrpvgrw(n, kl, ku, ncols, ab, ldab, afb, ldafb)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n, kl, ku, ncols, ldab, ldafb
           ! .. array arguments ..
           complex(sp) :: ab(ldab, *), afb(ldafb, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j, kd
           real(sp) :: amax, umax, rpvgrw
           complex(sp) :: zdum
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, real, aimag
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           rpvgrw = 1.0
           kd = ku + 1
           do j = 1, ncols
              amax = 0.0
              umax = 0.0
              do i = max(j - ku, 1), min(j + kl, n)
                 amax = max(cabs1(ab(kd + i - j, j)), amax)
              end do
              do i = max(j - ku, 1), j
                 umax = max(cabs1(afb(kd + i - j, j)), umax)
              end do
              if (umax /= 0.0) then
                 rpvgrw = min(amax/umax, rpvgrw)
              end if
           end do
           stdlib_cla_gbrpvgrw = rpvgrw
           ! end of stdlib_cla_gbrpvgrw
     end function stdlib_cla_gbrpvgrw

     ! CLA_GEAMV  performs one of the matrix-vector operations
     ! y := alpha*abs(A)*abs(x) + beta*abs(y),
     ! or   y := alpha*abs(A)**T*abs(x) + beta*abs(y),
     ! where alpha and beta are scalars, x and y are vectors and A is an
     ! m by n matrix.
     ! This function is primarily used in calculating error bounds.
     ! To protect against underflow during evaluation, components in
     ! the resulting vector are perturbed away from zero by (N+1)
     ! times the underflow threshold.  To prevent unnecessarily large
     ! errors for block-structure embedded in general matrices,
     ! "symbolically" zero components are not perturbed.  A zero
     ! entry is considered "symbolic" if all multiplications involved
     ! in computing that entry have at least one zero multiplicand.

     subroutine stdlib_cla_geamv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(sp) :: alpha, beta
           integer(ilp) :: incx, incy, lda, m, n
           integer(ilp) :: trans
           ! .. array arguments ..
           complex(sp) :: a(lda, *), x(*)
           real(sp) :: y(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: symb_zero
           real(sp) :: temp, safe1
           integer(ilp) :: i, info, iy, j, jx, kx, ky, lenx, leny
           complex(sp) :: cdum
     
           ! .. intrinsic functions ..
           intrinsic :: max, abs, real, aimag, sign
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(cdum) = abs(real(cdum)) + abs(aimag(cdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. ((trans == stdlib_ilatrans('n')) .or. (trans == stdlib_ilatrans('t')) &
                     .or. (trans == stdlib_ilatrans('c')))) then
              info = 1
           else if (m < 0) then
              info = 2
           else if (n < 0) then
              info = 3
           else if (lda < max(1, m)) then
              info = 6
           else if (incx == 0) then
              info = 8
           else if (incy == 0) then
              info = 11
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cla_geamv ', info)
              return
           end if
           ! quick return if possible.
           if ((m == 0) .or. (n == 0) .or. ((alpha == czero) .and. (beta == cone))) return
           ! set  lenx  and  leny, the lengths of the vectors x and y, and set
           ! up the start points in  x  and  y.
           if (trans == stdlib_ilatrans('n')) then
              lenx = n
              leny = m
           else
              lenx = m
              leny = n
           end if
           if (incx > 0) then
              kx = 1
           else
              kx = 1 - (lenx - 1)*incx
           end if
           if (incy > 0) then
              ky = 1
           else
              ky = 1 - (leny - 1)*incy
           end if
           ! set safe1 essentially to be the underflow threshold times the
           ! number of additions in each row.
           safe1 = stdlib_slamch('safe minimum')
           safe1 = (n + 1)*safe1
           ! form  y := alpha*abs(a)*abs(x) + beta*abs(y).
           ! the o(m*n) symb_zero tests could be replaced by o(n) queries to
           ! the inexact flag.  still doesn't help change the iteration order
           ! to per-column.
           iy = ky
           if (incx == 1) then
              if (trans == stdlib_ilatrans('n')) then
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       do j = 1, lenx
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       do j = 1, lenx
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           else
              if (trans == stdlib_ilatrans('n')) then
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       jx = kx
                       do j = 1, lenx
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(jx) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, leny
                    if (beta == 0.0) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == 0.0) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= 0.0) then
                       jx = kx
                       do j = 1, lenx
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(jx) == czero .or. temp == czero)
                                    
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           end if
           return
           ! end of stdlib_cla_geamv
     end subroutine stdlib_cla_geamv

     ! CLA_GERPVGRW computes the reciprocal pivot growth factor
     ! norm(A)/norm(U). The "max absolute element" norm is used. If this is
     ! much less than 1, the stability of the LU factorization of the
     ! (equilibrated) matrix A could be poor. This also means that the
     ! solution X, estimated condition numbers, and error bounds could be
     ! unreliable.

     real(sp) function stdlib_cla_gerpvgrw(n, ncols, a, lda, af, ldaf)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n, ncols, lda, ldaf
           ! .. array arguments ..
           complex(sp) :: a(lda, *), af(ldaf, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: amax, umax, rpvgrw
           complex(sp) :: zdum
           ! .. intrinsic functions ..
           intrinsic :: max, min, abs, real, aimag
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           rpvgrw = 1.0
           do j = 1, ncols
              amax = 0.0
              umax = 0.0
              do i = 1, n
                 amax = max(cabs1(a(i, j)), amax)
              end do
              do i = 1, j
                 umax = max(cabs1(af(i, j)), umax)
              end do
              if (umax /= 0.0) then
                 rpvgrw = min(amax/umax, rpvgrw)
              end if
           end do
           stdlib_cla_gerpvgrw = rpvgrw
           ! end of stdlib_cla_gerpvgrw
     end function stdlib_cla_gerpvgrw

     ! CLA_SYAMV  performs the matrix-vector operation
     ! y := alpha*abs(A)*abs(x) + beta*abs(y),
     ! where alpha and beta are scalars, x and y are vectors and A is an
     ! n by n symmetric matrix.
     ! This function is primarily used in calculating error bounds.
     ! To protect against underflow during evaluation, components in
     ! the resulting vector are perturbed away from zero by (N+1)
     ! times the underflow threshold.  To prevent unnecessarily large
     ! errors for block-structure embedded in general matrices,
     ! "symbolically" zero components are not perturbed.  A zero
     ! entry is considered "symbolic" if all multiplications involved
     ! in computing that entry have at least one zero multiplicand.

     subroutine stdlib_cla_heamv(uplo, n, alpha, a, lda, x, incx, beta, y, incy)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(sp) :: alpha, beta
           integer(ilp) :: incx, incy, lda, n, uplo
           ! .. array arguments ..
           complex(sp) :: a(lda, *), x(*)
           real(sp) :: y(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: symb_zero
           real(sp) :: temp, safe1
           integer(ilp) :: i, info, iy, j, jx, kx, ky
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: max, abs, sign, real, aimag
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (uplo /= stdlib_ilauplo('u') .and. uplo /= stdlib_ilauplo('l')) then
              info = 1
           else if (n < 0) then
              info = 2
           else if (lda < max(1, n)) then
              info = 5
           else if (incx == 0) then
              info = 7
           else if (incy == 0) then
              info = 10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_chemv ', info)
              return
           end if
           ! quick return if possible.
           if ((n == 0) .or. ((alpha == zero) .and. (beta == one))) return
           ! set up the start points in  x  and  y.
           if (incx > 0) then
              kx = 1
           else
              kx = 1 - (n - 1)*incx
           end if
           if (incy > 0) then
              ky = 1
           else
              ky = 1 - (n - 1)*incy
           end if
           ! set safe1 essentially to be the underflow threshold times the
           ! number of additions in each row.
           safe1 = stdlib_slamch('safe minimum')
           safe1 = (n + 1)*safe1
           ! form  y := alpha*abs(a)*abs(x) + beta*abs(y).
           ! the o(n^2) symb_zero tests could be replaced by o(n) queries to
           ! the inexact flag.  still doesn't help change the iteration order
           ! to per-column.
           iy = ky
           if (incx == 1) then
              if (uplo == stdlib_ilauplo('u')) then
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           else
              if (uplo == stdlib_ilauplo('u')) then
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    jx = kx
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    jx = kx
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           end if
           return
           ! end of stdlib_cla_heamv
     end subroutine stdlib_cla_heamv

     ! CLA_LIN_BERR computes componentwise relative backward error from
     ! the formula
     ! max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) )
     ! where abs(Z) is the componentwise absolute value of the matrix
     ! or vector Z.

     subroutine stdlib_cla_lin_berr(n, nz, nrhs, res, ayb, berr)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n, nz, nrhs
           ! .. array arguments ..
           real(sp) :: ayb(n, nrhs), berr(nrhs)
           complex(sp) :: res(n, nrhs)
        ! =====================================================================
           ! .. local scalars ..
           real(sp) :: tmp, safe1
           integer(ilp) :: i, j
           complex(sp) :: cdum
           ! .. intrinsic functions ..
           intrinsic :: abs, real, aimag, max
     
           ! .. statement functions ..
           complex(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(cdum) = abs(real(cdum)) + abs(aimag(cdum))
           ! .. executable statements ..
           ! adding safe1 to the numerator guards against spuriously zero
           ! residuals.  a similar safeguard is in the cla_yyamv routine used
           ! to compute ayb.
           safe1 = stdlib_slamch('safe minimum')
           safe1 = (nz + 1)*safe1
           do j = 1, nrhs
              berr(j) = 0.0
              do i = 1, n
                 if (ayb(i, j) /= 0.0) then
                    tmp = (safe1 + cabs1(res(i, j)))/ayb(i, j)
                    berr(j) = max(berr(j), tmp)
                 end if
           ! if ayb is exactly 0.0 (and if computed by cla_yyamv), then we know
           ! the true residual also must be exactly 0.0.
              end do
           end do
           ! end of stdlib_cla_lin_berr
     end subroutine stdlib_cla_lin_berr

     ! CLA_PORPVGRW computes the reciprocal pivot growth factor
     ! norm(A)/norm(U). The "max absolute element" norm is used. If this is
     ! much less than 1, the stability of the LU factorization of the
     ! (equilibrated) matrix A could be poor. This also means that the
     ! solution X, estimated condition numbers, and error bounds could be
     ! unreliable.

     real(sp) function stdlib_cla_porpvgrw(uplo, ncols, a, lda, af, ldaf, work)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character*1 uplo
           integer(ilp) :: ncols, lda, ldaf
           ! .. array arguments ..
           complex(sp) :: a(lda, *), af(ldaf, *)
           real(sp) :: work(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: amax, umax, rpvgrw
           logical(lk) :: upper
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, real, aimag
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           upper = stdlib_lsame('upper', uplo)
           ! stdlib_spotrf will have factored only the ncolsxncols leading minor, so
           ! we restrict the growth search to that minor and use only the first
           ! 2*ncols workspace entries.
           rpvgrw = 1.0
           do i = 1, 2*ncols
              work(i) = 0.0
           end do
           ! find the max magnitude entry of each column.
           if (upper) then
              do j = 1, ncols
                 do i = 1, j
                    work(ncols + j) = max(cabs1(a(i, j)), work(ncols + j))
                 end do
              end do
           else
              do j = 1, ncols
                 do i = j, ncols
                    work(ncols + j) = max(cabs1(a(i, j)), work(ncols + j))
                 end do
              end do
           end if
           ! now find the max magnitude entry of each column of the factor in
           ! af.  no pivoting, so no permutations.
           if (stdlib_lsame('upper', uplo)) then
              do j = 1, ncols
                 do i = 1, j
                    work(j) = max(cabs1(af(i, j)), work(j))
                 end do
              end do
           else
              do j = 1, ncols
                 do i = j, ncols
                    work(j) = max(cabs1(af(i, j)), work(j))
                 end do
              end do
           end if
           ! compute the *inverse* of the max element growth factor.  dividing
           ! by zero would imply the largest entry of the factor's column is
           ! zero.  than can happen when either the column of a is zero or
           ! massive pivots made the factor underflow to zero.  neither counts
           ! as growth in itself, so simply ignore terms with zero
           ! denominators.
           if (stdlib_lsame('upper', uplo)) then
              do i = 1, ncols
                 umax = work(i)
                 amax = work(ncols + i)
                 if (umax /= 0.0) then
                    rpvgrw = min(amax/umax, rpvgrw)
                 end if
              end do
           else
              do i = 1, ncols
                 umax = work(i)
                 amax = work(ncols + i)
                 if (umax /= 0.0) then
                    rpvgrw = min(amax/umax, rpvgrw)
                 end if
              end do
           end if
           stdlib_cla_porpvgrw = rpvgrw
           ! end of stdlib_cla_porpvgrw
     end function stdlib_cla_porpvgrw

     ! CLA_SYAMV  performs the matrix-vector operation
     ! y := alpha*abs(A)*abs(x) + beta*abs(y),
     ! where alpha and beta are scalars, x and y are vectors and A is an
     ! n by n symmetric matrix.
     ! This function is primarily used in calculating error bounds.
     ! To protect against underflow during evaluation, components in
     ! the resulting vector are perturbed away from zero by (N+1)
     ! times the underflow threshold.  To prevent unnecessarily large
     ! errors for block-structure embedded in general matrices,
     ! "symbolically" zero components are not perturbed.  A zero
     ! entry is considered "symbolic" if all multiplications involved
     ! in computing that entry have at least one zero multiplicand.

     subroutine stdlib_cla_syamv(uplo, n, alpha, a, lda, x, incx, beta, y, incy)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(sp) :: alpha, beta
           integer(ilp) :: incx, incy, lda, n
           integer(ilp) :: uplo
           ! .. array arguments ..
           complex(sp) :: a(lda, *), x(*)
           real(sp) :: y(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: symb_zero
           real(sp) :: temp, safe1
           integer(ilp) :: i, info, iy, j, jx, kx, ky
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: max, abs, sign, real, aimag
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (uplo /= stdlib_ilauplo('u') .and. uplo /= stdlib_ilauplo('l')) then
              info = 1
           else if (n < 0) then
              info = 2
           else if (lda < max(1, n)) then
              info = 5
           else if (incx == 0) then
              info = 7
           else if (incy == 0) then
              info = 10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cla_syamv', info)
              return
           end if
           ! quick return if possible.
           if ((n == 0) .or. ((alpha == zero) .and. (beta == one))) return
           ! set up the start points in  x  and  y.
           if (incx > 0) then
              kx = 1
           else
              kx = 1 - (n - 1)*incx
           end if
           if (incy > 0) then
              ky = 1
           else
              ky = 1 - (n - 1)*incy
           end if
           ! set safe1 essentially to be the underflow threshold times the
           ! number of additions in each row.
           safe1 = stdlib_slamch('safe minimum')
           safe1 = (n + 1)*safe1
           ! form  y := alpha*abs(a)*abs(x) + beta*abs(y).
           ! the o(n^2) symb_zero tests could be replaced by o(n) queries to
           ! the inexact flag.  still doesn't help change the iteration order
           ! to per-column.
           iy = ky
           if (incx == 1) then
              if (uplo == stdlib_ilauplo('u')) then
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(j))*temp
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           else
              if (uplo == stdlib_ilauplo('u')) then
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    jx = kx
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              else
                 do i = 1, n
                    if (beta == zero) then
                       symb_zero = .true.
                       y(iy) = 0.0
                    else if (y(iy) == zero) then
                       symb_zero = .true.
                    else
                       symb_zero = .false.
                       y(iy) = beta*abs(y(iy))
                    end if
                    jx = kx
                    if (alpha /= zero) then
                       do j = 1, i
                          temp = cabs1(a(i, j))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                       do j = i + 1, n
                          temp = cabs1(a(j, i))
                          symb_zero = symb_zero .and. (x(j) == zero .or. temp == zero)
                          y(iy) = y(iy) + alpha*cabs1(x(jx))*temp
                          jx = jx + incx
                       end do
                    end if
                    if (.not. symb_zero) y(iy) = y(iy) + sign(safe1, y(iy))
                    iy = iy + incy
                 end do
              end if
           end if
           return
           ! end of stdlib_cla_syamv
     end subroutine stdlib_cla_syamv

     ! CLA_WWADDW adds a vector W into a doubled-single vector (X, Y).
     ! This works for all extant IBM's hex and binary floating point
     ! arithmetic, but not for decimal.

     subroutine stdlib_cla_wwaddw(n, x, y, w)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: n
           ! .. array arguments ..
           complex(sp) :: x(*), y(*), w(*)
        ! =====================================================================
           ! .. local scalars ..
           complex(sp) :: s
           integer(ilp) :: i
           ! .. executable statements ..
           do 10 i = 1, n
             s = x(i) + w(i)
             s = (s + s) - s
             y(i) = ((x(i) - s) + w(i)) + y(i)
             x(i) = s
10      continue
           return
           ! end of stdlib_cla_wwaddw
     end subroutine stdlib_cla_wwaddw

     ! CLACGV conjugates a complex vector of length N.

     subroutine stdlib_clacgv(n, x, incx)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, n
           ! .. array arguments ..
           complex(sp) :: x(*)
       ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ioff
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           if (incx == 1) then
              do i = 1, n
                 x(i) = conjg(x(i))
              end do
           else
              ioff = 1
              if (incx < 0) ioff = 1 - (n - 1)*incx
              do i = 1, n
                 x(ioff) = conjg(x(ioff))
                 ioff = ioff + incx
              end do
           end if
           return
           ! end of stdlib_clacgv
     end subroutine stdlib_clacgv

     ! CLACN2 estimates the 1-norm of a square, complex matrix A.
     ! Reverse communication is used for evaluating matrix-vector products.

     subroutine stdlib_clacn2(n, v, x, est, kase, isave)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: kase, n
           real(sp) :: est
           ! .. array arguments ..
           integer(ilp) :: isave(3)
           complex(sp) :: v(*), x(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: itmax = 5
           
           ! .. local scalars ..
           integer(ilp) :: i, jlast
           real(sp) :: absxi, altsgn, estold, safmin, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, real
           ! .. executable statements ..
           safmin = stdlib_slamch('safe minimum')
           if (kase == 0) then
              do i = 1, n
                 x(i) = cmplx(one/real(n), KIND=sp)
              end do
              kase = 1
              isave(1) = 1
              return
           end if
           go to(20, 40, 70, 90, 120) isave(1)
           ! ................ entry   (isave( 1 ) = 1)
           ! first iteration.  x has been overwritten by a*x.
20      continue
           if (n == 1) then
              v(1) = x(1)
              est = abs(v(1))
              ! ... quit
              go to 130
           end if
           est = stdlib_scsum1(n, x, 1)
           do i = 1, n
              absxi = abs(x(i))
              if (absxi > safmin) then
                 x(i) = cmplx(real(x(i))/absxi, aimag(x(i))/absxi)
              else
                 x(i) = cone
              end if
           end do
           kase = 2
           isave(1) = 2
           return
           ! ................ entry   (isave( 1 ) = 2)
           ! first iteration.  x has been overwritten by ctrans(a)*x.
40      continue
           isave(2) = stdlib_icmax1(n, x, 1)
           isave(3) = 2
           ! main loop - iterations 2,3,...,itmax.
50      continue
           do i = 1, n
              x(i) = czero
           end do
           x(isave(2)) = cone
           kase = 1
           isave(1) = 3
           return
           ! ................ entry   (isave( 1 ) = 3)
           ! x has been overwritten by a*x.
70      continue
           call stdlib_ccopy(n, x, 1, v, 1)
           estold = est
           est = stdlib_scsum1(n, v, 1)
           ! test for cycling.
           if (est <= estold) go to 100
           do i = 1, n
              absxi = abs(x(i))
              if (absxi > safmin) then
                 x(i) = cmplx(real(x(i))/absxi, aimag(x(i))/absxi)
              else
                 x(i) = cone
              end if
           end do
           kase = 2
           isave(1) = 4
           return
           ! ................ entry   (isave( 1 ) = 4)
           ! x has been overwritten by ctrans(a)*x.
90      continue
           jlast = isave(2)
           isave(2) = stdlib_icmax1(n, x, 1)
           if ((abs(x(jlast)) /= abs(x(isave(2)))) .and. (isave(3) < itmax)) &
                     then
              isave(3) = isave(3) + 1
              go to 50
           end if
           ! iteration complete.  final stage.
100    continue
           altsgn = one
           do i = 1, n
              x(i) = cmplx(altsgn*(one + real(i - 1)/real(n - 1)))
              altsgn = -altsgn
           end do
           kase = 1
           isave(1) = 5
           return
           ! ................ entry   (isave( 1 ) = 5)
           ! x has been overwritten by a*x.
120    continue
           temp = two*(stdlib_scsum1(n, x, 1)/real(3*n))
           if (temp > est) then
              call stdlib_ccopy(n, x, 1, v, 1)
              est = temp
           end if
130    continue
           kase = 0
           return
           ! end of stdlib_clacn2
     end subroutine stdlib_clacn2

     ! CLACON estimates the 1-norm of a square, complex matrix A.
     ! Reverse communication is used for evaluating matrix-vector products.

     subroutine stdlib_clacon(n, v, x, est, kase)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: kase, n
           real(sp) :: est
           ! .. array arguments ..
           complex(sp) :: v(n), x(n)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: itmax = 5
           
           ! .. local scalars ..
           integer(ilp) :: i, iter, j, jlast, jump
           real(sp) :: absxi, altsgn, estold, safmin, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, real
           ! .. save statement ..
           save
           ! .. executable statements ..
           safmin = stdlib_slamch('safe minimum')
           if (kase == 0) then
              do i = 1, n
                 x(i) = cmplx(one/real(n), KIND=sp)
              end do
              kase = 1
              jump = 1
              return
           end if
           go to(20, 40, 70, 90, 120) jump
           ! ................ entry   (jump = 1)
           ! first iteration.  x has been overwritten by a*x.
20      continue
           if (n == 1) then
              v(1) = x(1)
              est = abs(v(1))
              ! ... quit
              go to 130
           end if
           est = stdlib_scsum1(n, x, 1)
           do i = 1, n
              absxi = abs(x(i))
              if (absxi > safmin) then
                 x(i) = cmplx(real(x(i))/absxi, aimag(x(i))/absxi)
              else
                 x(i) = cone
              end if
           end do
           kase = 2
           jump = 2
           return
           ! ................ entry   (jump = 2)
           ! first iteration.  x has been overwritten by ctrans(a)*x.
40      continue
           j = stdlib_icmax1(n, x, 1)
           iter = 2
           ! main loop - iterations 2,3,...,itmax.
50      continue
           do i = 1, n
              x(i) = czero
           end do
           x(j) = cone
           kase = 1
           jump = 3
           return
           ! ................ entry   (jump = 3)
           ! x has been overwritten by a*x.
70      continue
           call stdlib_ccopy(n, x, 1, v, 1)
           estold = est
           est = stdlib_scsum1(n, v, 1)
           ! test for cycling.
           if (est <= estold) go to 100
           do i = 1, n
              absxi = abs(x(i))
              if (absxi > safmin) then
                 x(i) = cmplx(real(x(i))/absxi, aimag(x(i))/absxi)
              else
                 x(i) = cone
              end if
           end do
           kase = 2
           jump = 4
           return
           ! ................ entry   (jump = 4)
           ! x has been overwritten by ctrans(a)*x.
90      continue
           jlast = j
           j = stdlib_icmax1(n, x, 1)
           if ((abs(x(jlast)) /= abs(x(j))) .and. (iter < itmax)) then
              iter = iter + 1
              go to 50
           end if
           ! iteration complete.  final stage.
100    continue
           altsgn = one
           do i = 1, n
              x(i) = cmplx(altsgn*(one + real(i - 1)/real(n - 1)))
              altsgn = -altsgn
           end do
           kase = 1
           jump = 5
           return
           ! ................ entry   (jump = 5)
           ! x has been overwritten by a*x.
120    continue
           temp = two*(stdlib_scsum1(n, x, 1)/real(3*n))
           if (temp > est) then
              call stdlib_ccopy(n, x, 1, v, 1)
              est = temp
           end if
130    continue
           kase = 0
           return
           ! end of stdlib_clacon
     end subroutine stdlib_clacon

     ! CLACP2 copies all or part of a real two-dimensional matrix A to a
     ! complex matrix B.

     subroutine stdlib_clacp2(uplo, m, n, a, lda, b, ldb)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: lda, ldb, m, n
           ! .. array arguments ..
           real(sp) :: a(lda, *)
           complex(sp) :: b(ldb, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: min
           ! .. executable statements ..
           if (stdlib_lsame(uplo, 'u')) then
              do j = 1, n
                 do i = 1, min(j, m)
                    b(i, j) = a(i, j)
                 end do
              end do
           else if (stdlib_lsame(uplo, 'l')) then
              do j = 1, n
                 do i = j, m
                    b(i, j) = a(i, j)
                 end do
              end do
           else
              do j = 1, n
                 do i = 1, m
                    b(i, j) = a(i, j)
                 end do
              end do
           end if
           return
           ! end of stdlib_clacp2
     end subroutine stdlib_clacp2

     ! CLACPY copies all or part of a two-dimensional matrix A to another
     ! matrix B.

     subroutine stdlib_clacpy(uplo, m, n, a, lda, b, ldb)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: lda, ldb, m, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: min
           ! .. executable statements ..
           if (stdlib_lsame(uplo, 'u')) then
              do j = 1, n
                 do i = 1, min(j, m)
                    b(i, j) = a(i, j)
                 end do
              end do
           else if (stdlib_lsame(uplo, 'l')) then
              do j = 1, n
                 do i = j, m
                    b(i, j) = a(i, j)
                 end do
              end do
           else
              do j = 1, n
                 do i = 1, m
                    b(i, j) = a(i, j)
                 end do
              end do
           end if
           return
           ! end of stdlib_clacpy
     end subroutine stdlib_clacpy

     ! CLACRM performs a very simple matrix-matrix multiplication:
     ! C := A * B,
     ! where A is M by N and complex; B is N by N and real;
     ! C is M by N and complex.

     subroutine stdlib_clacrm(m, n, a, lda, b, ldb, c, ldc, rwork)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: lda, ldb, ldc, m, n
           ! .. array arguments ..
           real(sp) :: b(ldb, *), rwork(*)
           complex(sp) :: a(lda, *), c(ldc, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, l
           ! .. intrinsic functions ..
           intrinsic :: aimag, cmplx, real
     
           ! .. executable statements ..
           ! quick return if possible.
           if ((m == 0) .or. (n == 0)) return
           do j = 1, n
              do i = 1, m
                 rwork((j - 1)*m + i) = real(a(i, j))
              end do
           end do
           l = m*n + 1
           call stdlib_sgemm('n', 'n', m, n, n, one, rwork, m, b, ldb, zero, rwork(l), m)
                     
           do j = 1, n
              do i = 1, m
                 c(i, j) = rwork(l + (j - 1)*m + i - 1)
              end do
           end do
           do j = 1, n
              do i = 1, m
                 rwork((j - 1)*m + i) = aimag(a(i, j))
              end do
           end do
           call stdlib_sgemm('n', 'n', m, n, n, one, rwork, m, b, ldb, zero, rwork(l), m)
                     
           do j = 1, n
              do i = 1, m
                 c(i, j) = cmplx(real(c(i, j)), rwork(l + (j - 1)*m + i - 1))
              end do
           end do
           return
           ! end of stdlib_clacrm
     end subroutine stdlib_clacrm

     ! CLACRT performs the operation
     ! (  c  s )( x )  ==> ( x )
     ! ( -s  c )( y )      ( y )
     ! where c and s are complex and the vectors x and y are complex.

     subroutine stdlib_clacrt(n, cx, incx, cy, incy, c, s)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, incy, n
           complex(sp) :: c, s
           ! .. array arguments ..
           complex(sp) :: cx(*), cy(*)
       ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ix, iy
           complex(sp) :: ctemp
           ! .. executable statements ..
           if (n <= 0) return
           if (incx == 1 .and. incy == 1) go to 20
           ! code for unequal increments or equal increments not equal to 1
           ix = 1
           iy = 1
           if (incx < 0) ix = (-n + 1)*incx + 1
           if (incy < 0) iy = (-n + 1)*incy + 1
           do i = 1, n
              ctemp = c*cx(ix) + s*cy(iy)
              cy(iy) = c*cy(iy) - s*cx(ix)
              cx(ix) = ctemp
              ix = ix + incx
              iy = iy + incy
           end do
           return
           ! code for both increments equal to 1
20      continue
           do i = 1, n
              ctemp = c*cx(i) + s*cy(i)
              cy(i) = c*cy(i) - s*cx(i)
              cx(i) = ctemp
           end do
           return
     end subroutine stdlib_clacrt

     ! CLADIV := X / Y, where X and Y are complex.  The computation of X / Y
     ! will not overflow on an intermediary step unless the results
     ! overflows.

     complex(sp) function stdlib_cladiv(x, y)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           complex(sp) :: x, y
        ! =====================================================================
           ! .. local scalars ..
           real(sp) :: zi, zr
     
           ! .. intrinsic functions ..
           intrinsic :: aimag, cmplx, real
           ! .. executable statements ..
           call stdlib_sladiv(real(x), aimag(x), real(y), aimag(y), zr, zi)
           stdlib_cladiv = cmplx(zr, zi, KIND=sp)
           return
           ! end of stdlib_cladiv
     end function stdlib_cladiv

     ! CLAED8 merges the two sets of eigenvalues together into a single
     ! sorted set.  Then it tries to deflate the size of the problem.
     ! There are two ways in which deflation can occur:  when two or more
     ! eigenvalues are close together or if there is a tiny element in the
     ! Z vector.  For each such occurrence the order of the related secular
     ! equation problem is reduced by one.

     subroutine stdlib_claed8(k, n, qsiz, q, ldq, d, rho, cutpnt, z, dlamda, q2, ldq2, w, indxp, &
               indx, indxq, perm, givptr, givcol, givnum, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: cutpnt, givptr, info, k, ldq, ldq2, n, qsiz
           real(sp) :: rho
           ! .. array arguments ..
           integer(ilp) :: givcol(2, *), indx(*), indxp(*), indxq(*), perm(*)
           real(sp) :: d(*), dlamda(*), givnum(2, *), w(*), z(*)
           complex(sp) :: q(ldq, *), q2(ldq2, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: mone = -1.0e0
           
           ! .. local scalars ..
           integer(ilp) :: i, imax, j, jlam, jmax, jp, k2, n1, n1p1, n2
           real(sp) :: c, eps, s, t, tau, tol
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (n < 0) then
              info = -2
           else if (qsiz < n) then
              info = -3
           else if (ldq < max(1, n)) then
              info = -5
           else if (cutpnt < min(1, n) .or. cutpnt > n) then
              info = -8
           else if (ldq2 < max(1, n)) then
              info = -12
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_claed8', -info)
              return
           end if
           ! need to initialize givptr to o here in case of quick exit
           ! to prevent an unspecified code behavior (usually sigfault)
           ! when iwork array on entry to *stedc is not zeroed
           ! (or at least some iwork entries which used in *laed7 for givptr).
           givptr = 0
           ! quick return if possible
           if (n == 0) return
           n1 = cutpnt
           n2 = n - n1
           n1p1 = n1 + 1
           if (rho < zero) then
              call stdlib_sscal(n2, mone, z(n1p1), 1)
           end if
           ! normalize z so that norm(z) = 1
           t = one/sqrt(two)
           do j = 1, n
              indx(j) = j
           end do
           call stdlib_sscal(n, t, z, 1)
           rho = abs(two*rho)
           ! sort the eigenvalues into increasing order
           do i = cutpnt + 1, n
              indxq(i) = indxq(i) + cutpnt
           end do
           do i = 1, n
              dlamda(i) = d(indxq(i))
              w(i) = z(indxq(i))
           end do
           i = 1
           j = cutpnt + 1
           call stdlib_slamrg(n1, n2, dlamda, 1, 1, indx)
           do i = 1, n
              d(i) = dlamda(indx(i))
              z(i) = w(indx(i))
           end do
           ! calculate the allowable deflation tolerance
           imax = stdlib_isamax(n, z, 1)
           jmax = stdlib_isamax(n, d, 1)
           eps = stdlib_slamch('epsilon')
           tol = eight*eps*abs(d(jmax))
           ! if the rank-1 modifier is small enough, no more needs to be done
           ! -- except to reorganize q so that its columns correspond with the
           ! elements in d.
           if (rho*abs(z(imax)) <= tol) then
              k = 0
              do j = 1, n
                 perm(j) = indxq(indx(j))
                 call stdlib_ccopy(qsiz, q(1, perm(j)), 1, q2(1, j), 1)
              end do
              call stdlib_clacpy('a', qsiz, n, q2(1, 1), ldq2, q(1, 1), ldq)
              return
           end if
           ! if there are multiple eigenvalues then the problem deflates.  here
           ! the number of equal eigenvalues are found.  as each equal
           ! eigenvalue is found, an elementary reflector is computed to rotate
           ! the corresponding eigensubspace so that the corresponding
           ! components of z are zero in this new basis.
           k = 0
           k2 = n + 1
           do j = 1, n
              if (rho*abs(z(j)) <= tol) then
                 ! deflate due to small z component.
                 k2 = k2 - 1
                 indxp(k2) = j
                 if (j == n) go to 100
              else
                 jlam = j
                 go to 70
              end if
           end do
70      continue
           j = j + 1
           if (j > n) go to 90
           if (rho*abs(z(j)) <= tol) then
              ! deflate due to small z component.
              k2 = k2 - 1
              indxp(k2) = j
           else
              ! check if eigenvalues are close enough to allow deflation.
              s = z(jlam)
              c = z(j)
              ! find sqrt(a**2+b**2) without overflow or
              ! destructive underflow.
              tau = stdlib_slapy2(c, s)
              t = d(j) - d(jlam)
              c = c/tau
              s = -s/tau
              if (abs(t*c*s) <= tol) then
                 ! deflation is possible.
                 z(j) = tau
                 z(jlam) = zero
                 ! record the appropriate givens rotation
                 givptr = givptr + 1
                 givcol(1, givptr) = indxq(indx(jlam))
                 givcol(2, givptr) = indxq(indx(j))
                 givnum(1, givptr) = c
                 givnum(2, givptr) = s
                 call stdlib_csrot(qsiz, q(1, indxq(indx(jlam))), 1, q(1, indxq(indx(j) &
                           )), 1, c, s)
                 t = d(jlam)*c*c + d(j)*s*s
                 d(j) = d(jlam)*s*s + d(j)*c*c
                 d(jlam) = t
                 k2 = k2 - 1
                 i = 1
80      continue
                 if (k2 + i <= n) then
                    if (d(jlam) < d(indxp(k2 + i))) then
                       indxp(k2 + i - 1) = indxp(k2 + i)
                       indxp(k2 + i) = jlam
                       i = i + 1
                       go to 80
                    else
                       indxp(k2 + i - 1) = jlam
                    end if
                 else
                    indxp(k2 + i - 1) = jlam
                 end if
                 jlam = j
              else
                 k = k + 1
                 w(k) = z(jlam)
                 dlamda(k) = d(jlam)
                 indxp(k) = jlam
                 jlam = j
              end if
           end if
           go to 70
90      continue
           ! record the last eigenvalue.
           k = k + 1
           w(k) = z(jlam)
           dlamda(k) = d(jlam)
           indxp(k) = jlam
100    continue
           ! sort the eigenvalues and corresponding eigenvectors into dlamda
           ! and q2 respectively.  the eigenvalues/vectors which were not
           ! deflated go into the first k slots of dlamda and q2 respectively,
           ! while those which were deflated go into the last n - k slots.
           do j = 1, n
              jp = indxp(j)
              dlamda(j) = d(jp)
              perm(j) = indxq(indx(jp))
              call stdlib_ccopy(qsiz, q(1, perm(j)), 1, q2(1, j), 1)
           end do
           ! the deflated eigenvalues and their corresponding vectors go back
           ! into the last n - k slots of d and q respectively.
           if (k < n) then
              call stdlib_scopy(n - k, dlamda(k + 1), 1, d(k + 1), 1)
              call stdlib_clacpy('a', qsiz, n - k, q2(1, k + 1), ldq2, q(1, k + 1), ldq)
           end if
           return
           ! end of stdlib_claed8
     end subroutine stdlib_claed8

     ! CLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix
     ! ( ( A, B );( B, C ) )
     ! provided the norm of the matrix of eigenvectors is larger than
     ! some threshold value.
     ! RT1 is the eigenvalue of larger absolute value, and RT2 of
     ! smaller absolute value.  If the eigenvectors are computed, then
     ! on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence
     ! [  CS1     SN1   ] . [ A  B ] . [ CS1    -SN1   ] = [ RT1  0  ]
     ! [ -SN1     CS1   ]   [ B  C ]   [ SN1     CS1   ]   [  0  RT2 ]

     subroutine stdlib_claesy(a, b, c, rt1, rt2, evscal, cs1, sn1)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           complex(sp) :: a, b, c, cs1, evscal, rt1, rt2, sn1
       ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1e0
           
           ! .. local scalars ..
           real(sp) :: babs, evnorm, tabs, z
           complex(sp) :: s, t, tmp
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sqrt
           ! .. executable statements ..
           ! special case:  the matrix is actually diagonal.
           ! to avoid divide by zero later, we treat this case separately.
           if (abs(b) == zero) then
              rt1 = a
              rt2 = c
              if (abs(rt1) < abs(rt2)) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
                 cs1 = zero
                 sn1 = one
              else
                 cs1 = one
                 sn1 = zero
              end if
           else
              ! compute the eigenvalues and eigenvectors.
              ! the characteristic equation is
                 ! lambda **2 - (a+c) lambda + (a*c - b*b)
              ! and we solve it using the quadratic formula.
              s = (a + c)*half
              t = (a - c)*half
              ! take the square root carefully to avoid over/under flow.
              babs = abs(b)
              tabs = abs(t)
              z = max(babs, tabs)
              if (z > zero) t = z*sqrt((t/z)**2 + (b/z)**2)
              ! compute the two eigenvalues.  rt1 and rt2 are exchanged
              ! if necessary so that rt1 will have the greater magnitude.
              rt1 = s + t
              rt2 = s - t
              if (abs(rt1) < abs(rt2)) then
                 tmp = rt1
                 rt1 = rt2
                 rt2 = tmp
              end if
              ! choose cs1 = 1 and sn1 to satisfy the first equation, then
              ! scale the components of this eigenvector so that the matrix
              ! of eigenvectors x satisfies  x * x**t = i .  (no scaling is
              ! done if the norm of the eigenvalue matrix is less than thresh.)
              sn1 = (rt1 - a)/b
              tabs = abs(sn1)
              if (tabs > one) then
                 t = tabs*sqrt((one/tabs)**2 + (sn1/tabs)**2)
              else
                 t = sqrt(cone + sn1*sn1)
              end if
              evnorm = abs(t)
              if (evnorm >= thresh) then
                 evscal = cone/t
                 cs1 = evscal
                 sn1 = sn1*evscal
              else
                 evscal = zero
              end if
           end if
           return
           ! end of stdlib_claesy
     end subroutine stdlib_claesy

     ! CLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix
     ! [  A         B  ]
     ! [  CONJG(B)  C  ].
     ! On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
     ! eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
     ! eigenvector for RT1, giving the decomposition
     ! [ CS1  CONJG(SN1) ] [    A     B ] [ CS1 -CONJG(SN1) ] = [ RT1  0  ]
     ! [-SN1     CS1     ] [ CONJG(B) C ] [ SN1     CS1     ]   [  0  RT2 ].

     subroutine stdlib_claev2(a, b, c, rt1, rt2, cs1, sn1)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(sp) :: cs1, rt1, rt2
           complex(sp) :: a, b, c, sn1
       ! =====================================================================
           
           ! .. local scalars ..
           real(sp) :: t
           complex(sp) :: w
     
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, real
           ! .. executable statements ..
           if (abs(b) == zero) then
              w = one
           else
              w = conjg(b)/abs(b)
           end if
           call stdlib_slaev2(real(a), abs(b), real(c), rt1, rt2, cs1, t)
           sn1 = w*t
           return
           ! end of stdlib_claev2
     end subroutine stdlib_claev2

     ! CLAG2Z converts a COMPLEX matrix, SA, to a COMPLEX*16 matrix, A.
     ! Note that while it is possible to overflow while converting
     ! from double to single, it is not possible to overflow when
     ! converting from single to double.
     ! This is an auxiliary routine so there is no argument checking.

     subroutine stdlib_clag2z(m, n, sa, ldsa, a, lda, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, ldsa, m, n
           ! .. array arguments ..
           complex(sp) :: sa(ldsa, *)
           complex(dp) :: a(lda, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
           ! .. executable statements ..
           info = 0
           do j = 1, n
              do i = 1, m
                 a(i, j) = sa(i, j)
              end do
           end do
           return
           ! end of stdlib_clag2z
     end subroutine stdlib_clag2z

     ! CLAGTM performs a matrix-vector product of the form
     ! B := alpha * A * X + beta * B
     ! where A is a tridiagonal matrix of order N, B and X are N by NRHS
     ! matrices, and alpha and beta are real scalars, each of which may be
     ! 0., 1., or -1.

     subroutine stdlib_clagtm(trans, n, nrhs, alpha, dl, d, du, x, ldx, beta, b, ldb)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: trans
           integer(ilp) :: ldb, ldx, n, nrhs
           real(sp) :: alpha, beta
           ! .. array arguments ..
           complex(sp) :: b(ldb, *), d(*), dl(*), du(*), x(ldx, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           if (n == 0) return
           ! multiply b by beta if beta/=1.
           if (beta == zero) then
              do j = 1, nrhs
                 do i = 1, n
                    b(i, j) = zero
                 end do
              end do
           else if (beta == -one) then
              do j = 1, nrhs
                 do i = 1, n
                    b(i, j) = -b(i, j)
                 end do
              end do
           end if
           if (alpha == one) then
              if (stdlib_lsame(trans, 'n')) then
                 ! compute b := b + a*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) + d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) + d(1)*x(1, j) + du(1)*x(2, j)
                       b(n, j) = b(n, j) + dl(n - 1)*x(n - 1, j) + d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) + dl(i - 1)*x(i - 1, j) + d(i)*x(i, j) + du(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              else if (stdlib_lsame(trans, 't')) then
                 ! compute b := b + a**t * x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) + d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) + d(1)*x(1, j) + dl(1)*x(2, j)
                       b(n, j) = b(n, j) + du(n - 1)*x(n - 1, j) + d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) + du(i - 1)*x(i - 1, j) + d(i)*x(i, j) + dl(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              else if (stdlib_lsame(trans, 'c')) then
                 ! compute b := b + a**h * x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) + conjg(d(1))*x(1, j)
                    else
                       b(1, j) = b(1, j) + conjg(d(1))*x(1, j) + conjg(dl(1))*x(2, &
                                 j)
                       b(n, j) = b(n, j) + conjg(du(n - 1))*x(n - 1, j) + conjg(d(n))*x( &
                                  n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) + conjg(du(i - 1))*x(i - 1, j) + conjg(d(i)) &
                                    *x(i, j) + conjg(dl(i))*x(i + 1, j)
                       end do
                    end if
                 end do
              end if
           else if (alpha == -one) then
              if (stdlib_lsame(trans, 'n')) then
                 ! compute b := b - a*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) - d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) - d(1)*x(1, j) - du(1)*x(2, j)
                       b(n, j) = b(n, j) - dl(n - 1)*x(n - 1, j) - d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) - dl(i - 1)*x(i - 1, j) - d(i)*x(i, j) - du(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              else if (stdlib_lsame(trans, 't')) then
                 ! compute b := b - a**t*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) - d(1)*x(1, j)
                    else
                       b(1, j) = b(1, j) - d(1)*x(1, j) - dl(1)*x(2, j)
                       b(n, j) = b(n, j) - du(n - 1)*x(n - 1, j) - d(n)*x(n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) - du(i - 1)*x(i - 1, j) - d(i)*x(i, j) - dl(i &
                                    )*x(i + 1, j)
                       end do
                    end if
                 end do
              else if (stdlib_lsame(trans, 'c')) then
                 ! compute b := b - a**h*x
                 do j = 1, nrhs
                    if (n == 1) then
                       b(1, j) = b(1, j) - conjg(d(1))*x(1, j)
                    else
                       b(1, j) = b(1, j) - conjg(d(1))*x(1, j) - conjg(dl(1))*x(2, &
                                 j)
                       b(n, j) = b(n, j) - conjg(du(n - 1))*x(n - 1, j) - conjg(d(n))*x( &
                                  n, j)
                       do i = 2, n - 1
                          b(i, j) = b(i, j) - conjg(du(i - 1))*x(i - 1, j) - conjg(d(i)) &
                                    *x(i, j) - conjg(dl(i))*x(i + 1, j)
                       end do
                    end if
                 end do
              end if
           end if
           return
           ! end of stdlib_clagtm
     end subroutine stdlib_clagtm

     ! CLAHEF computes a partial factorization of a complex Hermitian
     ! matrix A using the Bunch-Kaufman diagonal pivoting method. The
     ! partial factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I      0     )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**H U22**H )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**H L21**H )  if UPLO = 'L'
     ! ( L21  I ) (  0  A22 ) (  0      I     )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! Note that U**H denotes the conjugate transpose of U.
     ! CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code
     ! (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
     ! A22 (if UPLO = 'L').

     subroutine stdlib_clahef(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw
           real(sp) :: absakk, alpha, colmax, r1, rowmax, t
           complex(sp) :: d11, d21, d22, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, conjg, max, min, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11 (note that conjg(w) is actually stored)
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              k = n
10      continue
              ! kw is the column of w which corresponds to column k of a
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              kstep = 1
              ! copy column k of a to column kw of w and update it
              call stdlib_ccopy(k - 1, a(1, k), 1, w(1, kw), 1)
              w(k, kw) = real(a(k, k))
              if (k < n) then
                 call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w(k, kw + 1), &
                           ldw, cone, w(1, kw), 1)
                 w(k, kw) = real(w(k, kw))
              end if
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(w(k, kw)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, w(1, kw), 1)
                 colmax = cabs1(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! begin pivot search along imax row
                    ! copy column imax to column kw-1 of w and update it
                    call stdlib_ccopy(imax - 1, a(1, imax), 1, w(1, kw - 1), 1)
                    w(imax, kw - 1) = real(a(imax, imax))
                    call stdlib_ccopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                              
                    call stdlib_clacgv(k - imax, w(imax + 1, kw - 1), 1)
                    if (k < n) then
                       call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w(imax, &
                                  kw + 1), ldw, cone, w(1, kw - 1), 1)
                       w(imax, kw - 1) = real(w(imax, kw - 1))
                    end if
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value.
                    ! determine only rowmax.
                    jmax = imax + stdlib_icamax(k - imax, w(imax + 1, kw - 1), 1)
                    rowmax = cabs1(w(jmax, kw - 1))
                    if (imax > 1) then
                       jmax = stdlib_icamax(imax - 1, w(1, kw - 1), 1)
                       rowmax = max(rowmax, cabs1(w(jmax, kw - 1)))
                    end if
                    ! case(2)
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    ! case(3)
                    else if (abs(real(w(imax, kw - 1))) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                       ! copy column kw-1 of w to column kw of w
                       call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                    ! case(4)
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                    ! end pivot search along imax row
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kkw of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k-1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = real(a(kk, kk))
                    call stdlib_ccopy(kk - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    call stdlib_clacgv(kk - 1 - kp, a(kp, kp + 1), lda)
                    if (kp > 1) call stdlib_ccopy(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last k+1 to n columns of a
                    ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in last kkw to nb columns of w.
                    if (k < n) call stdlib_cswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                    call stdlib_cswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(kw) = u(k)*d(k),
                    ! where u(k) is the k-th column of u
                    ! (1) store subdiag. elements of column u(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element u(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,kw)
                       ! a(1:k-1,k) := u(1:k-1,k) = w(1:k-1,kw)/d(k,k)
                    ! (note: no need to use for hermitian matrix
                    ! a( k, k ) = real( w( k, k) ,KIND=sp) to separately copy diagonal
                    ! element d(k,k) from w (potentially saves only one load))
                    call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
                    if (k > 1) then
                       ! (note: no need to check if a(k,k) is not zero,
                        ! since that was ensured earlier in pivot search:
                        ! case a(k,k) = 0 falls into 2x2 pivot case(4))
                       r1 = one/real(a(k, k))
                       call stdlib_csscal(k - 1, r1, a(1, k), 1)
                       ! (2) conjugate column w(kw)
                       call stdlib_clacgv(k - 1, w(1, kw), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold
                    ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2
                    ! block d(k-1:k,k-1:k) in columns k-1 and k of a.
                    ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit
                    ! block and not stored)
                       ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw)
                       ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) =
                       ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) )
                    if (k > 2) then
                       ! factor out the columns of the inverse of 2-by-2 pivot
                       ! block d, so that each column contains 1, to reduce the
                       ! number of flops when we multiply panel
                       ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1).
                       ! d**(-1) = ( d11 cj(d21) )**(-1) =
                                 ! ( d21    d22 )
                       ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
                                                ! ( (-d21) (     d11 ) )
                       ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
                         ! * ( d21*( d22/d21 ) conj(d21)*(           - 1 ) ) =
                           ! (     (      -1 )           ( d11/conj(d21) ) )
                       ! = 1/(|d21|**2) * 1/(d22*d11-1) *
                         ! * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                           ! (     (  -1 )           ( d22 ) )
                       ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                                            ! (     (  -1 )           ( d22 ) )
                       ! = ( (t/conj(d21))*( d11 ) (t/d21)*(  -1 ) ) =
                         ! (               (  -1 )         ( d22 ) )
                       ! = ( conj(d21)*( d11 ) d21*(  -1 ) )
                         ! (           (  -1 )     ( d22 ) ),
                       ! where d11 = d22/d21,
                             ! d22 = d11/conj(d21),
                             ! d21 = t/d21,
                             ! t = 1/(d22*d11-1).
                       ! (note: no need to check for division by zero,
                        ! since that was ensured earlier in pivot search:
                        ! (a) d21 != 0, since in 2x2 pivot case(4)
                            ! |d21| should be larger than |d11| and |d22|;
                        ! (b) (d22*d11 - 1) != 0, since from (a),
                            ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.)
                       d21 = w(k - 1, kw)
                       d11 = w(k, kw)/conjg(d21)
                       d22 = w(k - 1, kw - 1)/d21
                       t = one/(real(d11*d22) - one)
                       d21 = t/d21
                       ! update elements in columns a(k-1) and a(k) as
                       ! dot products of rows of ( w(kw-1) w(kw) ) and columns
                       ! of d**(-1)
                       do j = 1, k - 2
                          a(j, k - 1) = d21*(d11*w(j, kw - 1) - w(j, kw))
                          a(j, k) = conjg(d21)*(d22*w(j, kw) - w(j, kw - 1))
                       end do
                    end if
                    ! copy d(k) to a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = w(k - 1, kw)
                    a(k, k) = w(k, kw)
                    ! (2) conjugate columns w(kw) and w(kw-1)
                    call stdlib_clacgv(k - 1, w(1, kw), 1)
                    call stdlib_clacgv(k - 2, w(1, kw - 1), 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h
              ! computing blocks of nb columns at a time (note that conjg(w) is
              ! actually stored)
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    a(jj, jj) = real(a(jj, jj))
                    call stdlib_cgemv('no transpose', jj - j + 1, n - k, -cone, a(j, k + 1), lda, w(jj, &
                               kw + 1), ldw, cone, a(j, jj), 1)
                    a(jj, jj) = real(a(jj, jj))
                 end do
                 ! update the rectangular superdiagonal block
                 call stdlib_cgemm('no transpose', 'transpose', j - 1, jb, n - k, -cone, a(1, k + 1), &
                           lda, w(j, kw + 1), ldw, cone, a(1, j), lda)
              end do
              ! put u12 in standard form by partially undoing the interchanges
              ! in of rows in columns k+1:n looping backwards from k+1 to n
              j = k + 1
60      continue
                 ! undo the interchanges (if any) of rows j and jp
                 ! at each step j
                 ! (here, j is a diagonal index)
                 jj = j
                 jp = ipiv(j)
                 if (jp < 0) then
                    jp = -jp
                    ! (here, j is a diagonal index)
                    j = j + 1
                 end if
                 ! (note: here, j is used to determine row length. length n-j+1
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j + 1
                 if (jp /= jj .and. j <= n) call stdlib_cswap(n - j + 1, a(jp, j), lda, a(jj, j), &
                           lda)
              if (j <= n) go to 60
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22 (note that conjg(w) is actually stored)
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              kstep = 1
              ! copy column k of a to column k of w and update it
              w(k, k) = real(a(k, k))
              if (k < n) call stdlib_ccopy(n - k, a(k + 1, k), 1, w(k + 1, k), 1)
              call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(k, 1), ldw, &
                         cone, w(k, k), 1)
              w(k, k) = real(w(k, k))
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(w(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, w(k + 1, k), 1)
                 colmax = cabs1(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(a(k, k))
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! begin pivot search along imax row
                    ! copy column imax to column k+1 of w and update it
                    call stdlib_ccopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                    call stdlib_clacgv(imax - k, w(k, k + 1), 1)
                    w(imax, k + 1) = real(a(imax, imax))
                    if (imax < n) call stdlib_ccopy(n - imax, a(imax + 1, imax), 1, w(imax + 1, k + 1), &
                              1)
                    call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(imax, &
                              1), ldw, cone, w(k, k + 1), 1)
                    w(imax, k + 1) = real(w(imax, k + 1))
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value.
                    ! determine only rowmax.
                    jmax = k - 1 + stdlib_icamax(imax - k, w(k, k + 1), 1)
                    rowmax = cabs1(w(jmax, k + 1))
                    if (imax < n) then
                       jmax = imax + stdlib_icamax(n - imax, w(imax + 1, k + 1), 1)
                       rowmax = max(rowmax, cabs1(w(jmax, k + 1)))
                    end if
                    ! case(2)
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    ! case(3)
                    else if (abs(real(w(imax, k + 1))) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                       ! copy column k+1 of w to column k of w
                       call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                    ! case(4)
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                    ! end pivot search along imax row
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k + kstep - 1
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kk of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k+1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = real(a(kk, kk))
                    call stdlib_ccopy(kp - kk - 1, a(kk + 1, kk), 1, a(kp, kk + 1), lda)
                    call stdlib_clacgv(kp - kk - 1, a(kp, kk + 1), lda)
                    if (kp < n) call stdlib_ccopy(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    ! interchange rows kk and kp in first k-1 columns of a
                    ! (columns k (or k and k+1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in first kk columns of w.
                    if (k > 1) call stdlib_cswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_cswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k),
                    ! where l(k) is the k-th column of l
                    ! (1) store subdiag. elements of column l(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element l(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,k)
                       ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k)
                    ! (note: no need to use for hermitian matrix
                    ! a( k, k ) = real( w( k, k) ,KIND=sp) to separately copy diagonal
                    ! element d(k,k) from w (potentially saves only one load))
                    call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       ! (note: no need to check if a(k,k) is not zero,
                        ! since that was ensured earlier in pivot search:
                        ! case a(k,k) = 0 falls into 2x2 pivot case(4))
                       r1 = one/real(a(k, k))
                       call stdlib_csscal(n - k, r1, a(k + 1, k), 1)
                       ! (2) conjugate column w(k)
                       call stdlib_clacgv(n - k, w(k + 1, k), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2
                    ! block d(k:k+1,k:k+1) in columns k and k+1 of a.
                    ! (note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit
                    ! block and not stored)
                       ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1)
                       ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) =
                       ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) )
                    if (k < n - 1) then
                       ! factor out the columns of the inverse of 2-by-2 pivot
                       ! block d, so that each column contains 1, to reduce the
                       ! number of flops when we multiply panel
                       ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1).
                       ! d**(-1) = ( d11 cj(d21) )**(-1) =
                                 ! ( d21    d22 )
                       ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
                                                ! ( (-d21) (     d11 ) )
                       ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
                         ! * ( d21*( d22/d21 ) conj(d21)*(           - 1 ) ) =
                           ! (     (      -1 )           ( d11/conj(d21) ) )
                       ! = 1/(|d21|**2) * 1/(d22*d11-1) *
                         ! * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                           ! (     (  -1 )           ( d22 ) )
                       ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                                            ! (     (  -1 )           ( d22 ) )
                       ! = ( (t/conj(d21))*( d11 ) (t/d21)*(  -1 ) ) =
                         ! (               (  -1 )         ( d22 ) )
                       ! = ( conj(d21)*( d11 ) d21*(  -1 ) )
                         ! (           (  -1 )     ( d22 ) )
                       ! where d11 = d22/d21,
                             ! d22 = d11/conj(d21),
                             ! d21 = t/d21,
                             ! t = 1/(d22*d11-1).
                       ! (note: no need to check for division by zero,
                        ! since that was ensured earlier in pivot search:
                        ! (a) d21 != 0, since in 2x2 pivot case(4)
                            ! |d21| should be larger than |d11| and |d22|;
                        ! (b) (d22*d11 - 1) != 0, since from (a),
                            ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.)
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/conjg(d21)
                       t = one/(real(d11*d22) - one)
                       d21 = t/d21
                       ! update elements in columns a(k) and a(k+1) as
                       ! dot products of rows of ( w(k) w(k+1) ) and columns
                       ! of d**(-1)
                       do j = k + 2, n
                          a(j, k) = conjg(d21)*(d11*w(j, k) - w(j, k + 1))
                          a(j, k + 1) = d21*(d22*w(j, k + 1) - w(j, k))
                       end do
                    end if
                    ! copy d(k) to a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = w(k + 1, k)
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                    ! (2) conjugate columns w(k) and w(k+1)
                    call stdlib_clacgv(n - k, w(k + 1, k), 1)
                    call stdlib_clacgv(n - k - 1, w(k + 2, k + 1), 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**h = a22 - l21*w**h
              ! computing blocks of nb columns at a time (note that conjg(w) is
              ! actually stored)
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    a(jj, jj) = real(a(jj, jj))
                    call stdlib_cgemv('no transpose', j + jb - jj, k - 1, -cone, a(jj, 1), lda, w(jj, &
                               1), ldw, cone, a(jj, jj), 1)
                    a(jj, jj) = real(a(jj, jj))
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_cgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           cone, a(j + jb, 1), lda, w(j, 1), ldw, cone, a(j + jb, j), lda)
              end do
              ! put l21 in standard form by partially undoing the interchanges
              ! of rows in columns 1:k-1 looping backwards from k-1 to 1
              j = k - 1
120    continue
                 ! undo the interchanges (if any) of rows j and jp
                 ! at each step j
                 ! (here, j is a diagonal index)
                 jj = j
                 jp = ipiv(j)
                 if (jp < 0) then
                    jp = -jp
                    ! (here, j is a diagonal index)
                    j = j - 1
                 end if
                 ! (note: here, j is used to determine row length. length j
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j - 1
                 if (jp /= jj .and. j >= 1) call stdlib_cswap(j, a(jp, 1), lda, a(jj, 1), lda)
                           
              if (j >= 1) go to 120
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_clahef
     end subroutine stdlib_clahef

     ! CLAHEF_RK computes a partial factorization of a complex Hermitian
     ! matrix A using the bounded Bunch-Kaufman (rook) diagonal
     ! pivoting method. The partial factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I       0    )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**H U22**H )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**H L21**H )  if UPLO = 'L',
     ! ( L21  I ) (  0  A22 ) (  0       I    )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! CLAHEF_RK is an auxiliary routine called by CHETRF_RK. It uses
     ! blocked code (calling Level 3 BLAS) to update the submatrix
     ! A11 (if UPLO = 'U') or A22 (if UPLO = 'L').

     subroutine stdlib_clahef_rk(uplo, n, nb, kb, a, lda, e, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), w(ldw, *), e(*)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: done
           integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, k, kk, kkw, kp, kstep, kw, p
           real(sp) :: absakk, alpha, colmax, stemp, r1, rowmax, t, sfmin
           complex(sp) :: d11, d21, d22, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, aimag, max, min, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11 (note that conjg(w) is actually stored)
              ! initialize the first entry of array e, where superdiagonal
              ! elements of d are stored
              e(1) = czero
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              k = n
10      continue
              ! kw is the column of w which corresponds to column k of a
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              kstep = 1
              p = k
              ! copy column k of a to column kw of w and update it
              if (k > 1) call stdlib_ccopy(k - 1, a(1, k), 1, w(1, kw), 1)
              w(k, kw) = real(a(k, k))
              if (k < n) then
                 call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w(k, kw + 1), &
                           ldw, cone, w(1, kw), 1)
                 w(k, kw) = real(w(k, kw))
              end if
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(w(k, kw)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, w(1, kw), 1)
                 colmax = cabs1(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(w(k, kw))
                 if (k > 1) call stdlib_ccopy(k - 1, w(1, kw), 1, a(1, k), 1)
                 ! set e( k ) to zero
                 if (k > 1) e(k) = czero
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! lop until pivot found
                    done = .false.
12      continue
                       ! begin pivot search loop body
                       ! copy column imax to column kw-1 of w and update it
                       if (imax > 1) call stdlib_ccopy(imax - 1, a(1, imax), 1, w(1, kw - 1), 1)
                                 
                       w(imax, kw - 1) = real(a(imax, imax))
                       call stdlib_ccopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                                 
                       call stdlib_clacgv(k - imax, w(imax + 1, kw - 1), 1)
                       if (k < n) then
                          call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w( &
                                    imax, kw + 1), ldw, cone, w(1, kw - 1), 1)
                          w(imax, kw - 1) = real(w(imax, kw - 1))
                       end if
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, w(imax + 1, kw - 1), 1)
                          rowmax = cabs1(w(jmax, kw - 1))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, w(1, kw - 1), 1)
                          stemp = cabs1(w(itemp, kw - 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,kw-1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(w(imax, kw - 1))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column kw-1 of w to column kw of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 ! interchange rows and columns p and k.
                 ! updated column p is already stored in column kw of w.
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p of submatrix a
                    ! at step k. no need to copy element into columns
                    ! k and k-1 of a for 2-by-2 pivot, since these columns
                    ! will be later overwritten.
                    a(p, p) = real(a(k, k))
                    call stdlib_ccopy(k - 1 - p, a(p + 1, k), 1, a(p, p + 1), lda)
                    call stdlib_clacgv(k - 1 - p, a(p, p + 1), lda)
                    if (p > 1) call stdlib_ccopy(p - 1, a(1, k), 1, a(1, p), 1)
                    ! interchange rows k and p in the last k+1 to n columns of a
                    ! (columns k and k-1 of a for 2-by-2 pivot will be
                    ! later overwritten). interchange rows k and p
                    ! in last kkw to nb columns of w.
                    if (k < n) call stdlib_cswap(n - k, a(k, k + 1), lda, a(p, k + 1), lda)
                    call stdlib_cswap(n - kk + 1, w(k, kkw), ldw, w(p, kkw), ldw)
                 end if
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kkw of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k-1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = real(a(kk, kk))
                    call stdlib_ccopy(kk - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    call stdlib_clacgv(kk - 1 - kp, a(kp, kp + 1), lda)
                    if (kp > 1) call stdlib_ccopy(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last k+1 to n columns of a
                    ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in last kkw to nb columns of w.
                    if (k < n) call stdlib_cswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                    call stdlib_cswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(kw) = u(k)*d(k),
                    ! where u(k) is the k-th column of u
                    ! (1) store subdiag. elements of column u(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element u(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,kw)
                       ! a(1:k-1,k) := u(1:k-1,k) = w(1:k-1,kw)/d(k,k)
                    ! (note: no need to use for hermitian matrix
                    ! a( k, k ) = real( w( k, k) ) to separately copy diagonal
                    ! element d(k,k) from w (potentially saves only one load))
                    call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
                    if (k > 1) then
                       ! (note: no need to check if a(k,k) is not zero,
                        ! since that was ensured earlier in pivot search:
                        ! case a(k,k) = 0 falls into 2x2 pivot case(3))
                       ! handle division by a small number
                       t = real(a(k, k))
                       if (abs(t) >= sfmin) then
                          r1 = one/t
                          call stdlib_csscal(k - 1, r1, a(1, k), 1)
                       else
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/t
                          end do
                       end if
                       ! (2) conjugate column w(kw)
                       call stdlib_clacgv(k - 1, w(1, kw), 1)
                       ! store the superdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold
                    ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2
                    ! block d(k-1:k,k-1:k) in columns k-1 and k of a.
                    ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit
                    ! block and not stored)
                       ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw)
                       ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) =
                       ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) )
                    if (k > 2) then
                       ! factor out the columns of the inverse of 2-by-2 pivot
                       ! block d, so that each column contains 1, to reduce the
                       ! number of flops when we multiply panel
                       ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1).
                       ! d**(-1) = ( d11 cj(d21) )**(-1) =
                                 ! ( d21    d22 )
                       ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
                                                ! ( (-d21) (     d11 ) )
                       ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
                         ! * ( d21*( d22/d21 ) conj(d21)*(           - 1 ) ) =
                           ! (     (      -1 )           ( d11/conj(d21) ) )
                       ! = 1/(|d21|**2) * 1/(d22*d11-1) *
                         ! * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                           ! (     (  -1 )           ( d22 ) )
                       ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                                            ! (     (  -1 )           ( d22 ) )
                       ! = ( (t/conj(d21))*( d11 ) (t/d21)*(  -1 ) ) =
                         ! (               (  -1 )         ( d22 ) )
                       ! handle division by a small number. (note: order of
                       ! operations is important)
                       ! = ( t*(( d11 )/conj(d21)) t*((  -1 )/d21 ) )
                         ! (   ((  -1 )          )   (( d22 )     ) ),
                       ! where d11 = d22/d21,
                             ! d22 = d11/conj(d21),
                             ! d21 = d21,
                             ! t = 1/(d22*d11-1).
                       ! (note: no need to check for division by zero,
                        ! since that was ensured earlier in pivot search:
                        ! (a) d21 != 0 in 2x2 pivot case(4),
                            ! since |d21| should be larger than |d11| and |d22|;
                        ! (b) (d22*d11 - 1) != 0, since from (a),
                            ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.)
                       d21 = w(k - 1, kw)
                       d11 = w(k, kw)/conjg(d21)
                       d22 = w(k - 1, kw - 1)/d21
                       t = one/(real(d11*d22) - one)
                       ! update elements in columns a(k-1) and a(k) as
                       ! dot products of rows of ( w(kw-1) w(kw) ) and columns
                       ! of d**(-1)
                       do j = 1, k - 2
                          a(j, k - 1) = t*((d11*w(j, kw - 1) - w(j, kw))/d21)
                          a(j, k) = t*((d22*w(j, kw) - w(j, kw - 1))/conjg(d21))
                       end do
                    end if
                    ! copy diagonal elements of d(k) to a,
                    ! copy superdiagonal element of d(k) to e(k) and
                    ! zero out superdiagonal entry of a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = czero
                    a(k, k) = w(k, kw)
                    e(k) = w(k - 1, kw)
                    e(k - 1) = czero
                    ! (2) conjugate columns w(kw) and w(kw-1)
                    call stdlib_clacgv(k - 1, w(1, kw), 1)
                    call stdlib_clacgv(k - 2, w(1, kw - 1), 1)
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h
              ! computing blocks of nb columns at a time (note that conjg(w) is
              ! actually stored)
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    a(jj, jj) = real(a(jj, jj))
                    call stdlib_cgemv('no transpose', jj - j + 1, n - k, -cone, a(j, k + 1), lda, w(jj, &
                               kw + 1), ldw, cone, a(j, jj), 1)
                    a(jj, jj) = real(a(jj, jj))
                 end do
                 ! update the rectangular superdiagonal block
                 if (j >= 2) call stdlib_cgemm('no transpose', 'transpose', j - 1, jb, n - k, -cone, a( &
                           1, k + 1), lda, w(j, kw + 1), ldw, cone, a(1, j), lda)
              end do
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22 (note that conjg(w) is actually stored)
              ! initialize the unused last entry of the subdiagonal array e.
              e(n) = czero
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              kstep = 1
              p = k
              ! copy column k of a to column k of w and update column k of w
              w(k, k) = real(a(k, k))
              if (k < n) call stdlib_ccopy(n - k, a(k + 1, k), 1, w(k + 1, k), 1)
              if (k > 1) then
                 call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(k, 1), &
                           ldw, cone, w(k, k), 1)
                 w(k, k) = real(w(k, k))
              end if
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(w(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, w(k + 1, k), 1)
                 colmax = cabs1(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(w(k, k))
                 if (k < n) call stdlib_ccopy(n - k, w(k + 1, k), 1, a(k + 1, k), 1)
                 ! set e( k ) to zero
                 if (k < n) e(k) = czero
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
72      continue
                       ! begin pivot search loop body
                       ! copy column imax to column k+1 of w and update it
                       call stdlib_ccopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                       call stdlib_clacgv(imax - k, w(k, k + 1), 1)
                       w(imax, k + 1) = real(a(imax, imax))
                       if (imax < n) call stdlib_ccopy(n - imax, a(imax + 1, imax), 1, w(imax + 1, k + 1 &
                                 ), 1)
                       if (k > 1) then
                          call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w( &
                                    imax, 1), ldw, cone, w(k, k + 1), 1)
                          w(imax, k + 1) = real(w(imax, k + 1))
                       end if
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, w(k, k + 1), 1)
                          rowmax = cabs1(w(jmax, k + 1))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, w(imax + 1, k + 1), 1)
                          stemp = cabs1(w(itemp, k + 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,k+1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(w(imax, k + 1))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column k+1 of w to column k of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 72
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k + kstep - 1
                 ! interchange rows and columns p and k (only for 2-by-2 pivot).
                 ! updated column p is already stored in column k of w.
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column kk-1 to column p of submatrix a
                    ! at step k. no need to copy element into columns
                    ! k and k+1 of a for 2-by-2 pivot, since these columns
                    ! will be later overwritten.
                    a(p, p) = real(a(k, k))
                    call stdlib_ccopy(p - k - 1, a(k + 1, k), 1, a(p, k + 1), lda)
                    call stdlib_clacgv(p - k - 1, a(p, k + 1), lda)
                    if (p < n) call stdlib_ccopy(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    ! interchange rows k and p in first k-1 columns of a
                    ! (columns k and k+1 of a for 2-by-2 pivot will be
                    ! later overwritten). interchange rows k and p
                    ! in first kk columns of w.
                    if (k > 1) call stdlib_cswap(k - 1, a(k, 1), lda, a(p, 1), lda)
                    call stdlib_cswap(kk, w(k, 1), ldw, w(p, 1), ldw)
                 end if
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kk of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k+1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = real(a(kk, kk))
                    call stdlib_ccopy(kp - kk - 1, a(kk + 1, kk), 1, a(kp, kk + 1), lda)
                    call stdlib_clacgv(kp - kk - 1, a(kp, kk + 1), lda)
                    if (kp < n) call stdlib_ccopy(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    ! interchange rows kk and kp in first k-1 columns of a
                    ! (column k (or k and k+1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in first kk columns of w.
                    if (k > 1) call stdlib_cswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_cswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k),
                    ! where l(k) is the k-th column of l
                    ! (1) store subdiag. elements of column l(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element l(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,k)
                       ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k)
                    ! (note: no need to use for hermitian matrix
                    ! a( k, k ) = real( w( k, k) ) to separately copy diagonal
                    ! element d(k,k) from w (potentially saves only one load))
                    call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       ! (note: no need to check if a(k,k) is not zero,
                        ! since that was ensured earlier in pivot search:
                        ! case a(k,k) = 0 falls into 2x2 pivot case(3))
                       ! handle division by a small number
                       t = real(a(k, k))
                       if (abs(t) >= sfmin) then
                          r1 = one/t
                          call stdlib_csscal(n - k, r1, a(k + 1, k), 1)
                       else
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/t
                          end do
                       end if
                       ! (2) conjugate column w(k)
                       call stdlib_clacgv(n - k, w(k + 1, k), 1)
                       ! store the subdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2
                    ! block d(k:k+1,k:k+1) in columns k and k+1 of a.
                    ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit
                    ! block and not stored.
                       ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1)
                       ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) =
                       ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) )
                    if (k < n - 1) then
                       ! factor out the columns of the inverse of 2-by-2 pivot
                       ! block d, so that each column contains 1, to reduce the
                       ! number of flops when we multiply panel
                       ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1).
                       ! d**(-1) = ( d11 cj(d21) )**(-1) =
                                 ! ( d21    d22 )
                       ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
                                                ! ( (-d21) (     d11 ) )
                       ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
                         ! * ( d21*( d22/d21 ) conj(d21)*(           - 1 ) ) =
                           ! (     (      -1 )           ( d11/conj(d21) ) )
                       ! = 1/(|d21|**2) * 1/(d22*d11-1) *
                         ! * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                           ! (     (  -1 )           ( d22 ) )
                       ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                                            ! (     (  -1 )           ( d22 ) )
                       ! = ( (t/conj(d21))*( d11 ) (t/d21)*(  -1 ) ) =
                         ! (               (  -1 )         ( d22 ) )
                       ! handle division by a small number. (note: order of
                       ! operations is important)
                       ! = ( t*(( d11 )/conj(d21)) t*((  -1 )/d21 ) )
                         ! (   ((  -1 )          )   (( d22 )     ) ),
                       ! where d11 = d22/d21,
                             ! d22 = d11/conj(d21),
                             ! d21 = d21,
                             ! t = 1/(d22*d11-1).
                       ! (note: no need to check for division by zero,
                        ! since that was ensured earlier in pivot search:
                        ! (a) d21 != 0 in 2x2 pivot case(4),
                            ! since |d21| should be larger than |d11| and |d22|;
                        ! (b) (d22*d11 - 1) != 0, since from (a),
                            ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.)
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/conjg(d21)
                       t = one/(real(d11*d22) - one)
                       ! update elements in columns a(k) and a(k+1) as
                       ! dot products of rows of ( w(k) w(k+1) ) and columns
                       ! of d**(-1)
                       do j = k + 2, n
                          a(j, k) = t*((d11*w(j, k) - w(j, k + 1))/conjg(d21))
                          a(j, k + 1) = t*((d22*w(j, k + 1) - w(j, k))/d21)
                       end do
                    end if
                    ! copy diagonal elements of d(k) to a,
                    ! copy subdiagonal element of d(k) to e(k) and
                    ! zero out subdiagonal entry of a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = czero
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                    e(k) = w(k + 1, k)
                    e(k + 1) = czero
                    ! (2) conjugate columns w(k) and w(k+1)
                    call stdlib_clacgv(n - k, w(k + 1, k), 1)
                    call stdlib_clacgv(n - k - 1, w(k + 2, k + 1), 1)
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**h = a22 - l21*w**h
              ! computing blocks of nb columns at a time (note that conjg(w) is
              ! actually stored)
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    a(jj, jj) = real(a(jj, jj))
                    call stdlib_cgemv('no transpose', j + jb - jj, k - 1, -cone, a(jj, 1), lda, w(jj, &
                               1), ldw, cone, a(jj, jj), 1)
                    a(jj, jj) = real(a(jj, jj))
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_cgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           cone, a(j + jb, 1), lda, w(j, 1), ldw, cone, a(j + jb, j), lda)
              end do
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_clahef_rk
     end subroutine stdlib_clahef_rk

     ! CLAHEF_ROOK computes a partial factorization of a complex Hermitian
     ! matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting
     ! method. The partial factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I      0     )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**H U22**H )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**H L21**H )  if UPLO = 'L'
     ! ( L21  I ) (  0  A22 ) (  0      I     )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! Note that U**H denotes the conjugate transpose of U.
     ! CLAHEF_ROOK is an auxiliary routine called by CHETRF_ROOK. It uses
     ! blocked code (calling Level 3 BLAS) to update the submatrix
     ! A11 (if UPLO = 'U') or A22 (if UPLO = 'L').

     subroutine stdlib_clahef_rook(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: done
           integer(ilp) :: imax, itemp, ii, j, jb, jj, jmax, jp1, jp2, k, kk, kkw, kp, kstep, kw, &
                     p
           real(sp) :: absakk, alpha, colmax, stemp, r1, rowmax, t, sfmin
           complex(sp) :: d11, d21, d22, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, aimag, max, min, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11 (note that conjg(w) is actually stored)
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              k = n
10      continue
              ! kw is the column of w which corresponds to column k of a
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              kstep = 1
              p = k
              ! copy column k of a to column kw of w and update it
              if (k > 1) call stdlib_ccopy(k - 1, a(1, k), 1, w(1, kw), 1)
              w(k, kw) = real(a(k, k))
              if (k < n) then
                 call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w(k, kw + 1), &
                           ldw, cone, w(1, kw), 1)
                 w(k, kw) = real(w(k, kw))
              end if
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(w(k, kw)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, w(1, kw), 1)
                 colmax = cabs1(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(w(k, kw))
                 if (k > 1) call stdlib_ccopy(k - 1, w(1, kw), 1, a(1, k), 1)
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! lop until pivot found
                    done = .false.
12      continue
                       ! begin pivot search loop body
                       ! copy column imax to column kw-1 of w and update it
                       if (imax > 1) call stdlib_ccopy(imax - 1, a(1, imax), 1, w(1, kw - 1), 1)
                                 
                       w(imax, kw - 1) = real(a(imax, imax))
                       call stdlib_ccopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                                 
                       call stdlib_clacgv(k - imax, w(imax + 1, kw - 1), 1)
                       if (k < n) then
                          call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w( &
                                    imax, kw + 1), ldw, cone, w(1, kw - 1), 1)
                          w(imax, kw - 1) = real(w(imax, kw - 1))
                       end if
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, w(imax + 1, kw - 1), 1)
                          rowmax = cabs1(w(jmax, kw - 1))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, w(1, kw - 1), 1)
                          stemp = cabs1(w(itemp, kw - 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,kw-1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(w(imax, kw - 1))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column kw-1 of w to column kw of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 ! interchange rows and columns p and k.
                 ! updated column p is already stored in column kw of w.
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p of submatrix a
                    ! at step k. no need to copy element into columns
                    ! k and k-1 of a for 2-by-2 pivot, since these columns
                    ! will be later overwritten.
                    a(p, p) = real(a(k, k))
                    call stdlib_ccopy(k - 1 - p, a(p + 1, k), 1, a(p, p + 1), lda)
                    call stdlib_clacgv(k - 1 - p, a(p, p + 1), lda)
                    if (p > 1) call stdlib_ccopy(p - 1, a(1, k), 1, a(1, p), 1)
                    ! interchange rows k and p in the last k+1 to n columns of a
                    ! (columns k and k-1 of a for 2-by-2 pivot will be
                    ! later overwritten). interchange rows k and p
                    ! in last kkw to nb columns of w.
                    if (k < n) call stdlib_cswap(n - k, a(k, k + 1), lda, a(p, k + 1), lda)
                    call stdlib_cswap(n - kk + 1, w(k, kkw), ldw, w(p, kkw), ldw)
                 end if
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kkw of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k-1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = real(a(kk, kk))
                    call stdlib_ccopy(kk - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    call stdlib_clacgv(kk - 1 - kp, a(kp, kp + 1), lda)
                    if (kp > 1) call stdlib_ccopy(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last k+1 to n columns of a
                    ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in last kkw to nb columns of w.
                    if (k < n) call stdlib_cswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                    call stdlib_cswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(kw) = u(k)*d(k),
                    ! where u(k) is the k-th column of u
                    ! (1) store subdiag. elements of column u(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element u(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,kw)
                       ! a(1:k-1,k) := u(1:k-1,k) = w(1:k-1,kw)/d(k,k)
                    ! (note: no need to use for hermitian matrix
                    ! a( k, k ) = real( w( k, k) ) to separately copy diagonal
                    ! element d(k,k) from w (potentially saves only one load))
                    call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
                    if (k > 1) then
                       ! (note: no need to check if a(k,k) is not zero,
                        ! since that was ensured earlier in pivot search:
                        ! case a(k,k) = 0 falls into 2x2 pivot case(3))
                       ! handle division by a small number
                       t = real(a(k, k))
                       if (abs(t) >= sfmin) then
                          r1 = one/t
                          call stdlib_csscal(k - 1, r1, a(1, k), 1)
                       else
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/t
                          end do
                       end if
                       ! (2) conjugate column w(kw)
                       call stdlib_clacgv(k - 1, w(1, kw), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold
                    ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! (1) store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2
                    ! block d(k-1:k,k-1:k) in columns k-1 and k of a.
                    ! (note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit
                    ! block and not stored)
                       ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw)
                       ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) =
                       ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) )
                    if (k > 2) then
                       ! factor out the columns of the inverse of 2-by-2 pivot
                       ! block d, so that each column contains 1, to reduce the
                       ! number of flops when we multiply panel
                       ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1).
                       ! d**(-1) = ( d11 cj(d21) )**(-1) =
                                 ! ( d21    d22 )
                       ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
                                                ! ( (-d21) (     d11 ) )
                       ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
                         ! * ( d21*( d22/d21 ) conj(d21)*(           - 1 ) ) =
                           ! (     (      -1 )           ( d11/conj(d21) ) )
                       ! = 1/(|d21|**2) * 1/(d22*d11-1) *
                         ! * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                           ! (     (  -1 )           ( d22 ) )
                       ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                                            ! (     (  -1 )           ( d22 ) )
                       ! = ( (t/conj(d21))*( d11 ) (t/d21)*(  -1 ) ) =
                         ! (               (  -1 )         ( d22 ) )
                       ! handle division by a small number. (note: order of
                       ! operations is important)
                       ! = ( t*(( d11 )/conj(d21)) t*((  -1 )/d21 ) )
                         ! (   ((  -1 )          )   (( d22 )     ) ),
                       ! where d11 = d22/d21,
                             ! d22 = d11/conj(d21),
                             ! d21 = d21,
                             ! t = 1/(d22*d11-1).
                       ! (note: no need to check for division by zero,
                        ! since that was ensured earlier in pivot search:
                        ! (a) d21 != 0 in 2x2 pivot case(4),
                            ! since |d21| should be larger than |d11| and |d22|;
                        ! (b) (d22*d11 - 1) != 0, since from (a),
                            ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.)
                       d21 = w(k - 1, kw)
                       d11 = w(k, kw)/conjg(d21)
                       d22 = w(k - 1, kw - 1)/d21
                       t = one/(real(d11*d22) - one)
                       ! update elements in columns a(k-1) and a(k) as
                       ! dot products of rows of ( w(kw-1) w(kw) ) and columns
                       ! of d**(-1)
                       do j = 1, k - 2
                          a(j, k - 1) = t*((d11*w(j, kw - 1) - w(j, kw))/d21)
                          a(j, k) = t*((d22*w(j, kw) - w(j, kw - 1))/conjg(d21))
                       end do
                    end if
                    ! copy d(k) to a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = w(k - 1, kw)
                    a(k, k) = w(k, kw)
                    ! (2) conjugate columns w(kw) and w(kw-1)
                    call stdlib_clacgv(k - 1, w(1, kw), 1)
                    call stdlib_clacgv(k - 2, w(1, kw - 1), 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**h = a11 - u12*w**h
              ! computing blocks of nb columns at a time (note that conjg(w) is
              ! actually stored)
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    a(jj, jj) = real(a(jj, jj))
                    call stdlib_cgemv('no transpose', jj - j + 1, n - k, -cone, a(j, k + 1), lda, w(jj, &
                               kw + 1), ldw, cone, a(j, jj), 1)
                    a(jj, jj) = real(a(jj, jj))
                 end do
                 ! update the rectangular superdiagonal block
                 if (j >= 2) call stdlib_cgemm('no transpose', 'transpose', j - 1, jb, n - k, -cone, a( &
                           1, k + 1), lda, w(j, kw + 1), ldw, cone, a(1, j), lda)
              end do
              ! put u12 in standard form by partially undoing the interchanges
              ! in of rows in columns k+1:n looping backwards from k+1 to n
              j = k + 1
60      continue
                 ! undo the interchanges (if any) of rows j and jp2
                 ! (or j and jp2, and j+1 and jp1) at each step j
                 kstep = 1
                 jp1 = 1
                 ! (here, j is a diagonal index)
                 jj = j
                 jp2 = ipiv(j)
                 if (jp2 < 0) then
                    jp2 = -jp2
                    ! (here, j is a diagonal index)
                    j = j + 1
                    jp1 = -ipiv(j)
                    kstep = 2
                 end if
                 ! (note: here, j is used to determine row length. length n-j+1
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j + 1
                 if (jp2 /= jj .and. j <= n) call stdlib_cswap(n - j + 1, a(jp2, j), lda, a(jj, j), &
                           lda)
                 jj = jj + 1
                 if (kstep == 2 .and. jp1 /= jj .and. j <= n) call stdlib_cswap(n - j + 1, a(jp1, j), &
                           lda, a(jj, j), lda)
              if (j < n) go to 60
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22 (note that conjg(w) is actually stored)
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              kstep = 1
              p = k
              ! copy column k of a to column k of w and update column k of w
              w(k, k) = real(a(k, k))
              if (k < n) call stdlib_ccopy(n - k, a(k + 1, k), 1, w(k + 1, k), 1)
              if (k > 1) then
                 call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(k, 1), &
                           ldw, cone, w(k, k), 1)
                 w(k, k) = real(w(k, k))
              end if
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = abs(real(w(k, k)))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, w(k + 1, k), 1)
                 colmax = cabs1(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 a(k, k) = real(w(k, k))
                 if (k < n) call stdlib_ccopy(n - k, w(k + 1, k), 1, a(k + 1, k), 1)
              else
                 ! ============================================================
                 ! begin pivot search
                 ! case(1)
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
72      continue
                       ! begin pivot search loop body
                       ! copy column imax to column k+1 of w and update it
                       call stdlib_ccopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                       call stdlib_clacgv(imax - k, w(k, k + 1), 1)
                       w(imax, k + 1) = real(a(imax, imax))
                       if (imax < n) call stdlib_ccopy(n - imax, a(imax + 1, imax), 1, w(imax + 1, k + 1 &
                                 ), 1)
                       if (k > 1) then
                          call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w( &
                                    imax, 1), ldw, cone, w(k, k + 1), 1)
                          w(imax, k + 1) = real(w(imax, k + 1))
                       end if
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, w(k, k + 1), 1)
                          rowmax = cabs1(w(jmax, k + 1))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, w(imax + 1, k + 1), 1)
                          stemp = cabs1(w(itemp, k + 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! case(2)
                       ! equivalent to testing for
                       ! abs( real( w( imax,k+1 ) ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (abs(real(w(imax, k + 1))) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column k+1 of w to column k of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                          done = .true.
                       ! case(3)
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       ! case(4)
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 72
                 end if
                 ! end pivot search
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k + kstep - 1
                 ! interchange rows and columns p and k (only for 2-by-2 pivot).
                 ! updated column p is already stored in column k of w.
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column kk-1 to column p of submatrix a
                    ! at step k. no need to copy element into columns
                    ! k and k+1 of a for 2-by-2 pivot, since these columns
                    ! will be later overwritten.
                    a(p, p) = real(a(k, k))
                    call stdlib_ccopy(p - k - 1, a(k + 1, k), 1, a(p, k + 1), lda)
                    call stdlib_clacgv(p - k - 1, a(p, k + 1), lda)
                    if (p < n) call stdlib_ccopy(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    ! interchange rows k and p in first k-1 columns of a
                    ! (columns k and k+1 of a for 2-by-2 pivot will be
                    ! later overwritten). interchange rows k and p
                    ! in first kk columns of w.
                    if (k > 1) call stdlib_cswap(k - 1, a(k, 1), lda, a(p, 1), lda)
                    call stdlib_cswap(kk, w(k, 1), ldw, w(p, 1), ldw)
                 end if
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kk of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k+1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = real(a(kk, kk))
                    call stdlib_ccopy(kp - kk - 1, a(kk + 1, kk), 1, a(kp, kk + 1), lda)
                    call stdlib_clacgv(kp - kk - 1, a(kp, kk + 1), lda)
                    if (kp < n) call stdlib_ccopy(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    ! interchange rows kk and kp in first k-1 columns of a
                    ! (column k (or k and k+1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in first kk columns of w.
                    if (k > 1) call stdlib_cswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_cswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k),
                    ! where l(k) is the k-th column of l
                    ! (1) store subdiag. elements of column l(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element l(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,k)
                       ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k)
                    ! (note: no need to use for hermitian matrix
                    ! a( k, k ) = real( w( k, k) ) to separately copy diagonal
                    ! element d(k,k) from w (potentially saves only one load))
                    call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       ! (note: no need to check if a(k,k) is not zero,
                        ! since that was ensured earlier in pivot search:
                        ! case a(k,k) = 0 falls into 2x2 pivot case(3))
                       ! handle division by a small number
                       t = real(a(k, k))
                       if (abs(t) >= sfmin) then
                          r1 = one/t
                          call stdlib_csscal(n - k, r1, a(k + 1, k), 1)
                       else
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/t
                          end do
                       end if
                       ! (2) conjugate column w(k)
                       call stdlib_clacgv(n - k, w(k + 1, k), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! (1) store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2
                    ! block d(k:k+1,k:k+1) in columns k and k+1 of a.
                    ! note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit
                    ! block and not stored.
                       ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1)
                       ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) =
                       ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) )
                    if (k < n - 1) then
                       ! factor out the columns of the inverse of 2-by-2 pivot
                       ! block d, so that each column contains 1, to reduce the
                       ! number of flops when we multiply panel
                       ! ( w(kw-1) w(kw) ) by this inverse, i.e. by d**(-1).
                       ! d**(-1) = ( d11 cj(d21) )**(-1) =
                                 ! ( d21    d22 )
                       ! = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
                                                ! ( (-d21) (     d11 ) )
                       ! = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
                         ! * ( d21*( d22/d21 ) conj(d21)*(           - 1 ) ) =
                           ! (     (      -1 )           ( d11/conj(d21) ) )
                       ! = 1/(|d21|**2) * 1/(d22*d11-1) *
                         ! * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                           ! (     (  -1 )           ( d22 ) )
                       ! = (1/|d21|**2) * t * ( d21*( d11 ) conj(d21)*(  -1 ) ) =
                                            ! (     (  -1 )           ( d22 ) )
                       ! = ( (t/conj(d21))*( d11 ) (t/d21)*(  -1 ) ) =
                         ! (               (  -1 )         ( d22 ) )
                       ! handle division by a small number. (note: order of
                       ! operations is important)
                       ! = ( t*(( d11 )/conj(d21)) t*((  -1 )/d21 ) )
                         ! (   ((  -1 )          )   (( d22 )     ) ),
                       ! where d11 = d22/d21,
                             ! d22 = d11/conj(d21),
                             ! d21 = d21,
                             ! t = 1/(d22*d11-1).
                       ! (note: no need to check for division by zero,
                        ! since that was ensured earlier in pivot search:
                        ! (a) d21 != 0 in 2x2 pivot case(4),
                            ! since |d21| should be larger than |d11| and |d22|;
                        ! (b) (d22*d11 - 1) != 0, since from (a),
                            ! both |d11| < 1, |d22| < 1, hence |d22*d11| << 1.)
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/conjg(d21)
                       t = one/(real(d11*d22) - one)
                       ! update elements in columns a(k) and a(k+1) as
                       ! dot products of rows of ( w(k) w(k+1) ) and columns
                       ! of d**(-1)
                       do j = k + 2, n
                          a(j, k) = t*((d11*w(j, k) - w(j, k + 1))/conjg(d21))
                          a(j, k + 1) = t*((d22*w(j, k + 1) - w(j, k))/d21)
                       end do
                    end if
                    ! copy d(k) to a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = w(k + 1, k)
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                    ! (2) conjugate columns w(k) and w(k+1)
                    call stdlib_clacgv(n - k, w(k + 1, k), 1)
                    call stdlib_clacgv(n - k - 1, w(k + 2, k + 1), 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**h = a22 - l21*w**h
              ! computing blocks of nb columns at a time (note that conjg(w) is
              ! actually stored)
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    a(jj, jj) = real(a(jj, jj))
                    call stdlib_cgemv('no transpose', j + jb - jj, k - 1, -cone, a(jj, 1), lda, w(jj, &
                               1), ldw, cone, a(jj, jj), 1)
                    a(jj, jj) = real(a(jj, jj))
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_cgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           cone, a(j + jb, 1), lda, w(j, 1), ldw, cone, a(j + jb, j), lda)
              end do
              ! put l21 in standard form by partially undoing the interchanges
              ! of rows in columns 1:k-1 looping backwards from k-1 to 1
              j = k - 1
120    continue
                 ! undo the interchanges (if any) of rows j and jp2
                 ! (or j and jp2, and j-1 and jp1) at each step j
                 kstep = 1
                 jp1 = 1
                 ! (here, j is a diagonal index)
                 jj = j
                 jp2 = ipiv(j)
                 if (jp2 < 0) then
                    jp2 = -jp2
                    ! (here, j is a diagonal index)
                    j = j - 1
                    jp1 = -ipiv(j)
                    kstep = 2
                 end if
                 ! (note: here, j is used to determine row length. length j
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j - 1
                 if (jp2 /= jj .and. j >= 1) call stdlib_cswap(j, a(jp2, 1), lda, a(jj, 1), lda)
                           
                 jj = jj - 1
                 if (kstep == 2 .and. jp1 /= jj .and. j >= 1) call stdlib_cswap(j, a(jp1, 1), lda, a( &
                            jj, 1), lda)
              if (j > 1) go to 120
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_clahef_rook
     end subroutine stdlib_clahef_rook

     ! CLAIC1 applies one step of incremental condition estimation in
     ! its simplest version:
     ! Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j
     ! lower triangular matrix L, such that
     ! twonorm(L*x) = sest
     ! Then CLAIC1 computes sestpr, s, c such that
     ! the vector
     ! [ s*x ]
     ! xhat = [  c  ]
     ! is an approximate singular vector of
     ! [ L      0  ]
     ! Lhat = [ w**H gamma ]
     ! in the sense that
     ! twonorm(Lhat*xhat) = sestpr.
     ! Depending on JOB, an estimate for the largest or smallest singular
     ! value is computed.
     ! Note that [s c]**H and sestpr**2 is an eigenpair of the system
     ! diag(sest*sest, 0) + [alpha  gamma] * [ conjg(alpha) ]
     ! [ conjg(gamma) ]
     ! where  alpha =  x**H*w.

     subroutine stdlib_claic1(job, j, x, sest, w, gamma, sestpr, s, c)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: j, job
           real(sp) :: sest, sestpr
           complex(sp) :: c, gamma, s
           ! .. array arguments ..
           complex(sp) :: w(j), x(j)
        ! =====================================================================
           
           ! .. local scalars ..
           real(sp) :: absalp, absest, absgam, b, eps, norma, s1, s2, scl, t, test, tmp, zeta1, &
                     zeta2
           complex(sp) :: alpha, cosine, sine
           ! .. intrinsic functions ..
           intrinsic :: abs, conjg, max, sqrt
     
           ! .. executable statements ..
           eps = stdlib_slamch('epsilon')
           alpha = stdlib_cdotc(j, x, 1, w, 1)
           absalp = abs(alpha)
           absgam = abs(gamma)
           absest = abs(sest)
           if (job == 1) then
              ! estimating largest singular value
              ! special cases
              if (sest == zero) then
                 s1 = max(absgam, absalp)
                 if (s1 == zero) then
                    s = zero
                    c = one
                    sestpr = zero
                 else
                    s = alpha/s1
                    c = gamma/s1
                    tmp = real(sqrt(s*conjg(s) + c*conjg(c)))
                    s = s/tmp
                    c = c/tmp
                    sestpr = s1*tmp
                 end if
                 return
              else if (absgam <= eps*absest) then
                 s = one
                 c = zero
                 tmp = max(absest, absalp)
                 s1 = absest/tmp
                 s2 = absalp/tmp
                 sestpr = tmp*sqrt(s1*s1 + s2*s2)
                 return
              else if (absalp <= eps*absest) then
                 s1 = absgam
                 s2 = absest
                 if (s1 <= s2) then
                    s = one
                    c = zero
                    sestpr = s2
                 else
                    s = zero
                    c = one
                    sestpr = s1
                 end if
                 return
              else if (absest <= eps*absalp .or. absest <= eps*absgam) then
                 s1 = absgam
                 s2 = absalp
                 if (s1 <= s2) then
                    tmp = s1/s2
                    scl = sqrt(one + tmp*tmp)
                    sestpr = s2*scl
                    s = (alpha/s2)/scl
                    c = (gamma/s2)/scl
                 else
                    tmp = s2/s1
                    scl = sqrt(one + tmp*tmp)
                    sestpr = s1*scl
                    s = (alpha/s1)/scl
                    c = (gamma/s1)/scl
                 end if
                 return
              else
                 ! normal case
                 zeta1 = absalp/absest
                 zeta2 = absgam/absest
                 b = (one - zeta1*zeta1 - zeta2*zeta2)*half
                 c = zeta1*zeta1
                 if (b > zero) then
                    t = real(c/(b + sqrt(b*b + c)))
                 else
                    t = real(sqrt(b*b + c) - b)
                 end if
                 sine = -(alpha/absest)/t
                 cosine = -(gamma/absest)/(one + t)
                 tmp = real(sqrt(sine*conjg(sine) + cosine*conjg(cosine)))
                 s = sine/tmp
                 c = cosine/tmp
                 sestpr = sqrt(t + one)*absest
                 return
              end if
           else if (job == 2) then
              ! estimating smallest singular value
              ! special cases
              if (sest == zero) then
                 sestpr = zero
                 if (max(absgam, absalp) == zero) then
                    sine = one
                    cosine = zero
                 else
                    sine = -conjg(gamma)
                    cosine = conjg(alpha)
                 end if
                 s1 = max(abs(sine), abs(cosine))
                 s = sine/s1
                 c = cosine/s1
                 tmp = real(sqrt(s*conjg(s) + c*conjg(c)))
                 s = s/tmp
                 c = c/tmp
                 return
              else if (absgam <= eps*absest) then
                 s = zero
                 c = one
                 sestpr = absgam
                 return
              else if (absalp <= eps*absest) then
                 s1 = absgam
                 s2 = absest
                 if (s1 <= s2) then
                    s = zero
                    c = one
                    sestpr = s1
                 else
                    s = one
                    c = zero
                    sestpr = s2
                 end if
                 return
              else if (absest <= eps*absalp .or. absest <= eps*absgam) then
                 s1 = absgam
                 s2 = absalp
                 if (s1 <= s2) then
                    tmp = s1/s2
                    scl = sqrt(one + tmp*tmp)
                    sestpr = absest*(tmp/scl)
                    s = -(conjg(gamma)/s2)/scl
                    c = (conjg(alpha)/s2)/scl
                 else
                    tmp = s2/s1
                    scl = sqrt(one + tmp*tmp)
                    sestpr = absest/scl
                    s = -(conjg(gamma)/s1)/scl
                    c = (conjg(alpha)/s1)/scl
                 end if
                 return
              else
                 ! normal case
                 zeta1 = absalp/absest
                 zeta2 = absgam/absest
                 norma = max(one + zeta1*zeta1 + zeta1*zeta2, zeta1*zeta2 + zeta2*zeta2)
                 ! see if root is closer to zero or to one
                 test = one + two*(zeta1 - zeta2)*(zeta1 + zeta2)
                 if (test >= zero) then
                    ! root is close to zero, compute directly
                    b = (zeta1*zeta1 + zeta2*zeta2 + one)*half
                    c = zeta2*zeta2
                    t = real(c/(b + sqrt(abs(b*b - c))))
                    sine = (alpha/absest)/(one - t)
                    cosine = -(gamma/absest)/t
                    sestpr = sqrt(t + four*eps*eps*norma)*absest
                 else
                    ! root is closer to one, shift by that amount
                    b = (zeta2*zeta2 + zeta1*zeta1 - one)*half
                    c = zeta1*zeta1
                    if (b >= zero) then
                       t = real(-c/(b + sqrt(b*b + c)))
                    else
                       t = real(b - sqrt(b*b + c))
                    end if
                    sine = -(alpha/absest)/t
                    cosine = -(gamma/absest)/(one + t)
                    sestpr = sqrt(one + t + four*eps*eps*norma)*absest
                 end if
                 tmp = real(sqrt(sine*conjg(sine) + cosine*conjg(cosine)))
                 s = sine/tmp
                 c = cosine/tmp
                 return
              end if
           end if
           return
           ! end of stdlib_claic1
     end subroutine stdlib_claic1

     ! CLAPMR rearranges the rows of the M by N matrix X as specified
     ! by the permutation K(1),K(2),...,K(M) of the integers 1,...,M.
     ! If FORWRD = .TRUE.,  forward permutation:
     ! X(K(I),*) is moved X(I,*) for I = 1,2,...,M.
     ! If FORWRD = .FALSE., backward permutation:
     ! X(I,*) is moved to X(K(I),*) for I = 1,2,...,M.

     subroutine stdlib_clapmr(forwrd, m, n, x, ldx, k)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           logical(lk) :: forwrd
           integer(ilp) :: ldx, m, n
           ! .. array arguments ..
           integer(ilp) :: k(*)
           complex(sp) :: x(ldx, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, in, j, jj
           complex(sp) :: temp
           ! .. executable statements ..
           if (m <= 1) return
           do i = 1, m
              k(i) = -k(i)
           end do
           if (forwrd) then
              ! forward permutation
              do i = 1, m
                 if (k(i) > 0) go to 40
                 j = i
                 k(j) = -k(j)
                 in = k(j)
20      continue
                 if (k(in) > 0) go to 40
                 do jj = 1, n
                    temp = x(j, jj)
                    x(j, jj) = x(in, jj)
                    x(in, jj) = temp
                 end do
                 k(in) = -k(in)
                 j = in
                 in = k(in)
                 go to 20
40      continue
              end do
           else
              ! backward permutation
              do i = 1, m
                 if (k(i) > 0) go to 80
                 k(i) = -k(i)
                 j = k(i)
60      continue
                 if (j == i) go to 80
                 do jj = 1, n
                    temp = x(i, jj)
                    x(i, jj) = x(j, jj)
                    x(j, jj) = temp
                 end do
                 k(j) = -k(j)
                 j = k(j)
                 go to 60
80      continue
              end do
           end if
           return
           ! end of stdlib_clapmr
     end subroutine stdlib_clapmr

     ! CLAPMT rearranges the columns of the M by N matrix X as specified
     ! by the permutation K(1),K(2),...,K(N) of the integers 1,...,N.
     ! If FORWRD = .TRUE.,  forward permutation:
     ! X(*,K(J)) is moved X(*,J) for J = 1,2,...,N.
     ! If FORWRD = .FALSE., backward permutation:
     ! X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N.

     subroutine stdlib_clapmt(forwrd, m, n, x, ldx, k)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           logical(lk) :: forwrd
           integer(ilp) :: ldx, m, n
           ! .. array arguments ..
           integer(ilp) :: k(*)
           complex(sp) :: x(ldx, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ii, j, in
           complex(sp) :: temp
           ! .. executable statements ..
           if (n <= 1) return
           do i = 1, n
              k(i) = -k(i)
           end do
           if (forwrd) then
              ! forward permutation
              do i = 1, n
                 if (k(i) > 0) go to 40
                 j = i
                 k(j) = -k(j)
                 in = k(j)
20      continue
                 if (k(in) > 0) go to 40
                 do ii = 1, m
                    temp = x(ii, j)
                    x(ii, j) = x(ii, in)
                    x(ii, in) = temp
                 end do
                 k(in) = -k(in)
                 j = in
                 in = k(in)
                 go to 20
40      continue
              end do
           else
              ! backward permutation
              do i = 1, n
                 if (k(i) > 0) go to 100
                 k(i) = -k(i)
                 j = k(i)
80      continue
                 if (j == i) go to 100
                 do ii = 1, m
                    temp = x(ii, i)
                    x(ii, i) = x(ii, j)
                    x(ii, j) = temp
                 end do
                 k(j) = -k(j)
                 j = k(j)
                 go to 80
100    continue
              end do
           end if
           return
           ! end of stdlib_clapmt
     end subroutine stdlib_clapmt

     ! CLAQGB equilibrates a general M by N band matrix A with KL
     ! subdiagonals and KU superdiagonals using the row and scaling factors
     ! in the vectors R and C.

     subroutine stdlib_claqgb(m, n, kl, ku, ab, ldab, r, c, rowcnd, colcnd, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed
           integer(ilp) :: kl, ku, ldab, m, n
           real(sp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(sp) :: c(*), r(*)
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: cj, large, small
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (rowcnd >= thresh .and. amax >= small .and. amax <= large) then
              ! no row scaling
              if (colcnd >= thresh) then
                 ! no column scaling
                 equed = 'n'
              else
                 ! column scaling
                 do j = 1, n
                    cj = c(j)
                    do i = max(1, j - ku), min(m, j + kl)
                       ab(ku + 1 + i - j, j) = cj*ab(ku + 1 + i - j, j)
                    end do
                 end do
                 equed = 'c'
              end if
           else if (colcnd >= thresh) then
              ! row scaling, no column scaling
              do j = 1, n
                 do i = max(1, j - ku), min(m, j + kl)
                    ab(ku + 1 + i - j, j) = r(i)*ab(ku + 1 + i - j, j)
                 end do
              end do
              equed = 'r'
           else
              ! row and column scaling
              do j = 1, n
                 cj = c(j)
                 do i = max(1, j - ku), min(m, j + kl)
                    ab(ku + 1 + i - j, j) = cj*r(i)*ab(ku + 1 + i - j, j)
                 end do
              end do
              equed = 'b'
           end if
           return
           ! end of stdlib_claqgb
     end subroutine stdlib_claqgb

     ! CLAQGE equilibrates a general M by N matrix A using the row and
     ! column scaling factors in the vectors R and C.

     subroutine stdlib_claqge(m, n, a, lda, r, c, rowcnd, colcnd, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed
           integer(ilp) :: lda, m, n
           real(sp) :: amax, colcnd, rowcnd
           ! .. array arguments ..
           real(sp) :: c(*), r(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: cj, large, small
     
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (rowcnd >= thresh .and. amax >= small .and. amax <= large) then
              ! no row scaling
              if (colcnd >= thresh) then
                 ! no column scaling
                 equed = 'n'
              else
                 ! column scaling
                 do j = 1, n
                    cj = c(j)
                    do i = 1, m
                       a(i, j) = cj*a(i, j)
                    end do
                 end do
                 equed = 'c'
              end if
           else if (colcnd >= thresh) then
              ! row scaling, no column scaling
              do j = 1, n
                 do i = 1, m
                    a(i, j) = r(i)*a(i, j)
                 end do
              end do
              equed = 'r'
           else
              ! row and column scaling
              do j = 1, n
                 cj = c(j)
                 do i = 1, m
                    a(i, j) = cj*r(i)*a(i, j)
                 end do
              end do
              equed = 'b'
           end if
           return
           ! end of stdlib_claqge
     end subroutine stdlib_claqge

     ! CLAQHB equilibrates an Hermitian band matrix A using the scaling
     ! factors in the vector S.

     subroutine stdlib_claqhb(uplo, n, kd, ab, ldab, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: kd, ldab, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: cj, large, small
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, real
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored in band format.
                 do j = 1, n
                    cj = s(j)
                    do i = max(1, j - kd), j - 1
                       ab(kd + 1 + i - j, j) = cj*s(i)*ab(kd + 1 + i - j, j)
                    end do
                    ab(kd + 1, j) = cj*cj*real(ab(kd + 1, j))
                 end do
              else
                 ! lower triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    ab(1, j) = cj*cj*real(ab(1, j))
                    do i = j + 1, min(n, j + kd)
                       ab(1 + i - j, j) = cj*s(i)*ab(1 + i - j, j)
                    end do
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_claqhb
     end subroutine stdlib_claqhb

     ! CLAQHE equilibrates a Hermitian matrix A using the scaling factors
     ! in the vector S.

     subroutine stdlib_claqhe(uplo, n, a, lda, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: lda, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: cj, large, small
     
           ! .. intrinsic functions ..
           intrinsic :: real
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    do i = 1, j - 1
                       a(i, j) = cj*s(i)*a(i, j)
                    end do
                    a(j, j) = cj*cj*real(a(j, j))
                 end do
              else
                 ! lower triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    a(j, j) = cj*cj*real(a(j, j))
                    do i = j + 1, n
                       a(i, j) = cj*s(i)*a(i, j)
                    end do
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_claqhe
     end subroutine stdlib_claqhe

     ! CLAQHP equilibrates a Hermitian matrix A using the scaling factors
     ! in the vector S.

     subroutine stdlib_claqhp(uplo, n, ap, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: ap(*)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j, jc
           real(sp) :: cj, large, small
     
           ! .. intrinsic functions ..
           intrinsic :: real
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored.
                 jc = 1
                 do j = 1, n
                    cj = s(j)
                    do i = 1, j - 1
                       ap(jc + i - 1) = cj*s(i)*ap(jc + i - 1)
                    end do
                    ap(jc + j - 1) = cj*cj*real(ap(jc + j - 1))
                    jc = jc + j
                 end do
              else
                 ! lower triangle of a is stored.
                 jc = 1
                 do j = 1, n
                    cj = s(j)
                    ap(jc) = cj*cj*real(ap(jc))
                    do i = j + 1, n
                       ap(jc + i - j) = cj*s(i)*ap(jc + i - j)
                    end do
                    jc = jc + n - j + 1
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_claqhp
     end subroutine stdlib_claqhp

     ! Given a 2-by-2 or 3-by-3 matrix H, CLAQR1 sets v to a
     ! scalar multiple of the first column of the product
     ! (*)  K = (H - s1*I)*(H - s2*I)
     ! scaling to avoid overflows and most underflows.
     ! This is useful for starting double implicit shift bulges
     ! in the QR algorithm.

     subroutine stdlib_claqr1(n, h, ldh, s1, s2, v)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           complex(sp) :: s1, s2
           integer(ilp) :: ldh, n
           ! .. array arguments ..
           complex(sp) :: h(ldh, *), v(*)
        ! ================================================================
           ! .. parameters ..
           real(sp), parameter :: rzero = zero
           
           ! .. local scalars ..
           complex(sp) :: cdum, h21s, h31s
           real(sp) :: s
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, real
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(cdum) = abs(real(cdum)) + abs(aimag(cdum))
           ! .. executable statements ..
           ! quick return if possible
           if (n /= 2 .and. n /= 3) then
              return
           end if
           if (n == 2) then
              s = cabs1(h(1, 1) - s2) + cabs1(h(2, 1))
              if (s == rzero) then
                 v(1) = czero
                 v(2) = czero
              else
                 h21s = h(2, 1)/s
                 v(1) = h21s*h(1, 2) + (h(1, 1) - s1)*((h(1, 1) - s2)/s)
                 v(2) = h21s*(h(1, 1) + h(2, 2) - s1 - s2)
              end if
           else
              s = cabs1(h(1, 1) - s2) + cabs1(h(2, 1)) + cabs1(h(3, 1))
              if (s == czero) then
                 v(1) = czero
                 v(2) = czero
                 v(3) = czero
              else
                 h21s = h(2, 1)/s
                 h31s = h(3, 1)/s
                 v(1) = (h(1, 1) - s1)*((h(1, 1) - s2)/s) + h(1, 2)*h21s + h(1, 3) &
                           *h31s
                 v(2) = h21s*(h(1, 1) + h(2, 2) - s1 - s2) + h(2, 3)*h31s
                 v(3) = h31s*(h(1, 1) + h(3, 3) - s1 - s2) + h21s*h(3, 2)
              end if
           end if
     end subroutine stdlib_claqr1

     ! CLAQSB equilibrates a symmetric band matrix A using the scaling
     ! factors in the vector S.

     subroutine stdlib_claqsb(uplo, n, kd, ab, ldab, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: kd, ldab, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: cj, large, small
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored in band format.
                 do j = 1, n
                    cj = s(j)
                    do i = max(1, j - kd), j
                       ab(kd + 1 + i - j, j) = cj*s(i)*ab(kd + 1 + i - j, j)
                    end do
                 end do
              else
                 ! lower triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    do i = j, min(n, j + kd)
                       ab(1 + i - j, j) = cj*s(i)*ab(1 + i - j, j)
                    end do
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_claqsb
     end subroutine stdlib_claqsb

     ! CLAQSP equilibrates a symmetric matrix A using the scaling factors
     ! in the vector S.

     subroutine stdlib_claqsp(uplo, n, ap, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: ap(*)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j, jc
           real(sp) :: cj, large, small
     
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored.
                 jc = 1
                 do j = 1, n
                    cj = s(j)
                    do i = 1, j
                       ap(jc + i - 1) = cj*s(i)*ap(jc + i - 1)
                    end do
                    jc = jc + j
                 end do
              else
                 ! lower triangle of a is stored.
                 jc = 1
                 do j = 1, n
                    cj = s(j)
                    do i = j, n
                       ap(jc + i - j) = cj*s(i)*ap(jc + i - j)
                    end do
                    jc = jc + n - j + 1
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_claqsp
     end subroutine stdlib_claqsp

     ! CLAQSY equilibrates a symmetric matrix A using the scaling factors
     ! in the vector S.

     subroutine stdlib_claqsy(uplo, n, a, lda, s, scond, amax, equed)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: equed, uplo
           integer(ilp) :: lda, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: thresh = 0.1_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, j
           real(sp) :: cj, large, small
     
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) then
              equed = 'n'
              return
           end if
           ! initialize large and small.
           small = stdlib_slamch('safe minimum')/stdlib_slamch('precision')
           large = one/small
           if (scond >= thresh .and. amax >= small .and. amax <= large) then
              ! no equilibration
              equed = 'n'
           else
              ! replace a by diag(s) * a * diag(s).
              if (stdlib_lsame(uplo, 'u')) then
                 ! upper triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    do i = 1, j
                       a(i, j) = cj*s(i)*a(i, j)
                    end do
                 end do
              else
                 ! lower triangle of a is stored.
                 do j = 1, n
                    cj = s(j)
                    do i = j, n
                       a(i, j) = cj*s(i)*a(i, j)
                    end do
                 end do
              end if
              equed = 'y'
           end if
           return
           ! end of stdlib_claqsy
     end subroutine stdlib_claqsy

     ! CLAR1V computes the (scaled) r-th column of the inverse of
     ! the sumbmatrix in rows B1 through BN of the tridiagonal matrix
     ! L D L**T - sigma I. When sigma is close to an eigenvalue, the
     ! computed vector is an accurate eigenvector. Usually, r corresponds
     ! to the index where the eigenvector is largest in magnitude.
     ! The following steps accomplish this computation :
     ! (a) Stationary qd transform,  L D L**T - sigma I = L(+) D(+) L(+)**T,
     ! (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T,
     ! (c) Computation of the diagonal elements of the inverse of
     ! L D L**T - sigma I by combining the above transforms, and choosing
     ! r as the index where the diagonal of the inverse is (one of the)
     ! largest in magnitude.
     ! (d) Computation of the (scaled) r-th column of the inverse using the
     ! twisted factorization obtained by combining the top part of the
     ! the stationary and the bottom part of the progressive transform.

     subroutine stdlib_clar1v(n, b1, bn, lambda, d, l, ld, lld, pivmin, gaptol, z, wantnc, negcnt, &
                ztz, mingma, r, isuppz, nrminv, resid, rqcorr, work)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           logical(lk) :: wantnc
           integer(ilp) :: b1, bn, n, negcnt, r
           real(sp) :: gaptol, lambda, mingma, nrminv, pivmin, resid, rqcorr, ztz
           ! .. array arguments ..
           integer(ilp) :: isuppz(*)
           real(sp) :: d(*), l(*), ld(*), lld(*), work(*)
           complex(sp) :: z(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: sawnan1, sawnan2
           integer(ilp) :: i, indlpl, indp, inds, indumn, neg1, neg2, r1, r2
           real(sp) :: dminus, dplus, eps, s, tmp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, real
           ! .. executable statements ..
           eps = stdlib_slamch('precision')
           if (r == 0) then
              r1 = b1
              r2 = bn
           else
              r1 = r
              r2 = r
           end if
           ! storage for lplus
           indlpl = 0
           ! storage for uminus
           indumn = n
           inds = 2*n + 1
           indp = 3*n + 1
           if (b1 == 1) then
              work(inds) = zero
           else
              work(inds + b1 - 1) = lld(b1 - 1)
           end if
           ! compute the stationary transform (using the differential form)
           ! until the index r2.
           sawnan1 = .false.
           neg1 = 0
           s = work(inds + b1 - 1) - lambda
           do i = b1, r1 - 1
              dplus = d(i) + s
              work(indlpl + i) = ld(i)/dplus
              if (dplus < zero) neg1 = neg1 + 1
              work(inds + i) = s*work(indlpl + i)*l(i)
              s = work(inds + i) - lambda
           end do
           sawnan1 = stdlib_sisnan(s)
           if (sawnan1) goto 60
           do i = r1, r2 - 1
              dplus = d(i) + s
              work(indlpl + i) = ld(i)/dplus
              work(inds + i) = s*work(indlpl + i)*l(i)
              s = work(inds + i) - lambda
           end do
           sawnan1 = stdlib_sisnan(s)
60    continue
           if (sawnan1) then
              ! runs a slower version of the above loop if a nan is detected
              neg1 = 0
              s = work(inds + b1 - 1) - lambda
              do i = b1, r1 - 1
                 dplus = d(i) + s
                 if (abs(dplus) < pivmin) dplus = -pivmin
                 work(indlpl + i) = ld(i)/dplus
                 if (dplus < zero) neg1 = neg1 + 1
                 work(inds + i) = s*work(indlpl + i)*l(i)
                 if (work(indlpl + i) == zero) work(inds + i) = lld(i)
                 s = work(inds + i) - lambda
              end do
              do i = r1, r2 - 1
                 dplus = d(i) + s
                 if (abs(dplus) < pivmin) dplus = -pivmin
                 work(indlpl + i) = ld(i)/dplus
                 work(inds + i) = s*work(indlpl + i)*l(i)
                 if (work(indlpl + i) == zero) work(inds + i) = lld(i)
                 s = work(inds + i) - lambda
              end do
           end if
           ! compute the progressive transform (using the differential form)
           ! until the index r1
           sawnan2 = .false.
           neg2 = 0
           work(indp + bn - 1) = d(bn) - lambda
           do i = bn - 1, r1, -1
              dminus = lld(i) + work(indp + i)
              tmp = d(i)/dminus
              if (dminus < zero) neg2 = neg2 + 1
              work(indumn + i) = l(i)*tmp
              work(indp + i - 1) = work(indp + i)*tmp - lambda
           end do
           tmp = work(indp + r1 - 1)
           sawnan2 = stdlib_sisnan(tmp)
           if (sawnan2) then
              ! runs a slower version of the above loop if a nan is detected
              neg2 = 0
              do i = bn - 1, r1, -1
                 dminus = lld(i) + work(indp + i)
                 if (abs(dminus) < pivmin) dminus = -pivmin
                 tmp = d(i)/dminus
                 if (dminus < zero) neg2 = neg2 + 1
                 work(indumn + i) = l(i)*tmp
                 work(indp + i - 1) = work(indp + i)*tmp - lambda
                 if (tmp == zero) work(indp + i - 1) = d(i) - lambda
              end do
           end if
           ! find the index (from r1 to r2) of the largest (in magnitude)
           ! diagonal element of the inverse
           mingma = work(inds + r1 - 1) + work(indp + r1 - 1)
           if (mingma < zero) neg1 = neg1 + 1
           if (wantnc) then
              negcnt = neg1 + neg2
           else
              negcnt = -1
           end if
           if (abs(mingma) == zero) mingma = eps*work(inds + r1 - 1)
           r = r1
           do i = r1, r2 - 1
              tmp = work(inds + i) + work(indp + i)
              if (tmp == zero) tmp = eps*work(inds + i)
              if (abs(tmp) <= abs(mingma)) then
                 mingma = tmp
                 r = i + 1
              end if
           end do
           ! compute the fp vector: solve n^t v = e_r
           isuppz(1) = b1
           isuppz(2) = bn
           z(r) = cone
           ztz = one
           ! compute the fp vector upwards from r
           if (.not. sawnan1 .and. .not. sawnan2) then
              do i = r - 1, b1, -1
                 z(i) = -(work(indlpl + i)*z(i + 1))
                 if ((abs(z(i)) + abs(z(i + 1)))*abs(ld(i)) < gaptol) then
                    z(i) = zero
                    isuppz(1) = i + 1
                    goto 220
                 end if
                 ztz = ztz + real(z(i)*z(i))
              end do
220   continue
           else
              ! run slower loop if nan occurred.
              do i = r - 1, b1, -1
                 if (z(i + 1) == zero) then
                    z(i) = -(ld(i + 1)/ld(i))*z(i + 2)
                 else
                    z(i) = -(work(indlpl + i)*z(i + 1))
                 end if
                 if ((abs(z(i)) + abs(z(i + 1)))*abs(ld(i)) < gaptol) then
                    z(i) = zero
                    isuppz(1) = i + 1
                    go to 240
                 end if
                 ztz = ztz + real(z(i)*z(i))
              end do
240   continue
           end if
           ! compute the fp vector downwards from r in blocks of size blksiz
           if (.not. sawnan1 .and. .not. sawnan2) then
              do i = r, bn - 1
                 z(i + 1) = -(work(indumn + i)*z(i))
                 if ((abs(z(i)) + abs(z(i + 1)))*abs(ld(i)) < gaptol) then
                    z(i + 1) = zero
                    isuppz(2) = i
                    go to 260
                 end if
                 ztz = ztz + real(z(i + 1)*z(i + 1))
              end do
260   continue
           else
              ! run slower loop if nan occurred.
              do i = r, bn - 1
                 if (z(i) == zero) then
                    z(i + 1) = -(ld(i - 1)/ld(i))*z(i - 1)
                 else
                    z(i + 1) = -(work(indumn + i)*z(i))
                 end if
                 if ((abs(z(i)) + abs(z(i + 1)))*abs(ld(i)) < gaptol) then
                    z(i + 1) = zero
                    isuppz(2) = i
                    go to 280
                 end if
                 ztz = ztz + real(z(i + 1)*z(i + 1))
              end do
280   continue
           end if
           ! compute quantities for convergence test
           tmp = one/ztz
           nrminv = sqrt(tmp)
           resid = abs(mingma)*nrminv
           rqcorr = mingma*tmp
           return
           ! end of stdlib_clar1v
     end subroutine stdlib_clar1v

     ! CLAR2V applies a vector of complex plane rotations with real cosines
     ! from both sides to a sequence of 2-by-2 complex Hermitian matrices,
     ! defined by the elements of the vectors x, y and z. For i = 1,2,...,n
     ! (       x(i)  z(i) ) :=
     ! ( conjg(z(i)) y(i) )
     ! (  c(i) conjg(s(i)) ) (       x(i)  z(i) ) ( c(i) -conjg(s(i)) )
     ! ( -s(i)       c(i)  ) ( conjg(z(i)) y(i) ) ( s(i)        c(i)  )

     subroutine stdlib_clar2v(n, x, y, z, incx, c, s, incc)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incc, incx, n
           ! .. array arguments ..
           real(sp) :: c(*)
           complex(sp) :: s(*), x(*), y(*), z(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ic, ix
           real(sp) :: ci, sii, sir, t1i, t1r, t5, t6, xi, yi, zii, zir
           complex(sp) :: si, t2, t3, t4, zi
           ! .. intrinsic functions ..
           intrinsic :: aimag, cmplx, conjg, real
           ! .. executable statements ..
           ix = 1
           ic = 1
           do i = 1, n
              xi = real(x(ix))
              yi = real(y(ix))
              zi = z(ix)
              zir = real(zi)
              zii = aimag(zi)
              ci = c(ic)
              si = s(ic)
              sir = real(si)
              sii = aimag(si)
              t1r = sir*zir - sii*zii
              t1i = sir*zii + sii*zir
              t2 = ci*zi
              t3 = t2 - conjg(si)*xi
              t4 = conjg(t2) + si*yi
              t5 = ci*xi + t1r
              t6 = ci*yi - t1r
              x(ix) = ci*t5 + (sir*real(t4) + sii*aimag(t4))
              y(ix) = ci*t6 - (sir*real(t3) - sii*aimag(t3))
              z(ix) = ci*t3 + conjg(si)*cmplx(t6, t1i, KIND=sp)
              ix = ix + incx
              ic = ic + incc
           end do
           return
           ! end of stdlib_clar2v
     end subroutine stdlib_clar2v

     ! CLARCM performs a very simple matrix-matrix multiplication:
     ! C := A * B,
     ! where A is M by M and real; B is M by N and complex;
     ! C is M by N and complex.

     subroutine stdlib_clarcm(m, n, a, lda, b, ldb, c, ldc, rwork)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: lda, ldb, ldc, m, n
           ! .. array arguments ..
           real(sp) :: a(lda, *), rwork(*)
           complex(sp) :: b(ldb, *), c(ldc, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, l
           ! .. intrinsic functions ..
           intrinsic :: aimag, cmplx, real
     
           ! .. executable statements ..
           ! quick return if possible.
           if ((m == 0) .or. (n == 0)) return
           do j = 1, n
              do i = 1, m
                 rwork((j - 1)*m + i) = real(b(i, j))
              end do
           end do
           l = m*n + 1
           call stdlib_sgemm('n', 'n', m, n, m, one, a, lda, rwork, m, zero, rwork(l), m)
                     
           do j = 1, n
              do i = 1, m
                 c(i, j) = rwork(l + (j - 1)*m + i - 1)
              end do
           end do
           do j = 1, n
              do i = 1, m
                 rwork((j - 1)*m + i) = aimag(b(i, j))
              end do
           end do
           call stdlib_sgemm('n', 'n', m, n, m, one, a, lda, rwork, m, zero, rwork(l), m)
                     
           do j = 1, n
              do i = 1, m
                 c(i, j) = cmplx(real(c(i, j)), rwork(l + (j - 1)*m + i - 1))
              end do
           end do
           return
           ! end of stdlib_clarcm
     end subroutine stdlib_clarcm

     ! CLARF applies a complex elementary reflector H to a complex M-by-N
     ! matrix C, from either the left or the right. H is represented in the
     ! form
     ! H = I - tau * v * v**H
     ! where tau is a complex scalar and v is a complex vector.
     ! If tau = 0, then H is taken to be the unit matrix.
     ! To apply H**H (the conjugate transpose of H), supply conjg(tau) instead
     ! tau.

     subroutine stdlib_clarf(side, m, n, v, incv, tau, c, ldc, work)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side
           integer(ilp) :: incv, ldc, m, n
           complex(sp) :: tau
           ! .. array arguments ..
           complex(sp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: applyleft
           integer(ilp) :: i, lastv, lastc
     
           ! .. executable statements ..
           applyleft = stdlib_lsame(side, 'l')
           lastv = 0
           lastc = 0
           if (tau /= czero) then
           ! set up variables for scanning v.  lastv begins pointing to the end
           ! of v.
              if (applyleft) then
                 lastv = m
              else
                 lastv = n
              end if
              if (incv > 0) then
                 i = 1 + (lastv - 1)*incv
              else
                 i = 1
              end if
           ! look for the last non-czero row in v.
              do while (lastv > 0 .and. v(i) == czero)
                 lastv = lastv - 1
                 i = i - incv
              end do
              if (applyleft) then
           ! scan for the last non-czero column in c(1:lastv,:).
                 lastc = stdlib_ilaclc(lastv, n, c, ldc)
              else
           ! scan for the last non-czero row in c(:,1:lastv).
                 lastc = stdlib_ilaclr(m, lastv, c, ldc)
              end if
           end if
           ! note that lastc.eq.0 renders the blas operations null; no special
           ! case is needed at this level.
           if (applyleft) then
              ! form  h * c
              if (lastv > 0) then
                 ! w(1:lastc,1) := c(1:lastv,1:lastc)**h * v(1:lastv,1)
                 call stdlib_cgemv('conjugate transpose', lastv, lastc, cone, c, ldc, v, incv, &
                           czero, work, 1)
                 ! c(1:lastv,1:lastc) := c(...) - v(1:lastv,1) * w(1:lastc,1)**h
                 call stdlib_cgerc(lastv, lastc, -tau, v, incv, work, 1, c, ldc)
              end if
           else
              ! form  c * h
              if (lastv > 0) then
                 ! w(1:lastc,1) := c(1:lastc,1:lastv) * v(1:lastv,1)
                 call stdlib_cgemv('no transpose', lastc, lastv, cone, c, ldc, v, incv, czero, &
                           work, 1)
                 ! c(1:lastc,1:lastv) := c(...) - w(1:lastc,1) * v(1:lastv,1)**h
                 call stdlib_cgerc(lastc, lastv, -tau, work, 1, v, incv, c, ldc)
              end if
           end if
           return
           ! end of stdlib_clarf
     end subroutine stdlib_clarf

     ! CLARFB applies a complex block reflector H or its transpose H**H to a
     ! complex M-by-N matrix C, from either the left or the right.

     subroutine stdlib_clarfb(side, trans, direct, storev, m, n, k, v, ldv, t, ldt, c, ldc, work, &
               ldwork)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, side, storev, trans
           integer(ilp) :: k, ldc, ldt, ldv, ldwork, m, n
           ! .. array arguments ..
           complex(sp) :: c(ldc, *), t(ldt, *), v(ldv, *), work(ldwork, *)
        ! =====================================================================
           
           ! .. local scalars ..
           character :: transt
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) return
           if (stdlib_lsame(trans, 'n')) then
              transt = 'c'
           else
              transt = 'n'
           end if
           if (stdlib_lsame(storev, 'c')) then
              if (stdlib_lsame(direct, 'f')) then
                 ! let  v =  ( v1 )    (first k rows)
                           ! ( v2 )
                 ! where  v1  is unit lower triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**h * c  where  c = ( c1 )
                                                          ! ( c2 )
                    ! w := c**h * v  =  (c1**h * v1 + c2**h * v2)  (stored in work)
                    ! w := c1**h
                    do j = 1, k
                       call stdlib_ccopy(n, c(j, 1), ldc, work(1, j), 1)
                       call stdlib_clacgv(n, work(1, j), 1)
                    end do
                    ! w := w * v1
                    call stdlib_ctrmm('right', 'lower', 'no transpose', 'unit', n, k, cone, v, &
                              ldv, work, ldwork)
                    if (m > k) then
                       ! w := w + c2**h *v2
                       call stdlib_cgemm('conjugate transpose', 'no transpose', n, k, m - k, cone, &
                                 c(k + 1, 1), ldc, v(k + 1, 1), ldv, cone, work, ldwork)
                    end if
                    ! w := w * t**h  or  w * t
                    call stdlib_ctrmm('right', 'upper', transt, 'non-unit', n, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - v * w**h
                    if (m > k) then
                       ! c2 := c2 - v2 * w**h
                       call stdlib_cgemm('no transpose', 'conjugate transpose', m - k, n, k, -cone, &
                                 v(k + 1, 1), ldv, work, ldwork, cone, c(k + 1, 1), ldc)
                    end if
                    ! w := w * v1**h
                    call stdlib_ctrmm('right', 'lower', 'conjugate transpose', 'unit', n, k, cone, &
                               v, ldv, work, ldwork)
                    ! c1 := c1 - w**h
                    do j = 1, k
                       do i = 1, n
                          c(j, i) = c(j, i) - conjg(work(i, j))
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h**h  where  c = ( c1  c2 )
                    ! w := c * v  =  (c1*v1 + c2*v2)  (stored in work)
                    ! w := c1
                    do j = 1, k
                       call stdlib_ccopy(m, c(1, j), 1, work(1, j), 1)
                    end do
                    ! w := w * v1
                    call stdlib_ctrmm('right', 'lower', 'no transpose', 'unit', m, k, cone, v, &
                              ldv, work, ldwork)
                    if (n > k) then
                       ! w := w + c2 * v2
                       call stdlib_cgemm('no transpose', 'no transpose', m, k, n - k, cone, c(1, k + &
                                 1), ldc, v(k + 1, 1), ldv, cone, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**h
                    call stdlib_ctrmm('right', 'upper', trans, 'non-unit', m, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v**h
                    if (n > k) then
                       ! c2 := c2 - w * v2**h
                       call stdlib_cgemm('no transpose', 'conjugate transpose', m, n - k, k, -cone, &
                                 work, ldwork, v(k + 1, 1), ldv, cone, c(1, k + 1), ldc)
                    end if
                    ! w := w * v1**h
                    call stdlib_ctrmm('right', 'lower', 'conjugate transpose', 'unit', m, k, cone, &
                               v, ldv, work, ldwork)
                    ! c1 := c1 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, j) = c(i, j) - work(i, j)
                       end do
                    end do
                 end if
              else
                 ! let  v =  ( v1 )
                           ! ( v2 )    (last k rows)
                 ! where  v2  is unit upper triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**h * c  where  c = ( c1 )
                                                        ! ( c2 )
                    ! w := c**h * v  =  (c1**h * v1 + c2**h * v2)  (stored in work)
                    ! w := c2**h
                    do j = 1, k
                       call stdlib_ccopy(n, c(m - k + j, 1), ldc, work(1, j), 1)
                       call stdlib_clacgv(n, work(1, j), 1)
                    end do
                    ! w := w * v2
                    call stdlib_ctrmm('right', 'upper', 'no transpose', 'unit', n, k, cone, v(m - &
                              k + 1, 1), ldv, work, ldwork)
                    if (m > k) then
                       ! w := w + c1**h * v1
                       call stdlib_cgemm('conjugate transpose', 'no transpose', n, k, m - k, cone, &
                                 c, ldc, v, ldv, cone, work, ldwork)
                    end if
                    ! w := w * t**h  or  w * t
                    call stdlib_ctrmm('right', 'lower', transt, 'non-unit', n, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - v * w**h
                    if (m > k) then
                       ! c1 := c1 - v1 * w**h
                       call stdlib_cgemm('no transpose', 'conjugate transpose', m - k, n, k, -cone, &
                                 v, ldv, work, ldwork, cone, c, ldc)
                    end if
                    ! w := w * v2**h
                    call stdlib_ctrmm('right', 'upper', 'conjugate transpose', 'unit', n, k, cone, &
                               v(m - k + 1, 1), ldv, work, ldwork)
                    ! c2 := c2 - w**h
                    do j = 1, k
                       do i = 1, n
                          c(m - k + j, i) = c(m - k + j, i) - conjg(work(i, j))
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h**h  where  c = ( c1  c2 )
                    ! w := c * v  =  (c1*v1 + c2*v2)  (stored in work)
                    ! w := c2
                    do j = 1, k
                       call stdlib_ccopy(m, c(1, n - k + j), 1, work(1, j), 1)
                    end do
                    ! w := w * v2
                    call stdlib_ctrmm('right', 'upper', 'no transpose', 'unit', m, k, cone, v(n - &
                              k + 1, 1), ldv, work, ldwork)
                    if (n > k) then
                       ! w := w + c1 * v1
                       call stdlib_cgemm('no transpose', 'no transpose', m, k, n - k, cone, c, ldc, &
                                 v, ldv, cone, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**h
                    call stdlib_ctrmm('right', 'lower', trans, 'non-unit', m, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v**h
                    if (n > k) then
                       ! c1 := c1 - w * v1**h
                       call stdlib_cgemm('no transpose', 'conjugate transpose', m, n - k, k, -cone, &
                                 work, ldwork, v, ldv, cone, c, ldc)
                    end if
                    ! w := w * v2**h
                    call stdlib_ctrmm('right', 'upper', 'conjugate transpose', 'unit', m, k, cone, &
                               v(n - k + 1, 1), ldv, work, ldwork)
                    ! c2 := c2 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, n - k + j) = c(i, n - k + j) - work(i, j)
                       end do
                    end do
                 end if
              end if
           else if (stdlib_lsame(storev, 'r')) then
              if (stdlib_lsame(direct, 'f')) then
                 ! let  v =  ( v1  v2 )    (v1: first k columns)
                 ! where  v1  is unit upper triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**h * c  where  c = ( c1 )
                                                          ! ( c2 )
                    ! w := c**h * v**h  =  (c1**h * v1**h + c2**h * v2**h) (stored in work)
                    ! w := c1**h
                    do j = 1, k
                       call stdlib_ccopy(n, c(j, 1), ldc, work(1, j), 1)
                       call stdlib_clacgv(n, work(1, j), 1)
                    end do
                    ! w := w * v1**h
                    call stdlib_ctrmm('right', 'upper', 'conjugate transpose', 'unit', n, k, cone, &
                               v, ldv, work, ldwork)
                    if (m > k) then
                       ! w := w + c2**h * v2**h
                       call stdlib_cgemm('conjugate transpose', 'conjugate transpose', n, k, m - k, &
                                 cone, c(k + 1, 1), ldc, v(1, k + 1), ldv, cone, work, ldwork)
                    end if
                    ! w := w * t**h  or  w * t
                    call stdlib_ctrmm('right', 'upper', transt, 'non-unit', n, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - v**h * w**h
                    if (m > k) then
                       ! c2 := c2 - v2**h * w**h
                       call stdlib_cgemm('conjugate transpose', 'conjugate transpose', m - k, n, k, &
                                 -cone, v(1, k + 1), ldv, work, ldwork, cone, c(k + 1, 1), ldc)
                    end if
                    ! w := w * v1
                    call stdlib_ctrmm('right', 'upper', 'no transpose', 'unit', n, k, cone, v, &
                              ldv, work, ldwork)
                    ! c1 := c1 - w**h
                    do j = 1, k
                       do i = 1, n
                          c(j, i) = c(j, i) - conjg(work(i, j))
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h**h  where  c = ( c1  c2 )
                    ! w := c * v**h  =  (c1*v1**h + c2*v2**h)  (stored in work)
                    ! w := c1
                    do j = 1, k
                       call stdlib_ccopy(m, c(1, j), 1, work(1, j), 1)
                    end do
                    ! w := w * v1**h
                    call stdlib_ctrmm('right', 'upper', 'conjugate transpose', 'unit', m, k, cone, &
                               v, ldv, work, ldwork)
                    if (n > k) then
                       ! w := w + c2 * v2**h
                       call stdlib_cgemm('no transpose', 'conjugate transpose', m, k, n - k, cone, &
                                 c(1, k + 1), ldc, v(1, k + 1), ldv, cone, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**h
                    call stdlib_ctrmm('right', 'upper', trans, 'non-unit', m, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v
                    if (n > k) then
                       ! c2 := c2 - w * v2
                       call stdlib_cgemm('no transpose', 'no transpose', m, n - k, k, -cone, work, &
                                 ldwork, v(1, k + 1), ldv, cone, c(1, k + 1), ldc)
                    end if
                    ! w := w * v1
                    call stdlib_ctrmm('right', 'upper', 'no transpose', 'unit', m, k, cone, v, &
                              ldv, work, ldwork)
                    ! c1 := c1 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, j) = c(i, j) - work(i, j)
                       end do
                    end do
                 end if
              else
                 ! let  v =  ( v1  v2 )    (v2: last k columns)
                 ! where  v2  is unit lower triangular.
                 if (stdlib_lsame(side, 'l')) then
                    ! form  h * c  or  h**h * c  where  c = ( c1 )
                                                          ! ( c2 )
                    ! w := c**h * v**h  =  (c1**h * v1**h + c2**h * v2**h) (stored in work)
                    ! w := c2**h
                    do j = 1, k
                       call stdlib_ccopy(n, c(m - k + j, 1), ldc, work(1, j), 1)
                       call stdlib_clacgv(n, work(1, j), 1)
                    end do
                    ! w := w * v2**h
                    call stdlib_ctrmm('right', 'lower', 'conjugate transpose', 'unit', n, k, cone, &
                               v(1, m - k + 1), ldv, work, ldwork)
                    if (m > k) then
                       ! w := w + c1**h * v1**h
                       call stdlib_cgemm('conjugate transpose', 'conjugate transpose', n, k, m - k, &
                                 cone, c, ldc, v, ldv, cone, work, ldwork)
                    end if
                    ! w := w * t**h  or  w * t
                    call stdlib_ctrmm('right', 'lower', transt, 'non-unit', n, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - v**h * w**h
                    if (m > k) then
                       ! c1 := c1 - v1**h * w**h
                       call stdlib_cgemm('conjugate transpose', 'conjugate transpose', m - k, n, k, &
                                 -cone, v, ldv, work, ldwork, cone, c, ldc)
                    end if
                    ! w := w * v2
                    call stdlib_ctrmm('right', 'lower', 'no transpose', 'unit', n, k, cone, v(1, &
                              m - k + 1), ldv, work, ldwork)
                    ! c2 := c2 - w**h
                    do j = 1, k
                       do i = 1, n
                          c(m - k + j, i) = c(m - k + j, i) - conjg(work(i, j))
                       end do
                    end do
                 else if (stdlib_lsame(side, 'r')) then
                    ! form  c * h  or  c * h**h  where  c = ( c1  c2 )
                    ! w := c * v**h  =  (c1*v1**h + c2*v2**h)  (stored in work)
                    ! w := c2
                    do j = 1, k
                       call stdlib_ccopy(m, c(1, n - k + j), 1, work(1, j), 1)
                    end do
                    ! w := w * v2**h
                    call stdlib_ctrmm('right', 'lower', 'conjugate transpose', 'unit', m, k, cone, &
                               v(1, n - k + 1), ldv, work, ldwork)
                    if (n > k) then
                       ! w := w + c1 * v1**h
                       call stdlib_cgemm('no transpose', 'conjugate transpose', m, k, n - k, cone, &
                                 c, ldc, v, ldv, cone, work, ldwork)
                    end if
                    ! w := w * t  or  w * t**h
                    call stdlib_ctrmm('right', 'lower', trans, 'non-unit', m, k, cone, t, ldt, &
                              work, ldwork)
                    ! c := c - w * v
                    if (n > k) then
                       ! c1 := c1 - w * v1
                       call stdlib_cgemm('no transpose', 'no transpose', m, n - k, k, -cone, work, &
                                 ldwork, v, ldv, cone, c, ldc)
                    end if
                    ! w := w * v2
                    call stdlib_ctrmm('right', 'lower', 'no transpose', 'unit', m, k, cone, v(1, &
                              n - k + 1), ldv, work, ldwork)
                    ! c1 := c1 - w
                    do j = 1, k
                       do i = 1, m
                          c(i, n - k + j) = c(i, n - k + j) - work(i, j)
                       end do
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_clarfb
     end subroutine stdlib_clarfb

     ! CLARFB_GETT applies a complex Householder block reflector H from the
     ! left to a complex (K+M)-by-N  "triangular-pentagonal" matrix
     ! composed of two block matrices: an upper trapezoidal K-by-N matrix A
     ! stored in the array A, and a rectangular M-by-(N-K) matrix B, stored
     ! in the array B. The block reflector H is stored in a compact
     ! WY-representation, where the elementary reflectors are in the
     ! arrays A, B and T. See Further Details section.

     subroutine stdlib_clarfb_gett(ident, m, n, k, t, ldt, a, lda, b, ldb, work, ldwork)
     
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: ident
           integer(ilp) :: k, lda, ldb, ldt, ldwork, m, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *), b(ldb, *), t(ldt, *), work(ldwork, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: lnotident
           integer(ilp) :: i, j
     
           ! .. executable statements ..
           ! quick return if possible
           if (m < 0 .or. n <= 0 .or. k == 0 .or. k > n) return
           lnotident = .not. stdlib_lsame(ident, 'i')
           ! ------------------------------------------------------------------
           ! first step. computation of the column block 2:
              ! ( a2 ) := h * ( a2 )
              ! ( b2 )        ( b2 )
           ! ------------------------------------------------------------------
           if (n > k) then
              ! col2_(1) compute w2: = a2. therefore, copy a2 = a(1:k, k+1:n)
              ! into w2=work(1:k, 1:n-k) column-by-column.
              do j = 1, n - k
                 call stdlib_ccopy(k, a(1, k + j), 1, work(1, j), 1)
              end do
              if (lnotident) then
                 ! col2_(2) compute w2: = (v1**h) * w2 = (a1**h) * w2,
                 ! v1 is not an identy matrix, but unit lower-triangular
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib_ctrmm('l', 'l', 'c', 'u', k, n - k, cone, a, lda, work, ldwork)
                           
              end if
              ! col2_(3) compute w2: = w2 + (v2**h) * b2 = w2 + (b1**h) * b2
              ! v2 stored in b1.
              if (m > 0) then
                 call stdlib_cgemm('c', 'n', k, n - k, m, cone, b, ldb, b(1, k + 1), ldb, cone, &
                           work, ldwork)
              end if
              ! col2_(4) compute w2: = t * w2,
              ! t is upper-triangular.
              call stdlib_ctrmm('l', 'u', 'n', 'n', k, n - k, cone, t, ldt, work, ldwork)
              ! col2_(5) compute b2: = b2 - v2 * w2 = b2 - b1 * w2,
              ! v2 stored in b1.
              if (m > 0) then
                 call stdlib_cgemm('n', 'n', m, n - k, k, -cone, b, ldb, work, ldwork, cone, b(1, &
                           k + 1), ldb)
              end if
              if (lnotident) then
                 ! col2_(6) compute w2: = v1 * w2 = a1 * w2,
                 ! v1 is not an identity matrix, but unit lower-triangular,
                 ! v1 stored in a1 (diagonal ones are not stored).
                 call stdlib_ctrmm('l', 'l', 'n', 'u', k, n - k, cone, a, lda, work, ldwork)
                           
              end if
              ! col2_(7) compute a2: = a2 - w2 =
                                   ! = a(1:k, k+1:n-k) - work(1:k, 1:n-k),
              ! column-by-column.
              do j = 1, n - k
                 do i = 1, k
                    a(i, k + j) = a(i, k + j) - work(i, j)
                 end do
              end do
           end if
           ! ------------------------------------------------------------------
           ! second step. computation of the column block 1:
              ! ( a1 ) := h * ( a1 )
              ! ( b1 )        (  0 )
           ! ------------------------------------------------------------------
           ! col1_(1) compute w1: = a1. copy the upper-triangular
           ! a1 = a(1:k, 1:k) into the upper-triangular
           ! w1 = work(1:k, 1:k) column-by-column.
           do j = 1, k
              call stdlib_ccopy(j, a(1, j), 1, work(1, j), 1)
           end do
           ! set the subdiagonal elements of w1 to zero column-by-column.
           do j = 1, k - 1
              do i = j + 1, k
                 work(i, j) = czero
              end do
           end do
           if (lnotident) then
              ! col1_(2) compute w1: = (v1**h) * w1 = (a1**h) * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular with zeroes below the diagonal.
              call stdlib_ctrmm('l', 'l', 'c', 'u', k, k, cone, a, lda, work, ldwork)
           end if
           ! col1_(3) compute w1: = t * w1,
           ! t is upper-triangular,
           ! w1 is upper-triangular with zeroes below the diagonal.
           call stdlib_ctrmm('l', 'u', 'n', 'n', k, k, cone, t, ldt, work, ldwork)
           ! col1_(4) compute b1: = - v2 * w1 = - b1 * w1,
           ! v2 = b1, w1 is upper-triangular with zeroes below the diagonal.
           if (m > 0) then
              call stdlib_ctrmm('r', 'u', 'n', 'n', m, k, -cone, work, ldwork, b, ldb)
           end if
           if (lnotident) then
              ! col1_(5) compute w1: = v1 * w1 = a1 * w1,
              ! v1 is not an identity matrix, but unit lower-triangular
              ! v1 stored in a1 (diagonal ones are not stored),
              ! w1 is upper-triangular on input with zeroes below the diagonal,
              ! and square on output.
              call stdlib_ctrmm('l', 'l', 'n', 'u', k, k, cone, a, lda, work, ldwork)
              ! col1_(6) compute a1: = a1 - w1 = a(1:k, 1:k) - work(1:k, 1:k)
              ! column-by-column. a1 is upper-triangular on input.
              ! if ident, a1 is square on output, and w1 is square,
              ! if not ident, a1 is upper-triangular on output,
              ! w1 is upper-triangular.
              ! col1_(6)_a compute elements of a1 below the diagonal.
              do j = 1, k - 1
                 do i = j + 1, k
                    a(i, j) = -work(i, j)
                 end do
              end do
           end if
           ! col1_(6)_b compute elements of a1 on and above the diagonal.
           do j = 1, k
              do i = 1, j
                 a(i, j) = a(i, j) - work(i, j)
              end do
           end do
           return
           ! end of stdlib_clarfb_gett
     end subroutine stdlib_clarfb_gett

     ! CLARFG generates a complex elementary reflector H of order n, such
     ! that
     ! H**H * ( alpha ) = ( beta ),   H**H * H = I.
     ! (   x   )   (   0  )
     ! where alpha and beta are scalars, with beta real, and x is an
     ! (n-1)-element complex vector. H is represented in the form
     ! H = I - tau * ( 1 ) * ( 1 v**H ) ,
     ! ( v )
     ! where tau is a complex scalar and v is a complex (n-1)-element
     ! vector. Note that H is not hermitian.
     ! If the elements of x are all zero and alpha is real, then tau = 0
     ! and H is taken to be the unit matrix.
     ! Otherwise  1 <= real(tau) <= 2  and  abs(tau-1) <= 1 .

     subroutine stdlib_clarfg(n, alpha, x, incx, tau)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, n
           complex(sp) :: alpha, tau
           ! .. array arguments ..
           complex(sp) :: x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: j, knt
           real(sp) :: alphi, alphr, beta, rsafmn, safmin, xnorm
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, real, sign
     
           ! .. executable statements ..
           if (n <= 0) then
              tau = zero
              return
           end if
           xnorm = stdlib_scnrm2(n - 1, x, incx)
           alphr = real(alpha)
           alphi = aimag(alpha)
           if (xnorm == zero .and. alphi == zero) then
              ! h  =  i
              tau = zero
           else
              ! general case
              beta = -sign(stdlib_slapy3(alphr, alphi, xnorm), alphr)
              safmin = stdlib_slamch('s')/stdlib_slamch('e')
              rsafmn = one/safmin
              knt = 0
              if (abs(beta) < safmin) then
                 ! xnorm, beta may be inaccurate; scale x and recompute them
10      continue
                 knt = knt + 1
                 call stdlib_csscal(n - 1, rsafmn, x, incx)
                 beta = beta*rsafmn
                 alphi = alphi*rsafmn
                 alphr = alphr*rsafmn
                 if ((abs(beta) < safmin) .and. (knt < 20)) go to 10
                 ! new beta is at most 1, at least safmin
                 xnorm = stdlib_scnrm2(n - 1, x, incx)
                 alpha = cmplx(alphr, alphi, KIND=sp)
                 beta = -sign(stdlib_slapy3(alphr, alphi, xnorm), alphr)
              end if
              tau = cmplx((beta - alphr)/beta, -alphi/beta, KIND=sp)
              alpha = stdlib_cladiv(cmplx(one, KIND=sp), alpha - beta)
              call stdlib_cscal(n - 1, alpha, x, incx)
              ! if alpha is subnormal, it may lose relative accuracy
              do j = 1, knt
                 beta = beta*safmin
              end do
              alpha = beta
           end if
           return
           ! end of stdlib_clarfg
     end subroutine stdlib_clarfg

     ! CLARFGP generates a complex elementary reflector H of order n, such
     ! that
     ! H**H * ( alpha ) = ( beta ),   H**H * H = I.
     ! (   x   )   (   0  )
     ! where alpha and beta are scalars, beta is real and non-negative, and
     ! x is an (n-1)-element complex vector.  H is represented in the form
     ! H = I - tau * ( 1 ) * ( 1 v**H ) ,
     ! ( v )
     ! where tau is a complex scalar and v is a complex (n-1)-element
     ! vector. Note that H is not hermitian.
     ! If the elements of x are all zero and alpha is real, then tau = 0
     ! and H is taken to be the unit matrix.

     subroutine stdlib_clarfgp(n, alpha, x, incx, tau)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, n
           complex(sp) :: alpha, tau
           ! .. array arguments ..
           complex(sp) :: x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: j, knt
           real(sp) :: alphi, alphr, beta, bignum, smlnum, xnorm
           complex(sp) :: savealpha
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, real, sign
     
           ! .. executable statements ..
           if (n <= 0) then
              tau = zero
              return
           end if
           xnorm = stdlib_scnrm2(n - 1, x, incx)
           alphr = real(alpha)
           alphi = aimag(alpha)
           if (xnorm == zero) then
              ! h  =  [1-alpha/abs(alpha) 0; 0 i], sign chosen so alpha >= 0.
              if (alphi == zero) then
                 if (alphr >= zero) then
                    ! when tau.eq.zero, the vector is special-cased to be
                    ! all zeros in the application routines.  we do not need
                    ! to clear it.
                    tau = zero
                 else
                    ! however, the application routines rely on explicit
                    ! zero checks when tau.ne.zero, and we must clear x.
                    tau = two
                    do j = 1, n - 1
                       x(1 + (j - 1)*incx) = zero
                    end do
                    alpha = -alpha
                 end if
              else
                 ! only "reflecting" the diagonal entry to be real and non-negative.
                 xnorm = stdlib_slapy2(alphr, alphi)
                 tau = cmplx(one - alphr/xnorm, -alphi/xnorm, KIND=sp)
                 do j = 1, n - 1
                    x(1 + (j - 1)*incx) = zero
                 end do
                 alpha = xnorm
              end if
           else
              ! general case
              beta = sign(stdlib_slapy3(alphr, alphi, xnorm), alphr)
              smlnum = stdlib_slamch('s')/stdlib_slamch('e')
              bignum = one/smlnum
              knt = 0
              if (abs(beta) < smlnum) then
                 ! xnorm, beta may be inaccurate; scale x and recompute them
10      continue
                 knt = knt + 1
                 call stdlib_csscal(n - 1, bignum, x, incx)
                 beta = beta*bignum
                 alphi = alphi*bignum
                 alphr = alphr*bignum
                 if ((abs(beta) < smlnum) .and. (knt < 20)) go to 10
                 ! new beta is at most 1, at least smlnum
                 xnorm = stdlib_scnrm2(n - 1, x, incx)
                 alpha = cmplx(alphr, alphi, KIND=sp)
                 beta = sign(stdlib_slapy3(alphr, alphi, xnorm), alphr)
              end if
              savealpha = alpha
              alpha = alpha + beta
              if (beta < zero) then
                 beta = -beta
                 tau = -alpha/beta
              else
                 alphr = alphi*(alphi/real(alpha))
                 alphr = alphr + xnorm*(xnorm/real(alpha))
                 tau = cmplx(alphr/beta, -alphi/beta, KIND=sp)
                 alpha = cmplx(-alphr, alphi, KIND=sp)
              end if
              alpha = stdlib_cladiv(cmplx(one, KIND=sp), alpha)
              if (abs(tau) <= smlnum) then
                 ! in the case where the computed tau ends up being a denormalized number,
                 ! it loses relative accuracy. this is a big problem. solution: flush tau
                 ! to zero (or two or whatever makes a nonnegative real number for beta).
                 ! (bug report provided by pat quillen from mathworks on jul 29, 2009.)
                 ! (thanks pat. thanks mathworks.)
                 alphr = real(savealpha)
                 alphi = aimag(savealpha)
                 if (alphi == zero) then
                    if (alphr >= zero) then
                       tau = zero
                    else
                       tau = two
                       do j = 1, n - 1
                          x(1 + (j - 1)*incx) = zero
                       end do
                       beta = real(-savealpha)
                    end if
                 else
                    xnorm = stdlib_slapy2(alphr, alphi)
                    tau = cmplx(one - alphr/xnorm, -alphi/xnorm, KIND=sp)
                    do j = 1, n - 1
                       x(1 + (j - 1)*incx) = zero
                    end do
                    beta = xnorm
                 end if
              else
                 ! this is the general case.
                 call stdlib_cscal(n - 1, alpha, x, incx)
              end if
              ! if beta is subnormal, it may lose relative accuracy
              do j = 1, knt
                 beta = beta*smlnum
              end do
              alpha = beta
           end if
           return
           ! end of stdlib_clarfgp
     end subroutine stdlib_clarfgp

     ! CLARFT forms the triangular factor T of a complex block reflector H
     ! of order n, which is defined as a product of k elementary reflectors.
     ! If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
     ! If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
     ! If STOREV = 'C', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th column of the array V, and
     ! H  =  I - V * T * V**H
     ! If STOREV = 'R', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th row of the array V, and
     ! H  =  I - V**H * T * V

     subroutine stdlib_clarft(direct, storev, n, k, v, ldv, tau, t, ldt)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, storev
           integer(ilp) :: k, ldt, ldv, n
           ! .. array arguments ..
           complex(sp) :: t(ldt, *), tau(*), v(ldv, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, j, prevlastv, lastv
     
           ! .. executable statements ..
           ! quick return if possible
           if (n == 0) return
           if (stdlib_lsame(direct, 'f')) then
              prevlastv = n
              do i = 1, k
                 prevlastv = max(prevlastv, i)
                 if (tau(i) == czero) then
                    ! h(i)  =  i
                    do j = 1, i
                       t(j, i) = czero
                    end do
                 else
                    ! general case
                    if (stdlib_lsame(storev, 'c')) then
                       ! skip any trailing zeros.
                       do lastv = n, i + 1, -1
                          if (v(lastv, i) /= czero) exit
                       end do
                       do j = 1, i - 1
                          t(j, i) = -tau(i)*conjg(v(i, j))
                       end do
                       j = min(lastv, prevlastv)
                       ! t(1:i-1,i) := - tau(i) * v(i:j,1:i-1)**h * v(i:j,i)
                       call stdlib_cgemv('conjugate transpose', j - i, i - 1, -tau(i), v(i + 1, 1), &
                                 ldv, v(i + 1, i), 1, cone, t(1, i), 1)
                    else
                       ! skip any trailing zeros.
                       do lastv = n, i + 1, -1
                          if (v(i, lastv) /= czero) exit
                       end do
                       do j = 1, i - 1
                          t(j, i) = -tau(i)*v(j, i)
                       end do
                       j = min(lastv, prevlastv)
                       ! t(1:i-1,i) := - tau(i) * v(1:i-1,i:j) * v(i,i:j)**h
                       call stdlib_cgemm('n', 'c', i - 1, 1, j - i, -tau(i), v(1, i + 1), ldv, v(i, &
                                  i + 1), ldv, cone, t(1, i), ldt)
                    end if
                    ! t(1:i-1,i) := t(1:i-1,1:i-1) * t(1:i-1,i)
                    call stdlib_ctrmv('upper', 'no transpose', 'non-unit', i - 1, t, ldt, t(1, i), &
                               1)
                    t(i, i) = tau(i)
                    if (i > 1) then
                       prevlastv = max(prevlastv, lastv)
                    else
                       prevlastv = lastv
                    end if
                 end if
              end do
           else
              prevlastv = 1
              do i = k, 1, -1
                 if (tau(i) == czero) then
                    ! h(i)  =  i
                    do j = i, k
                       t(j, i) = czero
                    end do
                 else
                    ! general case
                    if (i < k) then
                       if (stdlib_lsame(storev, 'c')) then
                          ! skip any leading zeros.
                          do lastv = 1, i - 1
                             if (v(lastv, i) /= czero) exit
                          end do
                          do j = i + 1, k
                             t(j, i) = -tau(i)*conjg(v(n - k + i, j))
                          end do
                          j = max(lastv, prevlastv)
                          ! t(i+1:k,i) = -tau(i) * v(j:n-k+i,i+1:k)**h * v(j:n-k+i,i)
                          call stdlib_cgemv('conjugate transpose', n - k + i - j, k - i, -tau(i), v(j, &
                                    i + 1), ldv, v(j, i), 1, cone, t(i + 1, i), 1)
                       else
                          ! skip any leading zeros.
                          do lastv = 1, i - 1
                             if (v(i, lastv) /= czero) exit
                          end do
                          do j = i + 1, k
                             t(j, i) = -tau(i)*v(j, n - k + i)
                          end do
                          j = max(lastv, prevlastv)
                          ! t(i+1:k,i) = -tau(i) * v(i+1:k,j:n-k+i) * v(i,j:n-k+i)**h
                          call stdlib_cgemm('n', 'c', k - i, 1, n - k + i - j, -tau(i), v(i + 1, j), &
                                    ldv, v(i, j), ldv, cone, t(i + 1, i), ldt)
                       end if
                       ! t(i+1:k,i) := t(i+1:k,i+1:k) * t(i+1:k,i)
                       call stdlib_ctrmv('lower', 'no transpose', 'non-unit', k - i, t(i + 1, i + 1), &
                                 ldt, t(i + 1, i), 1)
                       if (i > 1) then
                          prevlastv = min(prevlastv, lastv)
                       else
                          prevlastv = lastv
                       end if
                    end if
                    t(i, i) = tau(i)
                 end if
              end do
           end if
           return
           ! end of stdlib_clarft
     end subroutine stdlib_clarft

     ! CLARFX applies a complex elementary reflector H to a complex m by n
     ! matrix C, from either the left or the right. H is represented in the
     ! form
     ! H = I - tau * v * v**H
     ! where tau is a complex scalar and v is a complex vector.
     ! If tau = 0, then H is taken to be the unit matrix
     ! This version uses inline code if H has order < 11.

     subroutine stdlib_clarfx(side, m, n, v, tau, c, ldc, work)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side
           integer(ilp) :: ldc, m, n
           complex(sp) :: tau
           ! .. array arguments ..
           complex(sp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: j
           complex(sp) :: sum, t1, t10, t2, t3, t4, t5, t6, t7, t8, t9, v1, v10, v2, v3, v4, v5, &
                     v6, v7, v8, v9
     
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           if (tau == czero) return
           if (stdlib_lsame(side, 'l')) then
              ! form  h * c, where h has order m.
              go to(10, 30, 50, 70, 90, 110, 130, 150, 170, 190) m
              ! code for general m
              call stdlib_clarf(side, m, n, v, 1, tau, c, ldc, work)
              go to 410
10      continue
              ! special code for 1 x 1 householder
              t1 = cone - tau*v(1)*conjg(v(1))
              do j = 1, n
                 c(1, j) = t1*c(1, j)
              end do
              go to 410
30      continue
              ! special code for 2 x 2 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
              end do
              go to 410
50      continue
              ! special code for 3 x 3 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
              end do
              go to 410
70      continue
              ! special code for 4 x 4 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              v4 = conjg(v(4))
              t4 = tau*conjg(v4)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
              end do
              go to 410
90      continue
              ! special code for 5 x 5 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              v4 = conjg(v(4))
              t4 = tau*conjg(v4)
              v5 = conjg(v(5))
              t5 = tau*conjg(v5)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j)
                           
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
              end do
              go to 410
110    continue
              ! special code for 6 x 6 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              v4 = conjg(v(4))
              t4 = tau*conjg(v4)
              v5 = conjg(v(5))
              t5 = tau*conjg(v5)
              v6 = conjg(v(6))
              t6 = tau*conjg(v6)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
              end do
              go to 410
130    continue
              ! special code for 7 x 7 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              v4 = conjg(v(4))
              t4 = tau*conjg(v4)
              v5 = conjg(v(5))
              t5 = tau*conjg(v5)
              v6 = conjg(v(6))
              t6 = tau*conjg(v6)
              v7 = conjg(v(7))
              t7 = tau*conjg(v7)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
              end do
              go to 410
150    continue
              ! special code for 8 x 8 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              v4 = conjg(v(4))
              t4 = tau*conjg(v4)
              v5 = conjg(v(5))
              t5 = tau*conjg(v5)
              v6 = conjg(v(6))
              t6 = tau*conjg(v6)
              v7 = conjg(v(7))
              t7 = tau*conjg(v7)
              v8 = conjg(v(8))
              t8 = tau*conjg(v8)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j) + v8*c(8, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
                 c(8, j) = c(8, j) - sum*t8
              end do
              go to 410
170    continue
              ! special code for 9 x 9 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              v4 = conjg(v(4))
              t4 = tau*conjg(v4)
              v5 = conjg(v(5))
              t5 = tau*conjg(v5)
              v6 = conjg(v(6))
              t6 = tau*conjg(v6)
              v7 = conjg(v(7))
              t7 = tau*conjg(v7)
              v8 = conjg(v(8))
              t8 = tau*conjg(v8)
              v9 = conjg(v(9))
              t9 = tau*conjg(v9)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j) + v8*c(8, j) + v9*c(9, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
                 c(8, j) = c(8, j) - sum*t8
                 c(9, j) = c(9, j) - sum*t9
              end do
              go to 410
190    continue
              ! special code for 10 x 10 householder
              v1 = conjg(v(1))
              t1 = tau*conjg(v1)
              v2 = conjg(v(2))
              t2 = tau*conjg(v2)
              v3 = conjg(v(3))
              t3 = tau*conjg(v3)
              v4 = conjg(v(4))
              t4 = tau*conjg(v4)
              v5 = conjg(v(5))
              t5 = tau*conjg(v5)
              v6 = conjg(v(6))
              t6 = tau*conjg(v6)
              v7 = conjg(v(7))
              t7 = tau*conjg(v7)
              v8 = conjg(v(8))
              t8 = tau*conjg(v8)
              v9 = conjg(v(9))
              t9 = tau*conjg(v9)
              v10 = conjg(v(10))
              t10 = tau*conjg(v10)
              do j = 1, n
                 sum = v1*c(1, j) + v2*c(2, j) + v3*c(3, j) + v4*c(4, j) + v5*c(5, j) + &
                           v6*c(6, j) + v7*c(7, j) + v8*c(8, j) + v9*c(9, j) + v10*c(10, j)
                 c(1, j) = c(1, j) - sum*t1
                 c(2, j) = c(2, j) - sum*t2
                 c(3, j) = c(3, j) - sum*t3
                 c(4, j) = c(4, j) - sum*t4
                 c(5, j) = c(5, j) - sum*t5
                 c(6, j) = c(6, j) - sum*t6
                 c(7, j) = c(7, j) - sum*t7
                 c(8, j) = c(8, j) - sum*t8
                 c(9, j) = c(9, j) - sum*t9
                 c(10, j) = c(10, j) - sum*t10
              end do
              go to 410
           else
              ! form  c * h, where h has order n.
              go to(210, 230, 250, 270, 290, 310, 330, 350, 370, 390) n
              ! code for general n
              call stdlib_clarf(side, m, n, v, 1, tau, c, ldc, work)
              go to 410
210    continue
              ! special code for 1 x 1 householder
              t1 = cone - tau*v(1)*conjg(v(1))
              do j = 1, m
                 c(j, 1) = t1*c(j, 1)
              end do
              go to 410
230    continue
              ! special code for 2 x 2 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
              end do
              go to 410
250    continue
              ! special code for 3 x 3 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
              end do
              go to 410
270    continue
              ! special code for 4 x 4 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              v4 = v(4)
              t4 = tau*conjg(v4)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
              end do
              go to 410
290    continue
              ! special code for 5 x 5 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              v4 = v(4)
              t4 = tau*conjg(v4)
              v5 = v(5)
              t5 = tau*conjg(v5)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5)
                           
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
              end do
              go to 410
310    continue
              ! special code for 6 x 6 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              v4 = v(4)
              t4 = tau*conjg(v4)
              v5 = v(5)
              t5 = tau*conjg(v5)
              v6 = v(6)
              t6 = tau*conjg(v6)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
              end do
              go to 410
330    continue
              ! special code for 7 x 7 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              v4 = v(4)
              t4 = tau*conjg(v4)
              v5 = v(5)
              t5 = tau*conjg(v5)
              v6 = v(6)
              t6 = tau*conjg(v6)
              v7 = v(7)
              t7 = tau*conjg(v7)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
              end do
              go to 410
350    continue
              ! special code for 8 x 8 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              v4 = v(4)
              t4 = tau*conjg(v4)
              v5 = v(5)
              t5 = tau*conjg(v5)
              v6 = v(6)
              t6 = tau*conjg(v6)
              v7 = v(7)
              t7 = tau*conjg(v7)
              v8 = v(8)
              t8 = tau*conjg(v8)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7) + v8*c(j, 8)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
                 c(j, 8) = c(j, 8) - sum*t8
              end do
              go to 410
370    continue
              ! special code for 9 x 9 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              v4 = v(4)
              t4 = tau*conjg(v4)
              v5 = v(5)
              t5 = tau*conjg(v5)
              v6 = v(6)
              t6 = tau*conjg(v6)
              v7 = v(7)
              t7 = tau*conjg(v7)
              v8 = v(8)
              t8 = tau*conjg(v8)
              v9 = v(9)
              t9 = tau*conjg(v9)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7) + v8*c(j, 8) + v9*c(j, 9)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
                 c(j, 8) = c(j, 8) - sum*t8
                 c(j, 9) = c(j, 9) - sum*t9
              end do
              go to 410
390    continue
              ! special code for 10 x 10 householder
              v1 = v(1)
              t1 = tau*conjg(v1)
              v2 = v(2)
              t2 = tau*conjg(v2)
              v3 = v(3)
              t3 = tau*conjg(v3)
              v4 = v(4)
              t4 = tau*conjg(v4)
              v5 = v(5)
              t5 = tau*conjg(v5)
              v6 = v(6)
              t6 = tau*conjg(v6)
              v7 = v(7)
              t7 = tau*conjg(v7)
              v8 = v(8)
              t8 = tau*conjg(v8)
              v9 = v(9)
              t9 = tau*conjg(v9)
              v10 = v(10)
              t10 = tau*conjg(v10)
              do j = 1, m
                 sum = v1*c(j, 1) + v2*c(j, 2) + v3*c(j, 3) + v4*c(j, 4) + v5*c(j, 5) + &
                           v6*c(j, 6) + v7*c(j, 7) + v8*c(j, 8) + v9*c(j, 9) + v10*c(j, 10)
                 c(j, 1) = c(j, 1) - sum*t1
                 c(j, 2) = c(j, 2) - sum*t2
                 c(j, 3) = c(j, 3) - sum*t3
                 c(j, 4) = c(j, 4) - sum*t4
                 c(j, 5) = c(j, 5) - sum*t5
                 c(j, 6) = c(j, 6) - sum*t6
                 c(j, 7) = c(j, 7) - sum*t7
                 c(j, 8) = c(j, 8) - sum*t8
                 c(j, 9) = c(j, 9) - sum*t9
                 c(j, 10) = c(j, 10) - sum*t10
              end do
              go to 410
           end if
410    return
           ! end of stdlib_clarfx
     end subroutine stdlib_clarfx

     ! CLARFY applies an elementary reflector, or Householder matrix, H,
     ! to an n x n Hermitian matrix C, from both the left and the right.
     ! H is represented in the form
     ! H = I - tau * v * v'
     ! where  tau  is a scalar and  v  is a vector.
     ! If  tau  is  zero, then  H  is taken to be the unit matrix.

     subroutine stdlib_clarfy(uplo, n, v, incv, tau, c, ldc, work)
        ! -- lapack test routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: incv, ldc, n
           complex(sp) :: tau
           ! .. array arguments ..
           complex(sp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           complex(sp) :: alpha
     
           ! .. executable statements ..
           if (tau == czero) return
           ! form  w:= c * v
           call stdlib_chemv(uplo, n, cone, c, ldc, v, incv, czero, work, 1)
           alpha = -chalf*tau*stdlib_cdotc(n, work, 1, v, incv)
           call stdlib_caxpy(n, alpha, v, incv, work, 1)
           ! c := c - v * w' - w * v'
           call stdlib_cher2(uplo, n, -tau, v, incv, work, 1, c, ldc)
           return
           ! end of stdlib_clarfy
     end subroutine stdlib_clarfy

     ! CLARNV returns a vector of n random complex numbers from a uniform or
     ! normal distribution.

     subroutine stdlib_clarnv(idist, iseed, n, x)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: idist, n
           ! .. array arguments ..
           integer(ilp) :: iseed(4)
           complex(sp) :: x(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: lv = 128
           real(sp), parameter :: twopi = 6.28318530717958647692528676655900576839_sp
           
           ! .. local scalars ..
           integer(ilp) :: i, il, iv
           ! .. local arrays ..
           real(sp) :: u(lv)
           ! .. intrinsic functions ..
           intrinsic :: cmplx, exp, log, min, sqrt
     
           ! .. executable statements ..
           do 60 iv = 1, n, lv/2
              il = min(lv/2, n - iv + 1)
              ! call stdlib_slaruv to generate 2*il real numbers from a uniform (0,1)
              ! distribution (2*il <= lv)
              call stdlib_slaruv(iseed, 2*il, u)
              if (idist == 1) then
                 ! copy generated numbers
                 do i = 1, il
                    x(iv + i - 1) = cmplx(u(2*i - 1), u(2*i))
                 end do
              else if (idist == 2) then
                 ! convert generated numbers to uniform (-1,1) distribution
                 do i = 1, il
                    x(iv + i - 1) = cmplx(two*u(2*i - 1) - one, two*u(2*i) - one)
                 end do
              else if (idist == 3) then
                 ! convert generated numbers to normal (0,1) distribution
                 do i = 1, il
                    x(iv + i - 1) = sqrt(-two*log(u(2*i - 1)))*exp(cmplx(zero, twopi*u(2*i) &
                              , KIND=sp))
                 end do
              else if (idist == 4) then
                 ! convert generated numbers to complex numbers uniformly
                 ! distributed on the unit disk
                 do i = 1, il
                    x(iv + i - 1) = sqrt(u(2*i - 1))*exp(cmplx(zero, twopi*u(2*i), KIND=sp))
                              
                 end do
              else if (idist == 5) then
                 ! convert generated numbers to complex numbers uniformly
                 ! distributed on the unit circle
                 do i = 1, il
                    x(iv + i - 1) = exp(cmplx(zero, twopi*u(2*i), KIND=sp))
                 end do
              end if
60      continue
           return
           ! end of stdlib_clarnv
     end subroutine stdlib_clarnv

     ! !
     ! CLARTG generates a plane rotation so that
     ! [  C         S  ] . [ F ]  =  [ R ]
     ! [ -conjg(S)  C  ]   [ G ]     [ 0 ]
     ! where C is real and C**2 + |S|**2 = 1.
     ! The mathematical formulas used for C and S are
     ! sgn(x) = {  x / |x|,   x != 0
     ! {  1,         x = 0
     ! R = sgn(F) * sqrt(|F|**2 + |G|**2)
     ! C = |F| / sqrt(|F|**2 + |G|**2)
     ! S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
     ! When F and G are real, the formulas simplify to C = F/R and
     ! S = G/R, and the returned values of C, S, and R should be
     ! identical to those returned by CLARTG.
     ! The algorithm used to compute these quantities incorporates scaling
     ! to avoid overflow or underflow in computing the square root of the
     ! sum of squares.
     ! This is a faster version of the BLAS1 routine CROTG, except for
     ! the following differences:
     ! F and G are unchanged on return.
     ! If G=0, then C=1 and S=0.
     ! If F=0, then C=0 and S is chosen so that R is real.
     ! Below, wp=>sp stands for single precision from LA_CONSTANTS module.

     subroutine stdlib_clartg(f, g, c, s, r)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! february 2021
        ! .. scalar arguments ..
        real(sp) :: c
        complex(sp) :: f, g, r, s
        ! .. local scalars ..
        real(sp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
        complex(sp) :: fs, gs, t
        ! .. intrinsic functions ..
        intrinsic :: abs, aimag, conjg, max, min, real, sqrt
        ! .. statement functions ..
        real(sp) :: abssq
        ! .. statement function definitions ..
        abssq(t) = real(t)**2 + aimag(t)**2
        ! .. executable statements ..
        if (g == czero) then
           c = one
           s = czero
           r = f
        else if (f == czero) then
           c = zero
           g1 = max(abs(real(g)), abs(aimag(g)))
           if (g1 > rtmin .and. g1 < rtmax) then
              ! use unscaled algorithm
              g2 = abssq(g)
              d = sqrt(g2)
              s = conjg(g)/d
              r = d
           else
              ! use scaled algorithm
              u = min(safmax, max(safmin, g1))
              uu = one/u
              gs = g*uu
              g2 = abssq(gs)
              d = sqrt(g2)
              s = conjg(gs)/d
              r = d*u
           end if
        else
           f1 = max(abs(real(f)), abs(aimag(f)))
           g1 = max(abs(real(g)), abs(aimag(g)))
     if (f1 > rtmin .and. f1 < rtmax .and. g1 > rtmin .and. g1 < rtmax) then
              ! use unscaled algorithm
              f2 = abssq(f)
              g2 = abssq(g)
              h2 = f2 + g2
              if (f2 > rtmin .and. h2 < rtmax) then
                 d = sqrt(f2*h2)
              else
                 d = sqrt(f2)*sqrt(h2)
              end if
              p = 1/d
              c = f2*p
              s = conjg(g)*(f*p)
              r = f*(h2*p)
           else
              ! use scaled algorithm
              u = min(safmax, max(safmin, f1, g1))
              uu = one/u
              gs = g*uu
              g2 = abssq(gs)
              if (f1*uu < rtmin) then
                 ! f is not well-scaled when scaled by g1.
                 ! use a different scaling for f.
                 v = min(safmax, max(safmin, f1))
                 vv = one/v
                 w = v*uu
                 fs = f*vv
                 f2 = abssq(fs)
                 h2 = f2*w**2 + g2
              else
                 ! otherwise use the same scaling for f and g.
                 w = one
                 fs = f*uu
                 f2 = abssq(fs)
                 h2 = f2 + g2
              end if
              if (f2 > rtmin .and. h2 < rtmax) then
                 d = sqrt(f2*h2)
              else
                 d = sqrt(f2)*sqrt(h2)
              end if
              p = 1/d
              c = (f2*p)*w
              s = conjg(gs)*(fs*p)
              r = (fs*(h2*p))*u
           end if
        end if
        return
     end subroutine stdlib_clartg

     ! CLARTV applies a vector of complex plane rotations with real cosines
     ! to elements of the complex vectors x and y. For i = 1,2,...,n
     ! ( x(i) ) := (        c(i)   s(i) ) ( x(i) )
     ! ( y(i) )    ( -conjg(s(i))  c(i) ) ( y(i) )

     subroutine stdlib_clartv(n, x, incx, y, incy, c, s, incc)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incc, incx, incy, n
           ! .. array arguments ..
           real(sp) :: c(*)
           complex(sp) :: s(*), x(*), y(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ic, ix, iy
           complex(sp) :: xi, yi
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           ix = 1
           iy = 1
           ic = 1
           do i = 1, n
              xi = x(ix)
              yi = y(iy)
              x(ix) = c(ic)*xi + s(ic)*yi
              y(iy) = c(ic)*yi - conjg(s(ic))*xi
              ix = ix + incx
              iy = iy + incy
              ic = ic + incc
           end do
           return
           ! end of stdlib_clartv
     end subroutine stdlib_clartv

     ! CLARZ applies a complex elementary reflector H to a complex
     ! M-by-N matrix C, from either the left or the right. H is represented
     ! in the form
     ! H = I - tau * v * v**H
     ! where tau is a complex scalar and v is a complex vector.
     ! If tau = 0, then H is taken to be the unit matrix.
     ! To apply H**H (the conjugate transpose of H), supply conjg(tau) instead
     ! tau.
     ! H is a product of k elementary reflectors as returned by CTZRZF.

     subroutine stdlib_clarz(side, m, n, l, v, incv, tau, c, ldc, work)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: side
           integer(ilp) :: incv, l, ldc, m, n
           complex(sp) :: tau
           ! .. array arguments ..
           complex(sp) :: c(ldc, *), v(*), work(*)
        ! =====================================================================
           
           ! .. executable statements ..
           if (stdlib_lsame(side, 'l')) then
              ! form  h * c
              if (tau /= czero) then
                 ! w( 1:n ) = conjg( c( 1, 1:n ) )
                 call stdlib_ccopy(n, c, ldc, work, 1)
                 call stdlib_clacgv(n, work, 1)
                 ! w( 1:n ) = conjg( w( 1:n ) + c( m-l+1:m, 1:n )**h * v( 1:l ) )
                 call stdlib_cgemv('conjugate transpose', l, n, cone, c(m - l + 1, 1), ldc, v, incv, &
                            cone, work, 1)
                 call stdlib_clacgv(n, work, 1)
                 ! c( 1, 1:n ) = c( 1, 1:n ) - tau * w( 1:n )
                 call stdlib_caxpy(n, -tau, work, 1, c, ldc)
                 ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ...
                                     ! tau * v( 1:l ) * w( 1:n )**h
                 call stdlib_cgeru(l, n, -tau, v, incv, work, 1, c(m - l + 1, 1), ldc)
              end if
           else
              ! form  c * h
              if (tau /= czero) then
                 ! w( 1:m ) = c( 1:m, 1 )
                 call stdlib_ccopy(m, c, 1, work, 1)
                 ! w( 1:m ) = w( 1:m ) + c( 1:m, n-l+1:n, 1:n ) * v( 1:l )
                 call stdlib_cgemv('no transpose', m, l, cone, c(1, n - l + 1), ldc, v, incv, cone, &
                           work, 1)
                 ! c( 1:m, 1 ) = c( 1:m, 1 ) - tau * w( 1:m )
                 call stdlib_caxpy(m, -tau, work, 1, c, 1)
                 ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ...
                                     ! tau * w( 1:m ) * v( 1:l )**h
                 call stdlib_cgerc(m, l, -tau, work, 1, v, incv, c(1, n - l + 1), ldc)
              end if
           end if
           return
           ! end of stdlib_clarz
     end subroutine stdlib_clarz

     ! CLARZB applies a complex block reflector H or its transpose H**H
     ! to a complex distributed M-by-N  C from the left or the right.
     ! Currently, only STOREV = 'R' and DIRECT = 'B' are supported.

     subroutine stdlib_clarzb(side, trans, direct, storev, m, n, k, l, v, ldv, t, ldt, c, ldc, &
               work, ldwork)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, side, storev, trans
           integer(ilp) :: k, l, ldc, ldt, ldv, ldwork, m, n
           ! .. array arguments ..
           complex(sp) :: c(ldc, *), t(ldt, *), v(ldv, *), work(ldwork, *)
        ! =====================================================================
           
           ! .. local scalars ..
           character :: transt
           integer(ilp) :: i, info, j
     
           ! .. executable statements ..
           ! quick return if possible
           if (m <= 0 .or. n <= 0) return
           ! check for currently supported options
           info = 0
           if (.not. stdlib_lsame(direct, 'b')) then
              info = -3
           else if (.not. stdlib_lsame(storev, 'r')) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clarzb', -info)
              return
           end if
           if (stdlib_lsame(trans, 'n')) then
              transt = 'c'
           else
              transt = 'n'
           end if
           if (stdlib_lsame(side, 'l')) then
              ! form  h * c  or  h**h * c
              ! w( 1:n, 1:k ) = c( 1:k, 1:n )**h
              do j = 1, k
                 call stdlib_ccopy(n, c(j, 1), ldc, work(1, j), 1)
              end do
              ! w( 1:n, 1:k ) = w( 1:n, 1:k ) + ...
                              ! c( m-l+1:m, 1:n )**h * v( 1:k, 1:l )**t
              if (l > 0) call stdlib_cgemm('transpose', 'conjugate transpose', n, k, l, cone, c(m - &
                        l + 1, 1), ldc, v, ldv, cone, work, ldwork)
              ! w( 1:n, 1:k ) = w( 1:n, 1:k ) * t**t  or  w( 1:m, 1:k ) * t
              call stdlib_ctrmm('right', 'lower', transt, 'non-unit', n, k, cone, t, ldt, work, &
                        ldwork)
              ! c( 1:k, 1:n ) = c( 1:k, 1:n ) - w( 1:n, 1:k )**h
              do j = 1, n
                 do i = 1, k
                    c(i, j) = c(i, j) - work(j, i)
                 end do
              end do
              ! c( m-l+1:m, 1:n ) = c( m-l+1:m, 1:n ) - ...
                                  ! v( 1:k, 1:l )**h * w( 1:n, 1:k )**h
              if (l > 0) call stdlib_cgemm('transpose', 'transpose', l, n, k, -cone, v, ldv, work, &
                        ldwork, cone, c(m - l + 1, 1), ldc)
           else if (stdlib_lsame(side, 'r')) then
              ! form  c * h  or  c * h**h
              ! w( 1:m, 1:k ) = c( 1:m, 1:k )
              do j = 1, k
                 call stdlib_ccopy(m, c(1, j), 1, work(1, j), 1)
              end do
              ! w( 1:m, 1:k ) = w( 1:m, 1:k ) + ...
                              ! c( 1:m, n-l+1:n ) * v( 1:k, 1:l )**h
              if (l > 0) call stdlib_cgemm('no transpose', 'transpose', m, k, l, cone, c(1, n - l + 1) &
                        , ldc, v, ldv, cone, work, ldwork)
              ! w( 1:m, 1:k ) = w( 1:m, 1:k ) * conjg( t )  or
                              ! w( 1:m, 1:k ) * t**h
              do j = 1, k
                 call stdlib_clacgv(k - j + 1, t(j, j), 1)
              end do
              call stdlib_ctrmm('right', 'lower', trans, 'non-unit', m, k, cone, t, ldt, work, &
                        ldwork)
              do j = 1, k
                 call stdlib_clacgv(k - j + 1, t(j, j), 1)
              end do
              ! c( 1:m, 1:k ) = c( 1:m, 1:k ) - w( 1:m, 1:k )
              do j = 1, k
                 do i = 1, m
                    c(i, j) = c(i, j) - work(i, j)
                 end do
              end do
              ! c( 1:m, n-l+1:n ) = c( 1:m, n-l+1:n ) - ...
                                  ! w( 1:m, 1:k ) * conjg( v( 1:k, 1:l ) )
              do j = 1, l
                 call stdlib_clacgv(k, v(1, j), 1)
              end do
              if (l > 0) call stdlib_cgemm('no transpose', 'no transpose', m, l, k, -cone, work, &
                        ldwork, v, ldv, cone, c(1, n - l + 1), ldc)
              do j = 1, l
                 call stdlib_clacgv(k, v(1, j), 1)
              end do
           end if
           return
           ! end of stdlib_clarzb
     end subroutine stdlib_clarzb

     ! CLARZT forms the triangular factor T of a complex block reflector
     ! H of order > n, which is defined as a product of k elementary
     ! reflectors.
     ! If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
     ! If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
     ! If STOREV = 'C', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th column of the array V, and
     ! H  =  I - V * T * V**H
     ! If STOREV = 'R', the vector which defines the elementary reflector
     ! H(i) is stored in the i-th row of the array V, and
     ! H  =  I - V**H * T * V
     ! Currently, only STOREV = 'R' and DIRECT = 'B' are supported.

     subroutine stdlib_clarzt(direct, storev, n, k, v, ldv, tau, t, ldt)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, storev
           integer(ilp) :: k, ldt, ldv, n
           ! .. array arguments ..
           complex(sp) :: t(ldt, *), tau(*), v(ldv, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, j
     
           ! .. executable statements ..
           ! check for currently supported options
           info = 0
           if (.not. stdlib_lsame(direct, 'b')) then
              info = -1
           else if (.not. stdlib_lsame(storev, 'r')) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clarzt', -info)
              return
           end if
           do i = k, 1, -1
              if (tau(i) == czero) then
                 ! h(i)  =  i
                 do j = i, k
                    t(j, i) = czero
                 end do
              else
                 ! general case
                 if (i < k) then
                    ! t(i+1:k,i) = - tau(i) * v(i+1:k,1:n) * v(i,1:n)**h
                    call stdlib_clacgv(n, v(i, 1), ldv)
                    call stdlib_cgemv('no transpose', k - i, n, -tau(i), v(i + 1, 1), ldv, v(i, &
                              1), ldv, czero, t(i + 1, i), 1)
                    call stdlib_clacgv(n, v(i, 1), ldv)
                    ! t(i+1:k,i) = t(i+1:k,i+1:k) * t(i+1:k,i)
                    call stdlib_ctrmv('lower', 'no transpose', 'non-unit', k - i, t(i + 1, i + 1), &
                              ldt, t(i + 1, i), 1)
                 end if
                 t(i, i) = tau(i)
              end if
           end do
           return
           ! end of stdlib_clarzt
     end subroutine stdlib_clarzt

     ! CLASCL multiplies the M by N complex matrix A by the real scalar
     ! CTO/CFROM.  This is done without over/underflow as long as the final
     ! result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
     ! A may be full, upper triangular, lower triangular, upper Hessenberg,
     ! or banded.

     subroutine stdlib_clascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: type
           integer(ilp) :: info, kl, ku, lda, m, n
           real(sp) :: cfrom, cto
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: done
           integer(ilp) :: i, itype, j, k1, k2, k3, k4
           real(sp) :: bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min
     
           ! .. executable statements ..
           ! test the input arguments
           info = 0
           if (stdlib_lsame(type, 'g')) then
              itype = 0
           else if (stdlib_lsame(type, 'l')) then
              itype = 1
           else if (stdlib_lsame(type, 'u')) then
              itype = 2
           else if (stdlib_lsame(type, 'h')) then
              itype = 3
           else if (stdlib_lsame(type, 'b')) then
              itype = 4
           else if (stdlib_lsame(type, 'q')) then
              itype = 5
           else if (stdlib_lsame(type, 'z')) then
              itype = 6
           else
              itype = -1
           end if
           if (itype == -1) then
              info = -1
           else if (cfrom == zero .or. stdlib_sisnan(cfrom)) then
              info = -4
           else if (stdlib_sisnan(cto)) then
              info = -5
           else if (m < 0) then
              info = -6
           else if (n < 0 .or. (itype == 4 .and. n /= m) .or. (itype == 5 .and. n /= m)) then
              info = -7
           else if (itype <= 3 .and. lda < max(1, m)) then
              info = -9
           else if (itype >= 4) then
              if (kl < 0 .or. kl > max(m - 1, 0)) then
                 info = -2
              else if (ku < 0 .or. ku > max(n - 1, 0) .or. ((itype == 4 .or. itype == 5) .and. kl /= ku) &
                        ) then
                 info = -3
              else if ((itype == 4 .and. lda < kl + 1) .or. (itype == 5 .and. lda < ku + 1) .or. (itype == 6 &
                        .and. lda < 2*kl + ku + 1)) then
                 info = -9
              end if
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clascl', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. m == 0) return
           ! get machine parameters
           smlnum = stdlib_slamch('s')
           bignum = one/smlnum
           cfromc = cfrom
           ctoc = cto
10      continue
           cfrom1 = cfromc*smlnum
           if (cfrom1 == cfromc) then
              ! cfromc is an inf.  multiply by a correctly signed zero for
              ! finite ctoc, or a nan if ctoc is infinite.
              mul = ctoc/cfromc
              done = .true.
              cto1 = ctoc
           else
              cto1 = ctoc/bignum
              if (cto1 == ctoc) then
                 ! ctoc is either 0 or an inf.  in both cases, ctoc itself
                 ! serves as the correct multiplication factor.
                 mul = ctoc
                 done = .true.
                 cfromc = one
              else if (abs(cfrom1) > abs(ctoc) .and. ctoc /= zero) then
                 mul = smlnum
                 done = .false.
                 cfromc = cfrom1
              else if (abs(cto1) > abs(cfromc)) then
                 mul = bignum
                 done = .false.
                 ctoc = cto1
              else
                 mul = ctoc/cfromc
                 done = .true.
              end if
           end if
           if (itype == 0) then
              ! full matrix
              do j = 1, n
                 do i = 1, m
                    a(i, j) = a(i, j)*mul
                 end do
              end do
           else if (itype == 1) then
              ! lower triangular matrix
              do j = 1, n
                 do i = j, m
                    a(i, j) = a(i, j)*mul
                 end do
              end do
           else if (itype == 2) then
              ! upper triangular matrix
              do j = 1, n
                 do i = 1, min(j, m)
                    a(i, j) = a(i, j)*mul
                 end do
              end do
           else if (itype == 3) then
              ! upper hessenberg matrix
              do j = 1, n
                 do i = 1, min(j + 1, m)
                    a(i, j) = a(i, j)*mul
                 end do
              end do
           else if (itype == 4) then
              ! lower chalf of a symmetric band matrix
              k3 = kl + 1
              k4 = n + 1
              do j = 1, n
                 do i = 1, min(k3, k4 - j)
                    a(i, j) = a(i, j)*mul
                 end do
              end do
           else if (itype == 5) then
              ! upper chalf of a symmetric band matrix
              k1 = ku + 2
              k3 = ku + 1
              do j = 1, n
                 do i = max(k1 - j, 1), k3
                    a(i, j) = a(i, j)*mul
                 end do
              end do
           else if (itype == 6) then
              ! band matrix
              k1 = kl + ku + 2
              k2 = kl + 1
              k3 = 2*kl + ku + 1
              k4 = kl + ku + 1 + m
              do j = 1, n
                 do i = max(k1 - j, k2), min(k3, k4 - j)
                    a(i, j) = a(i, j)*mul
                 end do
              end do
           end if
           if (.not. done) go to 10
           return
           ! end of stdlib_clascl
     end subroutine stdlib_clascl

     ! CLASET initializes a 2-D array A to BETA on the diagonal and
     ! ALPHA on the offdiagonals.

     subroutine stdlib_claset(uplo, m, n, alpha, beta, a, lda)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: lda, m, n
           complex(sp) :: alpha, beta
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: min
           ! .. executable statements ..
           if (stdlib_lsame(uplo, 'u')) then
              ! set the diagonal to beta and the strictly upper triangular
              ! part of the array to alpha.
              do j = 2, n
                 do i = 1, min(j - 1, m)
                    a(i, j) = alpha
                 end do
              end do
              do i = 1, min(n, m)
                 a(i, i) = beta
              end do
           else if (stdlib_lsame(uplo, 'l')) then
              ! set the diagonal to beta and the strictly lower triangular
              ! part of the array to alpha.
              do j = 1, min(m, n)
                 do i = j + 1, m
                    a(i, j) = alpha
                 end do
              end do
              do i = 1, min(n, m)
                 a(i, i) = beta
              end do
           else
              ! set the array to beta on the diagonal and alpha on the
              ! offdiagonal.
              do j = 1, n
                 do i = 1, m
                    a(i, j) = alpha
                 end do
              end do
              do i = 1, min(m, n)
                 a(i, i) = beta
              end do
           end if
           return
           ! end of stdlib_claset
     end subroutine stdlib_claset

     ! CLASR applies a sequence of real plane rotations to a complex matrix
     ! A, from either the left or the right.
     ! When SIDE = 'L', the transformation takes the form
     ! A := P*A
     ! and when SIDE = 'R', the transformation takes the form
     ! A := A*P**T
     ! where P is an orthogonal matrix consisting of a sequence of z plane
     ! rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R',
     ! and P**T is the transpose of P.
     ! When DIRECT = 'F' (Forward sequence), then
     ! P = P(z-1) * ... * P(2) * P(1)
     ! and when DIRECT = 'B' (Backward sequence), then
     ! P = P(1) * P(2) * ... * P(z-1)
     ! where P(k) is a plane rotation matrix defined by the 2-by-2 rotation
     ! R(k) = (  c(k)  s(k) )
     ! = ( -s(k)  c(k) ).
     ! When PIVOT = 'V' (Variable pivot), the rotation is performed
     ! for the plane (k,k+1), i.e., P(k) has the form
     ! P(k) = (  1                                            )
     ! (       ...                                     )
     ! (              1                                )
     ! (                   c(k)  s(k)                  )
     ! (                  -s(k)  c(k)                  )
     ! (                                1              )
     ! (                                     ...       )
     ! (                                            1  )
     ! where R(k) appears as a rank-2 modification to the identity matrix in
     ! rows and columns k and k+1.
     ! When PIVOT = 'T' (Top pivot), the rotation is performed for the
     ! plane (1,k+1), so P(k) has the form
     ! P(k) = (  c(k)                    s(k)                 )
     ! (         1                                     )
     ! (              ...                              )
     ! (                     1                         )
     ! ( -s(k)                    c(k)                 )
     ! (                                 1             )
     ! (                                      ...      )
     ! (                                             1 )
     ! where R(k) appears in rows and columns 1 and k+1.
     ! Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is
     ! performed for the plane (k,z), giving P(k) the form
     ! P(k) = ( 1                                             )
     ! (      ...                                      )
     ! (             1                                 )
     ! (                  c(k)                    s(k) )
     ! (                         1                     )
     ! (                              ...              )
     ! (                                     1         )
     ! (                 -s(k)                    c(k) )
     ! where R(k) appears in rows and columns k and z.  The rotations are
     ! performed without ever forming P(k) explicitly.

     subroutine stdlib_clasr(side, pivot, direct, m, n, c, s, a, lda)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: direct, pivot, side
           integer(ilp) :: lda, m, n
           ! .. array arguments ..
           real(sp) :: c(*), s(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, j
           real(sp) :: ctemp, stemp
           complex(sp) :: temp
           ! .. intrinsic functions ..
           intrinsic :: max
     
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           if (.not. (stdlib_lsame(side, 'l') .or. stdlib_lsame(side, 'r'))) then
              info = 1
           else if (.not. (stdlib_lsame(pivot, 'v') .or. stdlib_lsame(pivot, 't') .or. &
                     stdlib_lsame(pivot, 'b'))) then
              info = 2
           else if (.not. (stdlib_lsame(direct, 'f') .or. stdlib_lsame(direct, 'b'))) &
                     then
              info = 3
           else if (m < 0) then
              info = 4
           else if (n < 0) then
              info = 5
           else if (lda < max(1, m)) then
              info = 9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clasr ', info)
              return
           end if
           ! quick return if possible
           if ((m == 0) .or. (n == 0)) return
           if (stdlib_lsame(side, 'l')) then
              ! form  p * a
              if (stdlib_lsame(pivot, 'v')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, m - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j + 1, i)
                             a(j + 1, i) = ctemp*temp - stemp*a(j, i)
                             a(j, i) = stemp*temp + ctemp*a(j, i)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = m - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j + 1, i)
                             a(j + 1, i) = ctemp*temp - stemp*a(j, i)
                             a(j, i) = stemp*temp + ctemp*a(j, i)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 't')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 2, m
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = ctemp*temp - stemp*a(1, i)
                             a(1, i) = stemp*temp + ctemp*a(1, i)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = m, 2, -1
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = ctemp*temp - stemp*a(1, i)
                             a(1, i) = stemp*temp + ctemp*a(1, i)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 'b')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, m - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = stemp*a(m, i) + ctemp*temp
                             a(m, i) = ctemp*a(m, i) - stemp*temp
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = m - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, n
                             temp = a(j, i)
                             a(j, i) = stemp*a(m, i) + ctemp*temp
                             a(m, i) = ctemp*a(m, i) - stemp*temp
                          end do
                       end if
                    end do
                 end if
              end if
           else if (stdlib_lsame(side, 'r')) then
              ! form a * p**t
              if (stdlib_lsame(pivot, 'v')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, n - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j + 1)
                             a(i, j + 1) = ctemp*temp - stemp*a(i, j)
                             a(i, j) = stemp*temp + ctemp*a(i, j)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = n - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j + 1)
                             a(i, j + 1) = ctemp*temp - stemp*a(i, j)
                             a(i, j) = stemp*temp + ctemp*a(i, j)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 't')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 2, n
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = ctemp*temp - stemp*a(i, 1)
                             a(i, 1) = stemp*temp + ctemp*a(i, 1)
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = n, 2, -1
                       ctemp = c(j - 1)
                       stemp = s(j - 1)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = ctemp*temp - stemp*a(i, 1)
                             a(i, 1) = stemp*temp + ctemp*a(i, 1)
                          end do
                       end if
                    end do
                 end if
              else if (stdlib_lsame(pivot, 'b')) then
                 if (stdlib_lsame(direct, 'f')) then
                    do j = 1, n - 1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = stemp*a(i, n) + ctemp*temp
                             a(i, n) = ctemp*a(i, n) - stemp*temp
                          end do
                       end if
                    end do
                 else if (stdlib_lsame(direct, 'b')) then
                    do j = n - 1, 1, -1
                       ctemp = c(j)
                       stemp = s(j)
                       if ((ctemp /= one) .or. (stemp /= zero)) then
                          do i = 1, m
                             temp = a(i, j)
                             a(i, j) = stemp*a(i, n) + ctemp*temp
                             a(i, n) = ctemp*a(i, n) - stemp*temp
                          end do
                       end if
                    end do
                 end if
              end if
           end if
           return
           ! end of stdlib_clasr
     end subroutine stdlib_clasr

     ! !
     ! CLASSQ  returns the values  scl  and  smsq  such that
     ! ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
     ! where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is
     ! assumed to be non-negative.
     ! scale and sumsq must be supplied in SCALE and SUMSQ and
     ! scl and smsq are overwritten on SCALE and SUMSQ respectively.
     ! If scale * sqrt( sumsq ) > tbig then
     ! we require:   scale >= sqrt( TINY*EPS ) / sbig   on entry,
     ! and if 0 < scale * sqrt( sumsq ) < tsml then
     ! we require:   scale <= sqrt( HUGE ) / ssml       on entry,
     ! where
     ! tbig -- upper threshold for values whose square is representable;
     ! sbig -- scaling constant for big numbers; \see la_constants.f90
     ! tsml -- lower threshold for values whose square is representable;
     ! ssml -- scaling constant for small numbers; \see la_constants.f90
     ! and
     ! TINY*EPS -- tiniest representable number;
     ! HUGE     -- biggest representable number.

     subroutine stdlib_classq(n, x, incx, scl, sumsq)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
        ! .. scalar arguments ..
     integer(ilp) :: incx, n
        real(sp) :: scl, sumsq
        ! .. array arguments ..
        complex(sp) :: x(*)
        ! .. local scalars ..
     integer(ilp) :: i, ix
     logical(lk) :: notbig
        real(sp) :: abig, amed, asml, ax, ymax, ymin
        ! quick return if possible
        if (ieee_is_nan(scl) .or. ieee_is_nan(sumsq)) return
        if (sumsq == zero) scl = one
        if (scl == zero) then
           scl = one
           sumsq = zero
        end if
        if (n <= 0) then
           return
        end if
        ! compute the sum of squares in 3 accumulators:
           ! abig -- sums of squares scaled down to avoid overflow
           ! asml -- sums of squares scaled up to avoid underflow
           ! amed -- sums of squares that do not require scaling
        ! the thresholds and multipliers are
           ! tbig -- values bigger than this are scaled down by sbig
           ! tsml -- values smaller than this are scaled up by ssml
        notbig = .true.
        asml = zero
        amed = zero
        abig = zero
        ix = 1
        if (incx < 0) ix = 1 - (n - 1)*incx
        do i = 1, n
           ax = abs(real(x(ix)))
           if (ax > tbig) then
              abig = abig + (ax*sbig)**2
              notbig = .false.
           else if (ax < tsml) then
              if (notbig) asml = asml + (ax*ssml)**2
           else
              amed = amed + ax**2
           end if
           ax = abs(aimag(x(ix)))
           if (ax > tbig) then
              abig = abig + (ax*sbig)**2
              notbig = .false.
           else if (ax < tsml) then
              if (notbig) asml = asml + (ax*ssml)**2
           else
              amed = amed + ax**2
           end if
           ix = ix + incx
        end do
        ! put the existing sum of squares into one of the accumulators
        if (sumsq > zero) then
           ax = scl*sqrt(sumsq)
           if (ax > tbig) then
              ! we assume scl >= sqrt( tiny*eps ) / sbig
              abig = abig + (scl*sbig)**2*sumsq
           else if (ax < tsml) then
              ! we assume scl <= sqrt( huge ) / ssml
              if (notbig) asml = asml + (scl*ssml)**2*sumsq
           else
              amed = amed + scl**2*sumsq
           end if
        end if
        ! combine abig and amed or amed and asml if more than one
        ! accumulator was used.
        if (abig > zero) then
           ! combine abig and amed if abig > 0.
           if (amed > zero .or. ieee_is_nan(amed)) then
              abig = abig + (amed*sbig)*sbig
           end if
           scl = one/sbig
           sumsq = abig
        else if (asml > zero) then
           ! combine amed and asml if asml > 0.
           if (amed > zero .or. ieee_is_nan(amed)) then
              amed = sqrt(amed)
              asml = sqrt(asml)/ssml
              if (asml > amed) then
                 ymin = amed
                 ymax = asml
              else
                 ymin = asml
                 ymax = amed
              end if
              scl = one
              sumsq = ymax**2*(one + (ymin/ymax)**2)
           else
              scl = one/ssml
              sumsq = asml
           end if
        else
           ! otherwise all values are mid-range or zero
           scl = one
           sumsq = amed
        end if
        return
     end subroutine stdlib_classq

     ! CLASWP performs a series of row interchanges on the matrix A.
     ! One row interchange is initiated for each of rows K1 through K2 of A.

     subroutine stdlib_claswp(n, a, lda, k1, k2, ipiv, incx)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, k1, k2, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *)
       ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, i1, i2, inc, ip, ix, ix0, j, k, n32
           complex(sp) :: temp
           ! .. executable statements ..
           ! interchange row i with row ipiv(k1+(i-k1)*abs(incx)) for each of rows
           ! k1 through k2.
           if (incx > 0) then
              ix0 = k1
              i1 = k1
              i2 = k2
              inc = 1
           else if (incx < 0) then
              ix0 = k1 + (k1 - k2)*incx
              i1 = k2
              i2 = k1
              inc = -1
           else
              return
           end if
           n32 = (n/32)*32
           if (n32 /= 0) then
              do j = 1, n32, 32
                 ix = ix0
                 do i = i1, i2, inc
                    ip = ipiv(ix)
                    if (ip /= i) then
                       do k = j, j + 31
                          temp = a(i, k)
                          a(i, k) = a(ip, k)
                          a(ip, k) = temp
                       end do
                    end if
                    ix = ix + incx
                 end do
              end do
           end if
           if (n32 /= n) then
              n32 = n32 + 1
              ix = ix0
              do i = i1, i2, inc
                 ip = ipiv(ix)
                 if (ip /= i) then
                    do k = n32, n
                       temp = a(i, k)
                       a(i, k) = a(ip, k)
                       a(ip, k) = temp
                    end do
                 end if
                 ix = ix + incx
              end do
           end if
           return
           ! end of stdlib_claswp
     end subroutine stdlib_claswp

     ! CLASYF computes a partial factorization of a complex symmetric matrix
     ! A using the Bunch-Kaufman diagonal pivoting method. The partial
     ! factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I       0    )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**T U22**T )
     ! A  =  ( L11  0 ) ( D    0  ) ( L11**T L21**T )  if UPLO = 'L'
     ! ( L21  I ) ( 0   A22 ) (  0       I    )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! Note that U**T denotes the transpose of U.
     ! CLASYF is an auxiliary routine called by CSYTRF. It uses blocked code
     ! (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
     ! A22 (if UPLO = 'L').

     subroutine stdlib_clasyf(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           integer(ilp) :: imax, j, jb, jj, jmax, jp, k, kk, kkw, kp, kstep, kw
           real(sp) :: absakk, alpha, colmax, rowmax
           complex(sp) :: d11, d21, d22, r1, t, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, min, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              ! kw is the column of w which corresponds to column k of a
              k = n
10      continue
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              ! copy column k of a to column kw of w and update it
              call stdlib_ccopy(k, a(1, k), 1, w(1, kw), 1)
              if (k < n) call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w(k, &
                        kw + 1), ldw, cone, w(1, kw), 1)
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(w(k, kw))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, w(1, kw), 1)
                 colmax = cabs1(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! copy column imax to column kw-1 of w and update it
                    call stdlib_ccopy(imax, a(1, imax), 1, w(1, kw - 1), 1)
                    call stdlib_ccopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                              
                    if (k < n) call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w( &
                               imax, kw + 1), ldw, cone, w(1, kw - 1), 1)
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = imax + stdlib_icamax(k - imax, w(imax + 1, kw - 1), 1)
                    rowmax = cabs1(w(jmax, kw - 1))
                    if (imax > 1) then
                       jmax = stdlib_icamax(imax - 1, w(1, kw - 1), 1)
                       rowmax = max(rowmax, cabs1(w(jmax, kw - 1)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (cabs1(w(imax, kw - 1)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                       ! copy column kw-1 of w to column kw of w
                       call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kkw of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k-1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = a(kk, kk)
                    call stdlib_ccopy(kk - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    if (kp > 1) call stdlib_ccopy(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last k+1 to n columns of a
                    ! (columns k (or k and k-1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in last kkw to nb columns of w.
                    if (k < n) call stdlib_cswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                    call stdlib_cswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(kw) = u(k)*d(k),
                    ! where u(k) is the k-th column of u
                    ! store subdiag. elements of column u(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! note: diagonal element u(k,k) is a unit element
                    ! and not stored.
                       ! a(k,k) := d(k,k) = w(k,kw)
                       ! a(1:k-1,k) := u(1:k-1,k) = w(1:k-1,kw)/d(k,k)
                    call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
                    r1 = cone/a(k, k)
                    call stdlib_cscal(k - 1, r1, a(1, k), 1)
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now hold
                    ! ( w(kw-1) w(kw) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! store u(1:k-2,k-1) and u(1:k-2,k) and 2-by-2
                    ! block d(k-1:k,k-1:k) in columns k-1 and k of a.
                    ! note: 2-by-2 diagonal block u(k-1:k,k-1:k) is a unit
                    ! block and not stored.
                       ! a(k-1:k,k-1:k) := d(k-1:k,k-1:k) = w(k-1:k,kw-1:kw)
                       ! a(1:k-2,k-1:k) := u(1:k-2,k:k-1:k) =
                       ! = w(1:k-2,kw-1:kw) * ( d(k-1:k,k-1:k)**(-1) )
                    if (k > 2) then
                       ! compose the columns of the inverse of 2-by-2 pivot
                       ! block d in the following way to reduce the number
                       ! of flops when we myltiply panel ( w(kw-1) w(kw) ) by
                       ! this inverse
                       ! d**(-1) = ( d11 d21 )**(-1) =
                                 ! ( d21 d22 )
                       ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) =
                                              ! ( (-d21 ) ( d11 ) )
                       ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) *
                         ! * ( ( d22/d21 ) (      -1 ) ) =
                           ! ( (      -1 ) ( d11/d21 ) )
                       ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) (  -1 ) ) =
                                                 ! ( ( -1  ) ( d22 ) )
                       ! = 1/d21 * t * ( ( d11 ) (  -1 ) )
                                     ! ( (  -1 ) ( d22 ) )
                       ! = d21 * ( ( d11 ) (  -1 ) )
                               ! ( (  -1 ) ( d22 ) )
                       d21 = w(k - 1, kw)
                       d11 = w(k, kw)/d21
                       d22 = w(k - 1, kw - 1)/d21
                       t = cone/(d11*d22 - cone)
                       ! update elements in columns a(k-1) and a(k) as
                       ! dot products of rows of ( w(kw-1) w(kw) ) and columns
                       ! of d**(-1)
                       d21 = t/d21
                       do j = 1, k - 2
                          a(j, k - 1) = d21*(d11*w(j, kw - 1) - w(j, kw))
                          a(j, k) = d21*(d22*w(j, kw) - w(j, kw - 1))
                       end do
                    end if
                    ! copy d(k) to a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = w(k - 1, kw)
                    a(k, k) = w(k, kw)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t
              ! computing blocks of nb columns at a time
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_cgemv('no transpose', jj - j + 1, n - k, -cone, a(j, k + 1), lda, w(jj, &
                               kw + 1), ldw, cone, a(j, jj), 1)
                 end do
                 ! update the rectangular superdiagonal block
                 call stdlib_cgemm('no transpose', 'transpose', j - 1, jb, n - k, -cone, a(1, k + 1), &
                           lda, w(j, kw + 1), ldw, cone, a(1, j), lda)
              end do
              ! put u12 in standard form by partially undoing the interchanges
              ! in columns k+1:n looping backwards from k+1 to n
              j = k + 1
60      continue
                 ! undo the interchanges (if any) of rows jj and jp at each
                 ! step j
                 ! (here, j is a diagonal index)
                 jj = j
                 jp = ipiv(j)
                 if (jp < 0) then
                    jp = -jp
                    ! (here, j is a diagonal index)
                    j = j + 1
                 end if
                 ! (note: here, j is used to determine row length. length n-j+1
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j + 1
                 if (jp /= jj .and. j <= n) call stdlib_cswap(n - j + 1, a(jp, j), lda, a(jj, j), &
                           lda)
              if (j < n) go to 60
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              ! copy column k of a to column k of w and update it
              call stdlib_ccopy(n - k + 1, a(k, k), 1, w(k, k), 1)
              call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(k, 1), ldw, &
                         cone, w(k, k), 1)
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(w(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, w(k + 1, k), 1)
                 colmax = cabs1(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! copy column imax to column k+1 of w and update it
                    call stdlib_ccopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                    call stdlib_ccopy(n - imax + 1, a(imax, imax), 1, w(imax, k + 1), 1)
                    call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(imax, &
                              1), ldw, cone, w(k, k + 1), 1)
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = k - 1 + stdlib_icamax(imax - k, w(k, k + 1), 1)
                    rowmax = cabs1(w(jmax, k + 1))
                    if (imax < n) then
                       jmax = imax + stdlib_icamax(n - imax, w(imax + 1, k + 1), 1)
                       rowmax = max(rowmax, cabs1(w(jmax, k + 1)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (cabs1(w(imax, k + 1)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                       ! copy column k+1 of w to column k of w
                       call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 ! ============================================================
                 ! kk is the column of a where pivoting step stopped
                 kk = k + kstep - 1
                 ! interchange rows and columns kp and kk.
                 ! updated column kp is already stored in column kk of w.
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp of submatrix a
                    ! at step k. no need to copy element into column k
                    ! (or k and k+1 for 2-by-2 pivot) of a, since these columns
                    ! will be later overwritten.
                    a(kp, kp) = a(kk, kk)
                    call stdlib_ccopy(kp - kk - 1, a(kk + 1, kk), 1, a(kp, kk + 1), lda)
                    if (kp < n) call stdlib_ccopy(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    ! interchange rows kk and kp in first k-1 columns of a
                    ! (columns k (or k and k+1 for 2-by-2 pivot) of a will be
                    ! later overwritten). interchange rows kk and kp
                    ! in first kk columns of w.
                    if (k > 1) call stdlib_cswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_cswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k),
                    ! where l(k) is the k-th column of l
                    ! store subdiag. elements of column l(k)
                    ! and 1-by-1 block d(k) in column k of a.
                    ! (note: diagonal element l(k,k) is a unit element
                    ! and not stored)
                       ! a(k,k) := d(k,k) = w(k,k)
                       ! a(k+1:n,k) := l(k+1:n,k) = w(k+1:n,k)/d(k,k)
                    call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       r1 = cone/a(k, k)
                       call stdlib_cscal(n - k, r1, a(k + 1, k), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! store l(k+2:n,k) and l(k+2:n,k+1) and 2-by-2
                    ! block d(k:k+1,k:k+1) in columns k and k+1 of a.
                    ! (note: 2-by-2 diagonal block l(k:k+1,k:k+1) is a unit
                    ! block and not stored)
                       ! a(k:k+1,k:k+1) := d(k:k+1,k:k+1) = w(k:k+1,k:k+1)
                       ! a(k+2:n,k:k+1) := l(k+2:n,k:k+1) =
                       ! = w(k+2:n,k:k+1) * ( d(k:k+1,k:k+1)**(-1) )
                    if (k < n - 1) then
                       ! compose the columns of the inverse of 2-by-2 pivot
                       ! block d in the following way to reduce the number
                       ! of flops when we myltiply panel ( w(k) w(k+1) ) by
                       ! this inverse
                       ! d**(-1) = ( d11 d21 )**(-1) =
                                 ! ( d21 d22 )
                       ! = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) =
                                              ! ( (-d21 ) ( d11 ) )
                       ! = 1/d21 * 1/((d11/d21)*(d22/d21)-1) *
                         ! * ( ( d22/d21 ) (      -1 ) ) =
                           ! ( (      -1 ) ( d11/d21 ) )
                       ! = 1/d21 * 1/(d22*d11-1) * ( ( d11 ) (  -1 ) ) =
                                                 ! ( ( -1  ) ( d22 ) )
                       ! = 1/d21 * t * ( ( d11 ) (  -1 ) )
                                     ! ( (  -1 ) ( d22 ) )
                       ! = d21 * ( ( d11 ) (  -1 ) )
                               ! ( (  -1 ) ( d22 ) )
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/d21
                       t = cone/(d11*d22 - cone)
                       d21 = t/d21
                       ! update elements in columns a(k) and a(k+1) as
                       ! dot products of rows of ( w(k) w(k+1) ) and columns
                       ! of d**(-1)
                       do j = k + 2, n
                          a(j, k) = d21*(d11*w(j, k) - w(j, k + 1))
                          a(j, k + 1) = d21*(d22*w(j, k + 1) - w(j, k))
                       end do
                    end if
                    ! copy d(k) to a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = w(k + 1, k)
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**t = a22 - l21*w**t
              ! computing blocks of nb columns at a time
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_cgemv('no transpose', j + jb - jj, k - 1, -cone, a(jj, 1), lda, w(jj, &
                               1), ldw, cone, a(jj, jj), 1)
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_cgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           cone, a(j + jb, 1), lda, w(j, 1), ldw, cone, a(j + jb, j), lda)
              end do
              ! put l21 in standard form by partially undoing the interchanges
              ! of rows in columns 1:k-1 looping backwards from k-1 to 1
              j = k - 1
120    continue
                 ! undo the interchanges (if any) of rows jj and jp at each
                 ! step j
                 ! (here, j is a diagonal index)
                 jj = j
                 jp = ipiv(j)
                 if (jp < 0) then
                    jp = -jp
                    ! (here, j is a diagonal index)
                    j = j - 1
                 end if
                 ! (note: here, j is used to determine row length. length j
                 ! of the rows to swap back doesn't include diagonal element)
                 j = j - 1
                 if (jp /= jj .and. j >= 1) call stdlib_cswap(j, a(jp, 1), lda, a(jj, 1), lda)
                           
              if (j > 1) go to 120
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_clasyf
     end subroutine stdlib_clasyf

     ! CLASYF_RK computes a partial factorization of a complex symmetric
     ! matrix A using the bounded Bunch-Kaufman (rook) diagonal
     ! pivoting method. The partial factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I       0    )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**T U22**T )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**T L21**T )  if UPLO = 'L',
     ! ( L21  I ) (  0  A22 ) (  0       I    )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! CLASYF_RK is an auxiliary routine called by CSYTRF_RK. It uses
     ! blocked code (calling Level 3 BLAS) to update the submatrix
     ! A11 (if UPLO = 'U') or A22 (if UPLO = 'L').

     subroutine stdlib_clasyf_rk(uplo, n, nb, kb, a, lda, e, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), e(*), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: done
           integer(ilp) :: imax, itemp, j, jb, jj, jmax, k, kk, kw, kkw, kp, kstep, p, ii
           real(sp) :: absakk, alpha, colmax, rowmax, sfmin, stemp
           complex(sp) :: d11, d12, d21, d22, r1, t, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, min, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11
              ! initialize the first entry of array e, where superdiagonal
              ! elements of d are stored
              e(1) = czero
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              k = n
10      continue
              ! kw is the column of w which corresponds to column k of a
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              kstep = 1
              p = k
              ! copy column k of a to column kw of w and update it
              call stdlib_ccopy(k, a(1, k), 1, w(1, kw), 1)
              if (k < n) call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w(k, &
                        kw + 1), ldw, cone, w(1, kw), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(w(k, kw))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, w(1, kw), 1)
                 colmax = cabs1(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
                 ! set e( k ) to zero
                 if (k > 1) e(k) = czero
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! copy column imax to column kw-1 of w and update it
                       call stdlib_ccopy(imax, a(1, imax), 1, w(1, kw - 1), 1)
                       call stdlib_ccopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                                 
                       if (k < n) call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, &
                                  w(imax, kw + 1), ldw, cone, w(1, kw - 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, w(imax + 1, kw - 1), 1)
                          rowmax = cabs1(w(jmax, kw - 1))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, w(1, kw - 1), 1)
                          stemp = cabs1(w(itemp, kw - 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (cabs1(w(imax, kw - 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column kw-1 of w to column kw of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! ============================================================
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_ccopy(k - p, a(p + 1, k), 1, a(p, p + 1), lda)
                    call stdlib_ccopy(p, a(1, k), 1, a(1, p), 1)
                    ! interchange rows k and p in last n-k+1 columns of a
                    ! and last n-k+2 columns of w
                    call stdlib_cswap(n - k + 1, a(k, k), lda, a(p, k), lda)
                    call stdlib_cswap(n - kk + 1, w(k, kkw), ldw, w(p, kkw), ldw)
                 end if
                 ! updated column kp is already stored in column kkw of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_ccopy(k - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    call stdlib_ccopy(kp, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last n-kk+1 columns
                    ! of a and w
                    call stdlib_cswap(n - kk + 1, a(kk, kk), lda, a(kp, kk), lda)
                    call stdlib_cswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! store u(k) in column k of a
                    call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
                    if (k > 1) then
                       if (cabs1(a(k, k)) >= sfmin) then
                          r1 = cone/a(k, k)
                          call stdlib_cscal(k - 1, r1, a(1, k), 1)
                       else if (a(k, k) /= czero) then
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                       ! store the superdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now
                    ! hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    if (k > 2) then
                       ! store u(k) and u(k-1) in columns k and k-1 of a
                       d12 = w(k - 1, kw)
                       d11 = w(k, kw)/d12
                       d22 = w(k - 1, kw - 1)/d12
                       t = cone/(d11*d22 - cone)
                       do j = 1, k - 2
                          a(j, k - 1) = t*((d11*w(j, kw - 1) - w(j, kw))/d12)
                          a(j, k) = t*((d22*w(j, kw) - w(j, kw - 1))/d12)
                       end do
                    end if
                    ! copy diagonal elements of d(k) to a,
                    ! copy superdiagonal element of d(k) to e(k) and
                    ! zero out superdiagonal entry of a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = czero
                    a(k, k) = w(k, kw)
                    e(k) = w(k - 1, kw)
                    e(k - 1) = czero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t
              ! computing blocks of nb columns at a time
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_cgemv('no transpose', jj - j + 1, n - k, -cone, a(j, k + 1), lda, w(jj, &
                               kw + 1), ldw, cone, a(j, jj), 1)
                 end do
                 ! update the rectangular superdiagonal block
                 if (j >= 2) call stdlib_cgemm('no transpose', 'transpose', j - 1, jb, n - k, -cone, a( &
                           1, k + 1), lda, w(j, kw + 1), ldw, cone, a(1, j), lda)
              end do
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22
              ! initialize the unused last entry of the subdiagonal array e.
              e(n) = czero
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              kstep = 1
              p = k
              ! copy column k of a to column k of w and update it
              call stdlib_ccopy(n - k + 1, a(k, k), 1, w(k, k), 1)
              if (k > 1) call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(k, &
                        1), ldw, cone, w(k, k), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(w(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, w(k + 1, k), 1)
                 colmax = cabs1(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                 ! set e( k ) to zero
                 if (k < n) e(k) = czero
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
72      continue
                       ! begin pivot search loop body
                       ! copy column imax to column k+1 of w and update it
                       call stdlib_ccopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                       call stdlib_ccopy(n - imax + 1, a(imax, imax), 1, w(imax, k + 1), 1)
                       if (k > 1) call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), &
                                 lda, w(imax, 1), ldw, cone, w(k, k + 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, w(k, k + 1), 1)
                          rowmax = cabs1(w(jmax, k + 1))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, w(imax + 1, k + 1), 1)
                          stemp = cabs1(w(itemp, k + 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! cabs1( w( imax, k+1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (cabs1(w(imax, k + 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column k+1 of w to column k of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 72
                 end if
                 ! ============================================================
                 kk = k + kstep - 1
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_ccopy(p - k, a(k, k), 1, a(p, k), lda)
                    call stdlib_ccopy(n - p + 1, a(p, k), 1, a(p, p), 1)
                    ! interchange rows k and p in first k columns of a
                    ! and first k+1 columns of w
                    call stdlib_cswap(k, a(k, 1), lda, a(p, 1), lda)
                    call stdlib_cswap(kk, w(k, 1), ldw, w(p, 1), ldw)
                 end if
                 ! updated column kp is already stored in column kk of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_ccopy(kp - k - 1, a(k + 1, kk), 1, a(kp, k + 1), lda)
                    call stdlib_ccopy(n - kp + 1, a(kp, kk), 1, a(kp, kp), 1)
                    ! interchange rows kk and kp in first kk columns of a and w
                    call stdlib_cswap(kk, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_cswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    ! store l(k) in column k of a
                    call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       if (cabs1(a(k, k)) >= sfmin) then
                          r1 = cone/a(k, k)
                          call stdlib_cscal(n - k, r1, a(k + 1, k), 1)
                       else if (a(k, k) /= czero) then
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                       ! store the subdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    if (k < n - 1) then
                       ! store l(k) and l(k+1) in columns k and k+1 of a
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/d21
                       t = cone/(d11*d22 - cone)
                       do j = k + 2, n
                          a(j, k) = t*((d11*w(j, k) - w(j, k + 1))/d21)
                          a(j, k + 1) = t*((d22*w(j, k + 1) - w(j, k))/d21)
                       end do
                    end if
                    ! copy diagonal elements of d(k) to a,
                    ! copy subdiagonal element of d(k) to e(k) and
                    ! zero out subdiagonal entry of a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = czero
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                    e(k) = w(k + 1, k)
                    e(k + 1) = czero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**t = a22 - l21*w**t
              ! computing blocks of nb columns at a time
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_cgemv('no transpose', j + jb - jj, k - 1, -cone, a(jj, 1), lda, w(jj, &
                               1), ldw, cone, a(jj, jj), 1)
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_cgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           cone, a(j + jb, 1), lda, w(j, 1), ldw, cone, a(j + jb, j), lda)
              end do
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_clasyf_rk
     end subroutine stdlib_clasyf_rk

     ! CLASYF_ROOK computes a partial factorization of a complex symmetric
     ! matrix A using the bounded Bunch-Kaufman ("rook") diagonal
     ! pivoting method. The partial factorization has the form:
     ! A  =  ( I  U12 ) ( A11  0  ) (  I       0    )  if UPLO = 'U', or:
     ! ( 0  U22 ) (  0   D  ) ( U12**T U22**T )
     ! A  =  ( L11  0 ) (  D   0  ) ( L11**T L21**T )  if UPLO = 'L'
     ! ( L21  I ) (  0  A22 ) (  0       I    )
     ! where the order of D is at most NB. The actual order is returned in
     ! the argument KB, and is either NB or NB-1, or N if N <= NB.
     ! CLASYF_ROOK is an auxiliary routine called by CSYTRF_ROOK. It uses
     ! blocked code (calling Level 3 BLAS) to update the submatrix
     ! A11 (if UPLO = 'U') or A22 (if UPLO = 'L').

     subroutine stdlib_clasyf_rook(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kb, lda, ldw, n, nb
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), w(ldw, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: done
           integer(ilp) :: imax, itemp, j, jb, jj, jmax, jp1, jp2, k, kk, kw, kkw, kp, kstep, p, &
                     ii
           real(sp) :: absakk, alpha, colmax, rowmax, stemp, sfmin
           complex(sp) :: d11, d12, d21, d22, r1, t, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, min, sqrt, aimag, real
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           info = 0
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (stdlib_lsame(uplo, 'u')) then
              ! factorize the trailing columns of a using the upper triangle
              ! of a and working backwards, and compute the matrix w = u12*d
              ! for use in updating a11
              ! k is the main loop index, decreasing from n in steps of 1 or 2
              k = n
10      continue
              ! kw is the column of w which corresponds to column k of a
              kw = nb + k - n
              ! exit from loop
              if ((k <= n - nb + 1 .and. nb < n) .or. k < 1) go to 30
              kstep = 1
              p = k
              ! copy column k of a to column kw of w and update it
              call stdlib_ccopy(k, a(1, k), 1, w(1, kw), 1)
              if (k < n) call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, w(k, &
                        kw + 1), ldw, cone, w(1, kw), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(w(k, kw))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, w(1, kw), 1)
                 colmax = cabs1(w(imax, kw))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! copy column imax to column kw-1 of w and update it
                       call stdlib_ccopy(imax, a(1, imax), 1, w(1, kw - 1), 1)
                       call stdlib_ccopy(k - imax, a(imax, imax + 1), lda, w(imax + 1, kw - 1), 1)
                                 
                       if (k < n) call stdlib_cgemv('no transpose', k, n - k, -cone, a(1, k + 1), lda, &
                                  w(imax, kw + 1), ldw, cone, w(1, kw - 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, w(imax + 1, kw - 1), 1)
                          rowmax = cabs1(w(jmax, kw - 1))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, w(1, kw - 1), 1)
                          stemp = cabs1(w(itemp, kw - 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! cabs1( w( imax, kw-1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (cabs1(w(imax, kw - 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column kw-1 of w to column kw of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k-1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(k, w(1, kw - 1), 1, w(1, kw), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! ============================================================
                 kk = k - kstep + 1
                 ! kkw is the column of w which corresponds to column kk of a
                 kkw = nb + kk - n
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_ccopy(k - p, a(p + 1, k), 1, a(p, p + 1), lda)
                    call stdlib_ccopy(p, a(1, k), 1, a(1, p), 1)
                    ! interchange rows k and p in last n-k+1 columns of a
                    ! and last n-k+2 columns of w
                    call stdlib_cswap(n - k + 1, a(k, k), lda, a(p, k), lda)
                    call stdlib_cswap(n - kk + 1, w(k, kkw), ldw, w(p, kkw), ldw)
                 end if
                 ! updated column kp is already stored in column kkw of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_ccopy(k - 1 - kp, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    call stdlib_ccopy(kp, a(1, kk), 1, a(1, kp), 1)
                    ! interchange rows kk and kp in last n-kk+1 columns
                    ! of a and w
                    call stdlib_cswap(n - kk + 1, a(kk, kk), lda, a(kp, kk), lda)
                    call stdlib_cswap(n - kk + 1, w(kk, kkw), ldw, w(kp, kkw), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column kw of w now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! store u(k) in column k of a
                    call stdlib_ccopy(k, w(1, kw), 1, a(1, k), 1)
                    if (k > 1) then
                       if (cabs1(a(k, k)) >= sfmin) then
                          r1 = cone/a(k, k)
                          call stdlib_cscal(k - 1, r1, a(1, k), 1)
                       else if (a(k, k) /= czero) then
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns kw and kw-1 of w now
                    ! hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    if (k > 2) then
                       ! store u(k) and u(k-1) in columns k and k-1 of a
                       d12 = w(k - 1, kw)
                       d11 = w(k, kw)/d12
                       d22 = w(k - 1, kw - 1)/d12
                       t = cone/(d11*d22 - cone)
                       do j = 1, k - 2
                          a(j, k - 1) = t*((d11*w(j, kw - 1) - w(j, kw))/d12)
                          a(j, k) = t*((d22*w(j, kw) - w(j, kw - 1))/d12)
                       end do
                    end if
                    ! copy d(k) to a
                    a(k - 1, k - 1) = w(k - 1, kw - 1)
                    a(k - 1, k) = w(k - 1, kw)
                    a(k, k) = w(k, kw)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
30      continue
              ! update the upper triangle of a11 (= a(1:k,1:k)) as
              ! a11 := a11 - u12*d*u12**t = a11 - u12*w**t
              ! computing blocks of nb columns at a time
              do j = ((k - 1)/nb)*nb + 1, 1, -nb
                 jb = min(nb, k - j + 1)
                 ! update the upper triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_cgemv('no transpose', jj - j + 1, n - k, -cone, a(j, k + 1), lda, w(jj, &
                               kw + 1), ldw, cone, a(j, jj), 1)
                 end do
                 ! update the rectangular superdiagonal block
                 if (j >= 2) call stdlib_cgemm('no transpose', 'transpose', j - 1, jb, n - k, -cone, a( &
                           1, k + 1), lda, w(j, kw + 1), ldw, cone, a(1, j), lda)
              end do
              ! put u12 in standard form by partially undoing the interchanges
              ! in columns k+1:n
              j = k + 1
60      continue
                 kstep = 1
                 jp1 = 1
                 jj = j
                 jp2 = ipiv(j)
                 if (jp2 < 0) then
                    jp2 = -jp2
                    j = j + 1
                    jp1 = -ipiv(j)
                    kstep = 2
                 end if
                 j = j + 1
                 if (jp2 /= jj .and. j <= n) call stdlib_cswap(n - j + 1, a(jp2, j), lda, a(jj, j), &
                           lda)
                 jj = j - 1
                 if (jp1 /= jj .and. kstep == 2) call stdlib_cswap(n - j + 1, a(jp1, j), lda, a(jj, j &
                           ), lda)
              if (j <= n) go to 60
              ! set kb to the number of columns factorized
              kb = n - k
           else
              ! factorize the leading columns of a using the lower triangle
              ! of a and working forwards, and compute the matrix w = l21*d
              ! for use in updating a22
              ! k is the main loop index, increasing from 1 in steps of 1 or 2
              k = 1
70      continue
              ! exit from loop
              if ((k >= nb .and. nb < n) .or. k > n) go to 90
              kstep = 1
              p = k
              ! copy column k of a to column k of w and update it
              call stdlib_ccopy(n - k + 1, a(k, k), 1, w(k, k), 1)
              if (k > 1) call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), lda, w(k, &
                        1), ldw, cone, w(k, k), 1)
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(w(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, w(k + 1, k), 1)
                 colmax = cabs1(w(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
              else
                 ! ============================================================
                 ! test for interchange
                 ! equivalent to testing for absakk>=alpha*colmax
                 ! (used to handle nan and inf)
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
72      continue
                       ! begin pivot search loop body
                       ! copy column imax to column k+1 of w and update it
                       call stdlib_ccopy(imax - k, a(imax, k), lda, w(k, k + 1), 1)
                       call stdlib_ccopy(n - imax + 1, a(imax, imax), 1, w(imax, k + 1), 1)
                       if (k > 1) call stdlib_cgemv('no transpose', n - k + 1, k - 1, -cone, a(k, 1), &
                                 lda, w(imax, 1), ldw, cone, w(k, k + 1), 1)
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, w(k, k + 1), 1)
                          rowmax = cabs1(w(jmax, k + 1))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, w(imax + 1, k + 1), 1)
                          stemp = cabs1(w(itemp, k + 1))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for
                       ! cabs1( w( imax, k+1 ) )>=alpha*rowmax
                       ! (used to handle nan and inf)
                       if (.not. (cabs1(w(imax, k + 1)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          ! copy column k+1 of w to column k of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                          done = .true.
                       ! equivalent to testing for rowmax==colmax,
                       ! (used to handle nan and inf)
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found: set params and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                          ! copy updated jmaxth (next imaxth) column to kth of w
                          call stdlib_ccopy(n - k + 1, w(k, k + 1), 1, w(k, k), 1)
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 72
                 end if
                 ! ============================================================
                 kk = k + kstep - 1
                 if ((kstep == 2) .and. (p /= k)) then
                    ! copy non-updated column k to column p
                    call stdlib_ccopy(p - k, a(k, k), 1, a(p, k), lda)
                    call stdlib_ccopy(n - p + 1, a(p, k), 1, a(p, p), 1)
                    ! interchange rows k and p in first k columns of a
                    ! and first k+1 columns of w
                    call stdlib_cswap(k, a(k, 1), lda, a(p, 1), lda)
                    call stdlib_cswap(kk, w(k, 1), ldw, w(p, 1), ldw)
                 end if
                 ! updated column kp is already stored in column kk of w
                 if (kp /= kk) then
                    ! copy non-updated column kk to column kp
                    a(kp, k) = a(kk, k)
                    call stdlib_ccopy(kp - k - 1, a(k + 1, kk), 1, a(kp, k + 1), lda)
                    call stdlib_ccopy(n - kp + 1, a(kp, kk), 1, a(kp, kp), 1)
                    ! interchange rows kk and kp in first kk columns of a and w
                    call stdlib_cswap(kk, a(kk, 1), lda, a(kp, 1), lda)
                    call stdlib_cswap(kk, w(kk, 1), ldw, w(kp, 1), ldw)
                 end if
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k of w now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    ! store l(k) in column k of a
                    call stdlib_ccopy(n - k + 1, w(k, k), 1, a(k, k), 1)
                    if (k < n) then
                       if (cabs1(a(k, k)) >= sfmin) then
                          r1 = cone/a(k, k)
                          call stdlib_cscal(n - k, r1, a(k + 1, k), 1)
                       else if (a(k, k) /= czero) then
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/a(k, k)
                          end do
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 of w now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    if (k < n - 1) then
                       ! store l(k) and l(k+1) in columns k and k+1 of a
                       d21 = w(k + 1, k)
                       d11 = w(k + 1, k + 1)/d21
                       d22 = w(k, k)/d21
                       t = cone/(d11*d22 - cone)
                       do j = k + 2, n
                          a(j, k) = t*((d11*w(j, k) - w(j, k + 1))/d21)
                          a(j, k + 1) = t*((d22*w(j, k + 1) - w(j, k))/d21)
                       end do
                    end if
                    ! copy d(k) to a
                    a(k, k) = w(k, k)
                    a(k + 1, k) = w(k + 1, k)
                    a(k + 1, k + 1) = w(k + 1, k + 1)
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 70
90      continue
              ! update the lower triangle of a22 (= a(k:n,k:n)) as
              ! a22 := a22 - l21*d*l21**t = a22 - l21*w**t
              ! computing blocks of nb columns at a time
              do j = k, n, nb
                 jb = min(nb, n - j + 1)
                 ! update the lower triangle of the diagonal block
                 do jj = j, j + jb - 1
                    call stdlib_cgemv('no transpose', j + jb - jj, k - 1, -cone, a(jj, 1), lda, w(jj, &
                               1), ldw, cone, a(jj, jj), 1)
                 end do
                 ! update the rectangular subdiagonal block
                 if (j + jb <= n) call stdlib_cgemm('no transpose', 'transpose', n - j - jb + 1, jb, k - 1, - &
                           cone, a(j + jb, 1), lda, w(j, 1), ldw, cone, a(j + jb, j), lda)
              end do
              ! put l21 in standard form by partially undoing the interchanges
              ! in columns 1:k-1
              j = k - 1
120    continue
                 kstep = 1
                 jp1 = 1
                 jj = j
                 jp2 = ipiv(j)
                 if (jp2 < 0) then
                    jp2 = -jp2
                    j = j - 1
                    jp1 = -ipiv(j)
                    kstep = 2
                 end if
                 j = j - 1
                 if (jp2 /= jj .and. j >= 1) call stdlib_cswap(j, a(jp2, 1), lda, a(jj, 1), lda)
                           
                 jj = j + 1
                 if (jp1 /= jj .and. kstep == 2) call stdlib_cswap(j, a(jp1, 1), lda, a(jj, 1), &
                           lda)
              if (j >= 1) go to 120
              ! set kb to the number of columns factorized
              kb = k - 1
           end if
           return
           ! end of stdlib_clasyf_rook
     end subroutine stdlib_clasyf_rook

     ! CLATBS solves one of the triangular systems
     ! A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
     ! with scaling to prevent overflow, where A is an upper or lower
     ! triangular band matrix.  Here A**T denotes the transpose of A, x and b
     ! are n-element vectors, and s is a scaling factor, usually less than
     ! or equal to 1, chosen so that the components of x will be less than
     ! the overflow threshold.  If the unscaled problem will not cause
     ! overflow, the Level 2 BLAS routine CTBSV is called.  If the matrix A
     ! is singular (A(j,j) = 0 for some j), then s is set to 0 and a
     ! non-trivial solution to A*x = 0 is returned.

     subroutine stdlib_clatbs(uplo, trans, diag, normin, n, kd, ab, ldab, x, scale, cnorm, info)
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, normin, trans, uplo
           integer(ilp) :: info, kd, ldab, n
           real(sp) :: scale
           ! .. array arguments ..
           real(sp) :: cnorm(*)
           complex(sp) :: ab(ldab, *), x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           integer(ilp) :: i, imax, j, jfirst, jinc, jlast, jlen, maind
           real(sp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax
           complex(sp) :: csumj, tjjs, uscal, zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, conjg, max, min, real
           ! .. statement functions ..
           real(sp) :: cabs1, cabs2
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           cabs2(zdum) = abs(real(zdum)/2.) + abs(aimag(zdum)/2.)
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           ! test the input parameters.
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (.not. stdlib_lsame(normin, 'y') .and. .not. stdlib_lsame(normin, 'n')) &
                     then
              info = -4
           else if (n < 0) then
              info = -5
           else if (kd < 0) then
              info = -6
           else if (ldab < kd + 1) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clatbs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine machine dependent parameters to control overflow.
           smlnum = stdlib_slamch('safe minimum')
           bignum = one/smlnum
           call stdlib_slabad(smlnum, bignum)
           smlnum = smlnum/stdlib_slamch('precision')
           bignum = one/smlnum
           scale = one
           if (stdlib_lsame(normin, 'n')) then
              ! compute the 1-norm of each column, not including the diagonal.
              if (upper) then
                 ! a is upper triangular.
                 do j = 1, n
                    jlen = min(kd, j - 1)
                    cnorm(j) = stdlib_scasum(jlen, ab(kd + 1 - jlen, j), 1)
                 end do
              else
                 ! a is lower triangular.
                 do j = 1, n
                    jlen = min(kd, n - j)
                    if (jlen > 0) then
                       cnorm(j) = stdlib_scasum(jlen, ab(2, j), 1)
                    else
                       cnorm(j) = zero
                    end if
                 end do
              end if
           end if
           ! scale the column norms by tscal if the maximum element in cnorm is
           ! greater than bignum/2.
           imax = stdlib_isamax(n, cnorm, 1)
           tmax = cnorm(imax)
           if (tmax <= bignum*half) then
              tscal = one
           else
              tscal = half/(smlnum*tmax)
              call stdlib_sscal(n, tscal, cnorm, 1)
           end if
           ! compute a bound on the computed solution vector to see if the
           ! level 2 blas routine stdlib_ctbsv can be used.
           xmax = zero
           do j = 1, n
              xmax = max(xmax, cabs2(x(j)))
           end do
           xbnd = xmax
           if (notran) then
              ! compute the growth in a * x = b.
              if (upper) then
                 jfirst = n
                 jlast = 1
                 jinc = -1
                 maind = kd + 1
              else
                 jfirst = 1
                 jlast = n
                 jinc = 1
                 maind = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 60
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, g(0) = max{x(i), i=1,...,n}.
                 grow = half/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 60
                    tjjs = ab(maind, j)
                    tjj = cabs1(tjjs)
                    if (tjj >= smlnum) then
                       ! m(j) = g(j-1) / abs(a(j,j))
                       xbnd = min(xbnd, min(one, tjj)*grow)
                    else
                       ! m(j) could overflow, set xbnd to 0.
                       xbnd = zero
                    end if
                    if (tjj + cnorm(j) >= smlnum) then
                       ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) )
                       grow = grow*(tjj/(tjj + cnorm(j)))
                    else
                       ! g(j) could overflow, set grow to 0.
                       grow = zero
                    end if
                 end do
                 grow = xbnd
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, half/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 60
                    ! g(j) = g(j-1)*( 1 + cnorm(j) )
                    grow = grow*(one/(one + cnorm(j)))
                 end do
              end if
60      continue
           else
              ! compute the growth in a**t * x = b  or  a**h * x = b.
              if (upper) then
                 jfirst = 1
                 jlast = n
                 jinc = 1
                 maind = kd + 1
              else
                 jfirst = n
                 jlast = 1
                 jinc = -1
                 maind = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 90
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, m(0) = max{x(i), i=1,...,n}.
                 grow = half/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 90
                    ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) )
                    xj = one + cnorm(j)
                    grow = min(grow, xbnd/xj)
                    tjjs = ab(maind, j)
                    tjj = cabs1(tjjs)
                    if (tjj >= smlnum) then
                       ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j))
                       if (xj > tjj) xbnd = xbnd*(tjj/xj)
                    else
                       ! m(j) could overflow, set xbnd to 0.
                       xbnd = zero
                    end if
                 end do
                 grow = min(grow, xbnd)
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, half/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 90
                    ! g(j) = ( 1 + cnorm(j) )*g(j-1)
                    xj = one + cnorm(j)
                    grow = grow/xj
                 end do
              end if
90      continue
           end if
           if ((grow*tscal) > smlnum) then
              ! use the level 2 blas solve if the reciprocal of the bound on
              ! elements of x is not too small.
              call stdlib_ctbsv(uplo, trans, diag, n, kd, ab, ldab, x, 1)
           else
              ! use a level 1 blas solve, scaling intermediate results.
              if (xmax > bignum*half) then
                 ! scale x so that its components are less than or equal to
                 ! bignum in absolute value.
                 scale = (bignum*half)/xmax
                 call stdlib_csscal(n, scale, x, 1)
                 xmax = bignum
              else
                 xmax = xmax*two
              end if
              if (notran) then
                 ! solve a * x = b
                 loop_110: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) / a(j,j), scaling x if necessary.
                    xj = cabs1(x(j))
                    if (nounit) then
                       tjjs = ab(maind, j)*tscal
                    else
                       tjjs = tscal
                       if (tscal == one) go to 105
                    end if
                       tjj = cabs1(tjjs)
                       if (tjj > smlnum) then
                          ! abs(a(j,j)) > smlnum:
                          if (tjj < one) then
                             if (xj > tjj*bignum) then
                                ! scale x by 1/b(j).
                                rec = one/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                          end if
                          x(j) = stdlib_cladiv(x(j), tjjs)
                          xj = cabs1(x(j))
                       else if (tjj > zero) then
                          ! 0 < abs(a(j,j)) <= smlnum:
                          if (xj > tjj*bignum) then
                             ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum
                             ! to avoid overflow when dividing by a(j,j).
                             rec = (tjj*bignum)/xj
                             if (cnorm(j) > one) then
                                ! scale by 1/cnorm(j) to avoid overflow when
                                ! multiplying x(j) times column j.
                                rec = rec/cnorm(j)
                             end if
                             call stdlib_csscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                          x(j) = stdlib_cladiv(x(j), tjjs)
                          xj = cabs1(x(j))
                       else
                          ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                          ! scale = 0, and compute a solution to a*x = 0.
                          do i = 1, n
                             x(i) = zero
                          end do
                          x(j) = one
                          xj = one
                          scale = zero
                          xmax = zero
                       end if
105    continue
                    ! scale x if necessary to avoid overflow when adding a
                    ! multiple of column j of a.
                    if (xj > one) then
                       rec = one/xj
                       if (cnorm(j) > (bignum - xmax)*rec) then
                          ! scale x by 1/(2*abs(x(j))).
                          rec = rec*half
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                       end if
                    else if (xj*cnorm(j) > (bignum - xmax)) then
                       ! scale x by 1/2.
                       call stdlib_csscal(n, half, x, 1)
                       scale = scale*half
                    end if
                    if (upper) then
                       if (j > 1) then
                          ! compute the update
                             ! x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) -
                                                   ! x(j)* a(max(1,j-kd):j-1,j)
                          jlen = min(kd, j - 1)
                          call stdlib_caxpy(jlen, -x(j)*tscal, ab(kd + 1 - jlen, j), 1, x(j - jlen &
                                    ), 1)
                          i = stdlib_icamax(j - 1, x, 1)
                          xmax = cabs1(x(i))
                       end if
                    else if (j < n) then
                       ! compute the update
                          ! x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) -
                                                ! x(j) * a(j+1:min(j+kd,n),j)
                       jlen = min(kd, n - j)
                       if (jlen > 0) call stdlib_caxpy(jlen, -x(j)*tscal, ab(2, j), 1, x(j + 1), &
                                  1)
                       i = j + stdlib_icamax(n - j, x(j + 1), 1)
                       xmax = cabs1(x(i))
                    end if
                 end do loop_110
              else if (stdlib_lsame(trans, 't')) then
                 ! solve a**t * x = b
                 loop_150: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = cabs1(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = ab(maind, j)*tscal
                       else
                          tjjs = tscal
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                             rec = min(one, rec*tjj)
                             uscal = stdlib_cladiv(uscal, tjjs)
                          end if
                       if (rec < one) then
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    csumj = zero
                    if (uscal == cmplx(one, KIND=sp)) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_cdotu to perform the dot product.
                       if (upper) then
                          jlen = min(kd, j - 1)
                          csumj = stdlib_cdotu(jlen, ab(kd + 1 - jlen, j), 1, x(j - jlen), 1)
                                    
                       else
                          jlen = min(kd, n - j)
                          if (jlen > 1) csumj = stdlib_cdotu(jlen, ab(2, j), 1, x(j + 1), 1)
                                    
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          jlen = min(kd, j - 1)
                          do i = 1, jlen
                             csumj = csumj + (ab(kd + i - jlen, j)*uscal)*x(j - jlen - 1 + i)
                          end do
                       else
                          jlen = min(kd, n - j)
                          do i = 1, jlen
                             csumj = csumj + (ab(i + 1, j)*uscal)*x(j + i)
                          end do
                       end if
                    end if
                    if (uscal == cmplx(tscal, KIND=sp)) then
                       ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - csumj
                       xj = cabs1(x(j))
                       if (nounit) then
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjjs = ab(maind, j)*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 145
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                             if (tjj < one) then
                                if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                   rec = one/xj
                                   call stdlib_csscal(n, rec, x, 1)
                                   scale = scale*rec
                                   xmax = xmax*rec
                                end if
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                             if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                                rec = (tjj*bignum)/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0 and compute a solution to a**t *x = 0.
                             do i = 1, n
                                x(i) = zero
                             end do
                             x(j) = one
                             scale = zero
                             xmax = zero
                          end if
145    continue
                    else
                       ! compute x(j) := x(j) / a(j,j) - csumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = stdlib_cladiv(x(j), tjjs) - csumj
                    end if
                    xmax = max(xmax, cabs1(x(j)))
                 end do loop_150
              else
                 ! solve a**h * x = b
                 loop_190: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = cabs1(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = conjg(ab(maind, j))*tscal
                       else
                          tjjs = tscal
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                             rec = min(one, rec*tjj)
                             uscal = stdlib_cladiv(uscal, tjjs)
                          end if
                       if (rec < one) then
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    csumj = zero
                    if (uscal == cmplx(one, KIND=sp)) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_cdotc to perform the dot product.
                       if (upper) then
                          jlen = min(kd, j - 1)
                          csumj = stdlib_cdotc(jlen, ab(kd + 1 - jlen, j), 1, x(j - jlen), 1)
                                    
                       else
                          jlen = min(kd, n - j)
                          if (jlen > 1) csumj = stdlib_cdotc(jlen, ab(2, j), 1, x(j + 1), 1)
                                    
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          jlen = min(kd, j - 1)
                          do i = 1, jlen
                             csumj = csumj + (conjg(ab(kd + i - jlen, j))*uscal)*x(j - jlen - 1 + i)
                                       
                          end do
                       else
                          jlen = min(kd, n - j)
                          do i = 1, jlen
                             csumj = csumj + (conjg(ab(i + 1, j))*uscal)*x(j + i)
                          end do
                       end if
                    end if
                    if (uscal == cmplx(tscal, KIND=sp)) then
                       ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - csumj
                       xj = cabs1(x(j))
                       if (nounit) then
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjjs = conjg(ab(maind, j))*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 185
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                             if (tjj < one) then
                                if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                   rec = one/xj
                                   call stdlib_csscal(n, rec, x, 1)
                                   scale = scale*rec
                                   xmax = xmax*rec
                                end if
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                             if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                                rec = (tjj*bignum)/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0 and compute a solution to a**h *x = 0.
                             do i = 1, n
                                x(i) = zero
                             end do
                             x(j) = one
                             scale = zero
                             xmax = zero
                          end if
185    continue
                    else
                       ! compute x(j) := x(j) / a(j,j) - csumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = stdlib_cladiv(x(j), tjjs) - csumj
                    end if
                    xmax = max(xmax, cabs1(x(j)))
                 end do loop_190
              end if
              scale = scale/tscal
           end if
           ! scale the column norms by 1/tscal for return.
           if (tscal /= one) then
              call stdlib_sscal(n, one/tscal, cnorm, 1)
           end if
           return
           ! end of stdlib_clatbs
     end subroutine stdlib_clatbs

     ! CLATPS solves one of the triangular systems
     ! A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
     ! with scaling to prevent overflow, where A is an upper or lower
     ! triangular matrix stored in packed form.  Here A**T denotes the
     ! transpose of A, A**H denotes the conjugate transpose of A, x and b
     ! are n-element vectors, and s is a scaling factor, usually less than
     ! or equal to 1, chosen so that the components of x will be less than
     ! the overflow threshold.  If the unscaled problem will not cause
     ! overflow, the Level 2 BLAS routine CTPSV is called. If the matrix A
     ! is singular (A(j,j) = 0 for some j), then s is set to 0 and a
     ! non-trivial solution to A*x = 0 is returned.

     subroutine stdlib_clatps(uplo, trans, diag, normin, n, ap, x, scale, cnorm, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, normin, trans, uplo
           integer(ilp) :: info, n
           real(sp) :: scale
           ! .. array arguments ..
           real(sp) :: cnorm(*)
           complex(sp) :: ap(*), x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           integer(ilp) :: i, imax, ip, j, jfirst, jinc, jlast, jlen
           real(sp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax
           complex(sp) :: csumj, tjjs, uscal, zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, conjg, max, min, real
           ! .. statement functions ..
           real(sp) :: cabs1, cabs2
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           cabs2(zdum) = abs(real(zdum)/2.) + abs(aimag(zdum)/2.)
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           ! test the input parameters.
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (.not. stdlib_lsame(normin, 'y') .and. .not. stdlib_lsame(normin, 'n')) &
                     then
              info = -4
           else if (n < 0) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clatps', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine machine dependent parameters to control overflow.
           smlnum = stdlib_slamch('safe minimum')
           bignum = one/smlnum
           call stdlib_slabad(smlnum, bignum)
           smlnum = smlnum/stdlib_slamch('precision')
           bignum = one/smlnum
           scale = one
           if (stdlib_lsame(normin, 'n')) then
              ! compute the 1-norm of each column, not including the diagonal.
              if (upper) then
                 ! a is upper triangular.
                 ip = 1
                 do j = 1, n
                    cnorm(j) = stdlib_scasum(j - 1, ap(ip), 1)
                    ip = ip + j
                 end do
              else
                 ! a is lower triangular.
                 ip = 1
                 do j = 1, n - 1
                    cnorm(j) = stdlib_scasum(n - j, ap(ip + 1), 1)
                    ip = ip + n - j + 1
                 end do
                 cnorm(n) = zero
              end if
           end if
           ! scale the column norms by tscal if the maximum element in cnorm is
           ! greater than bignum/2.
           imax = stdlib_isamax(n, cnorm, 1)
           tmax = cnorm(imax)
           if (tmax <= bignum*half) then
              tscal = one
           else
              tscal = half/(smlnum*tmax)
              call stdlib_sscal(n, tscal, cnorm, 1)
           end if
           ! compute a bound on the computed solution vector to see if the
           ! level 2 blas routine stdlib_ctpsv can be used.
           xmax = zero
           do j = 1, n
              xmax = max(xmax, cabs2(x(j)))
           end do
           xbnd = xmax
           if (notran) then
              ! compute the growth in a * x = b.
              if (upper) then
                 jfirst = n
                 jlast = 1
                 jinc = -1
              else
                 jfirst = 1
                 jlast = n
                 jinc = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 60
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, g(0) = max{x(i), i=1,...,n}.
                 grow = half/max(xbnd, smlnum)
                 xbnd = grow
                 ip = jfirst*(jfirst + 1)/2
                 jlen = n
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 60
                    tjjs = ap(ip)
                    tjj = cabs1(tjjs)
                    if (tjj >= smlnum) then
                       ! m(j) = g(j-1) / abs(a(j,j))
                       xbnd = min(xbnd, min(one, tjj)*grow)
                    else
                       ! m(j) could overflow, set xbnd to 0.
                       xbnd = zero
                    end if
                    if (tjj + cnorm(j) >= smlnum) then
                       ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) )
                       grow = grow*(tjj/(tjj + cnorm(j)))
                    else
                       ! g(j) could overflow, set grow to 0.
                       grow = zero
                    end if
                    ip = ip + jinc*jlen
                    jlen = jlen - 1
                 end do
                 grow = xbnd
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, half/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 60
                    ! g(j) = g(j-1)*( 1 + cnorm(j) )
                    grow = grow*(one/(one + cnorm(j)))
                 end do
              end if
60      continue
           else
              ! compute the growth in a**t * x = b  or  a**h * x = b.
              if (upper) then
                 jfirst = 1
                 jlast = n
                 jinc = 1
              else
                 jfirst = n
                 jlast = 1
                 jinc = -1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 90
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, m(0) = max{x(i), i=1,...,n}.
                 grow = half/max(xbnd, smlnum)
                 xbnd = grow
                 ip = jfirst*(jfirst + 1)/2
                 jlen = 1
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 90
                    ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) )
                    xj = one + cnorm(j)
                    grow = min(grow, xbnd/xj)
                    tjjs = ap(ip)
                    tjj = cabs1(tjjs)
                    if (tjj >= smlnum) then
                       ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j))
                       if (xj > tjj) xbnd = xbnd*(tjj/xj)
                    else
                       ! m(j) could overflow, set xbnd to 0.
                       xbnd = zero
                    end if
                    jlen = jlen + 1
                    ip = ip + jinc*jlen
                 end do
                 grow = min(grow, xbnd)
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, half/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 90
                    ! g(j) = ( 1 + cnorm(j) )*g(j-1)
                    xj = one + cnorm(j)
                    grow = grow/xj
                 end do
              end if
90      continue
           end if
           if ((grow*tscal) > smlnum) then
              ! use the level 2 blas solve if the reciprocal of the bound on
              ! elements of x is not too small.
              call stdlib_ctpsv(uplo, trans, diag, n, ap, x, 1)
           else
              ! use a level 1 blas solve, scaling intermediate results.
              if (xmax > bignum*half) then
                 ! scale x so that its components are less than or equal to
                 ! bignum in absolute value.
                 scale = (bignum*half)/xmax
                 call stdlib_csscal(n, scale, x, 1)
                 xmax = bignum
              else
                 xmax = xmax*two
              end if
              if (notran) then
                 ! solve a * x = b
                 ip = jfirst*(jfirst + 1)/2
                 loop_110: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) / a(j,j), scaling x if necessary.
                    xj = cabs1(x(j))
                    if (nounit) then
                       tjjs = ap(ip)*tscal
                    else
                       tjjs = tscal
                       if (tscal == one) go to 105
                    end if
                       tjj = cabs1(tjjs)
                       if (tjj > smlnum) then
                          ! abs(a(j,j)) > smlnum:
                          if (tjj < one) then
                             if (xj > tjj*bignum) then
                                ! scale x by 1/b(j).
                                rec = one/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                          end if
                          x(j) = stdlib_cladiv(x(j), tjjs)
                          xj = cabs1(x(j))
                       else if (tjj > zero) then
                          ! 0 < abs(a(j,j)) <= smlnum:
                          if (xj > tjj*bignum) then
                             ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum
                             ! to avoid overflow when dividing by a(j,j).
                             rec = (tjj*bignum)/xj
                             if (cnorm(j) > one) then
                                ! scale by 1/cnorm(j) to avoid overflow when
                                ! multiplying x(j) times column j.
                                rec = rec/cnorm(j)
                             end if
                             call stdlib_csscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                          x(j) = stdlib_cladiv(x(j), tjjs)
                          xj = cabs1(x(j))
                       else
                          ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                          ! scale = 0, and compute a solution to a*x = 0.
                          do i = 1, n
                             x(i) = zero
                          end do
                          x(j) = one
                          xj = one
                          scale = zero
                          xmax = zero
                       end if
105    continue
                    ! scale x if necessary to avoid overflow when adding a
                    ! multiple of column j of a.
                    if (xj > one) then
                       rec = one/xj
                       if (cnorm(j) > (bignum - xmax)*rec) then
                          ! scale x by 1/(2*abs(x(j))).
                          rec = rec*half
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                       end if
                    else if (xj*cnorm(j) > (bignum - xmax)) then
                       ! scale x by 1/2.
                       call stdlib_csscal(n, half, x, 1)
                       scale = scale*half
                    end if
                    if (upper) then
                       if (j > 1) then
                          ! compute the update
                             ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j)
                          call stdlib_caxpy(j - 1, -x(j)*tscal, ap(ip - j + 1), 1, x, 1)
                          i = stdlib_icamax(j - 1, x, 1)
                          xmax = cabs1(x(i))
                       end if
                       ip = ip - j
                    else
                       if (j < n) then
                          ! compute the update
                             ! x(j+1:n) := x(j+1:n) - x(j) * a(j+1:n,j)
                          call stdlib_caxpy(n - j, -x(j)*tscal, ap(ip + 1), 1, x(j + 1), 1)
                                    
                          i = j + stdlib_icamax(n - j, x(j + 1), 1)
                          xmax = cabs1(x(i))
                       end if
                       ip = ip + n - j + 1
                    end if
                 end do loop_110
              else if (stdlib_lsame(trans, 't')) then
                 ! solve a**t * x = b
                 ip = jfirst*(jfirst + 1)/2
                 jlen = 1
                 loop_150: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = cabs1(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = ap(ip)*tscal
                       else
                          tjjs = tscal
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                             rec = min(one, rec*tjj)
                             uscal = stdlib_cladiv(uscal, tjjs)
                          end if
                       if (rec < one) then
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    csumj = zero
                    if (uscal == cmplx(one, KIND=sp)) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_cdotu to perform the dot product.
                       if (upper) then
                          csumj = stdlib_cdotu(j - 1, ap(ip - j + 1), 1, x, 1)
                       else if (j < n) then
                          csumj = stdlib_cdotu(n - j, ap(ip + 1), 1, x(j + 1), 1)
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          do i = 1, j - 1
                             csumj = csumj + (ap(ip - j + i)*uscal)*x(i)
                          end do
                       else if (j < n) then
                          do i = 1, n - j
                             csumj = csumj + (ap(ip + i)*uscal)*x(j + i)
                          end do
                       end if
                    end if
                    if (uscal == cmplx(tscal, KIND=sp)) then
                       ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - csumj
                       xj = cabs1(x(j))
                       if (nounit) then
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjjs = ap(ip)*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 145
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                             if (tjj < one) then
                                if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                   rec = one/xj
                                   call stdlib_csscal(n, rec, x, 1)
                                   scale = scale*rec
                                   xmax = xmax*rec
                                end if
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                             if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                                rec = (tjj*bignum)/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0 and compute a solution to a**t *x = 0.
                             do i = 1, n
                                x(i) = zero
                             end do
                             x(j) = one
                             scale = zero
                             xmax = zero
                          end if
145    continue
                    else
                       ! compute x(j) := x(j) / a(j,j) - csumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = stdlib_cladiv(x(j), tjjs) - csumj
                    end if
                    xmax = max(xmax, cabs1(x(j)))
                    jlen = jlen + 1
                    ip = ip + jinc*jlen
                 end do loop_150
              else
                 ! solve a**h * x = b
                 ip = jfirst*(jfirst + 1)/2
                 jlen = 1
                 loop_190: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = cabs1(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = conjg(ap(ip))*tscal
                       else
                          tjjs = tscal
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                             rec = min(one, rec*tjj)
                             uscal = stdlib_cladiv(uscal, tjjs)
                          end if
                       if (rec < one) then
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    csumj = zero
                    if (uscal == cmplx(one, KIND=sp)) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_cdotc to perform the dot product.
                       if (upper) then
                          csumj = stdlib_cdotc(j - 1, ap(ip - j + 1), 1, x, 1)
                       else if (j < n) then
                          csumj = stdlib_cdotc(n - j, ap(ip + 1), 1, x(j + 1), 1)
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          do i = 1, j - 1
                             csumj = csumj + (conjg(ap(ip - j + i))*uscal)*x(i)
                          end do
                       else if (j < n) then
                          do i = 1, n - j
                             csumj = csumj + (conjg(ap(ip + i))*uscal)*x(j + i)
                          end do
                       end if
                    end if
                    if (uscal == cmplx(tscal, KIND=sp)) then
                       ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - csumj
                       xj = cabs1(x(j))
                       if (nounit) then
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjjs = conjg(ap(ip))*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 185
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                             if (tjj < one) then
                                if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                   rec = one/xj
                                   call stdlib_csscal(n, rec, x, 1)
                                   scale = scale*rec
                                   xmax = xmax*rec
                                end if
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                             if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                                rec = (tjj*bignum)/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0 and compute a solution to a**h *x = 0.
                             do i = 1, n
                                x(i) = zero
                             end do
                             x(j) = one
                             scale = zero
                             xmax = zero
                          end if
185    continue
                    else
                       ! compute x(j) := x(j) / a(j,j) - csumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = stdlib_cladiv(x(j), tjjs) - csumj
                    end if
                    xmax = max(xmax, cabs1(x(j)))
                    jlen = jlen + 1
                    ip = ip + jinc*jlen
                 end do loop_190
              end if
              scale = scale/tscal
           end if
           ! scale the column norms by 1/tscal for return.
           if (tscal /= one) then
              call stdlib_sscal(n, one/tscal, cnorm, 1)
           end if
           return
           ! end of stdlib_clatps
     end subroutine stdlib_clatps

     ! CLATRD reduces NB rows and columns of a complex Hermitian matrix A to
     ! Hermitian tridiagonal form by a unitary similarity
     ! transformation Q**H * A * Q, and returns the matrices V and W which are
     ! needed to apply the transformation to the unreduced part of A.
     ! If UPLO = 'U', CLATRD reduces the last NB rows and columns of a
     ! matrix, of which the upper triangle is supplied;
     ! if UPLO = 'L', CLATRD reduces the first NB rows and columns of a
     ! matrix, of which the lower triangle is supplied.
     ! This is an auxiliary routine called by CHETRD.

     subroutine stdlib_clatrd(uplo, n, nb, a, lda, e, tau, w, ldw)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: lda, ldw, n, nb
           ! .. array arguments ..
           real(sp) :: e(*)
           complex(sp) :: a(lda, *), tau(*), w(ldw, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, iw
           complex(sp) :: alpha
     
           ! .. intrinsic functions ..
           intrinsic :: min, real
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) return
           if (stdlib_lsame(uplo, 'u')) then
              ! reduce last nb columns of upper triangle
              loop_10: do i = n, n - nb + 1, -1
                 iw = i - n + nb
                 if (i < n) then
                    ! update a(1:i,i)
                    a(i, i) = real(a(i, i))
                    call stdlib_clacgv(n - i, w(i, iw + 1), ldw)
                    call stdlib_cgemv('no transpose', i, n - i, -cone, a(1, i + 1), lda, w(i, iw + 1 &
                              ), ldw, cone, a(1, i), 1)
                    call stdlib_clacgv(n - i, w(i, iw + 1), ldw)
                    call stdlib_clacgv(n - i, a(i, i + 1), lda)
                    call stdlib_cgemv('no transpose', i, n - i, -cone, w(1, iw + 1), ldw, a(i, i + 1 &
                              ), lda, cone, a(1, i), 1)
                    call stdlib_clacgv(n - i, a(i, i + 1), lda)
                    a(i, i) = real(a(i, i))
                 end if
                 if (i > 1) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(1:i-2,i)
                    alpha = a(i - 1, i)
                    call stdlib_clarfg(i - 1, alpha, a(1, i), 1, tau(i - 1))
                    e(i - 1) = real(alpha)
                    a(i - 1, i) = cone
                    ! compute w(1:i-1,i)
                    call stdlib_chemv('upper', i - 1, cone, a, lda, a(1, i), 1, czero, w(1, iw), &
                               1)
                    if (i < n) then
                       call stdlib_cgemv('conjugate transpose', i - 1, n - i, cone, w(1, iw + 1), ldw, &
                                  a(1, i), 1, czero, w(i + 1, iw), 1)
                       call stdlib_cgemv('no transpose', i - 1, n - i, -cone, a(1, i + 1), lda, w(i + &
                                 1, iw), 1, cone, w(1, iw), 1)
                       call stdlib_cgemv('conjugate transpose', i - 1, n - i, cone, a(1, i + 1), lda, &
                                 a(1, i), 1, czero, w(i + 1, iw), 1)
                       call stdlib_cgemv('no transpose', i - 1, n - i, -cone, w(1, iw + 1), ldw, w(i + &
                                 1, iw), 1, cone, w(1, iw), 1)
                    end if
                    call stdlib_cscal(i - 1, tau(i - 1), w(1, iw), 1)
                    alpha = -chalf*tau(i - 1)*stdlib_cdotc(i - 1, w(1, iw), 1, a(1, i), 1)
                              
                    call stdlib_caxpy(i - 1, alpha, a(1, i), 1, w(1, iw), 1)
                 end if
              end do loop_10
           else
              ! reduce first nb columns of lower triangle
              loop_20: do i = 1, nb
                 ! update a(i:n,i)
                 a(i, i) = real(a(i, i))
                 call stdlib_clacgv(i - 1, w(i, 1), ldw)
                 call stdlib_cgemv('no transpose', n - i + 1, i - 1, -cone, a(i, 1), lda, w(i, 1), &
                           ldw, cone, a(i, i), 1)
                 call stdlib_clacgv(i - 1, w(i, 1), ldw)
                 call stdlib_clacgv(i - 1, a(i, 1), lda)
                 call stdlib_cgemv('no transpose', n - i + 1, i - 1, -cone, w(i, 1), ldw, a(i, 1), &
                           lda, cone, a(i, i), 1)
                 call stdlib_clacgv(i - 1, a(i, 1), lda)
                 a(i, i) = real(a(i, i))
                 if (i < n) then
                    ! generate elementary reflector h(i) to annihilate
                    ! a(i+2:n,i)
                    alpha = a(i + 1, i)
                    call stdlib_clarfg(n - i, alpha, a(min(i + 2, n), i), 1, tau(i))
                    e(i) = real(alpha)
                    a(i + 1, i) = cone
                    ! compute w(i+1:n,i)
                    call stdlib_chemv('lower', n - i, cone, a(i + 1, i + 1), lda, a(i + 1, i), 1, &
                              czero, w(i + 1, i), 1)
                    call stdlib_cgemv('conjugate transpose', n - i, i - 1, cone, w(i + 1, 1), ldw, a( &
                              i + 1, i), 1, czero, w(1, i), 1)
                    call stdlib_cgemv('no transpose', n - i, i - 1, -cone, a(i + 1, 1), lda, w(1, i) &
                              , 1, cone, w(i + 1, i), 1)
                    call stdlib_cgemv('conjugate transpose', n - i, i - 1, cone, a(i + 1, 1), lda, a( &
                              i + 1, i), 1, czero, w(1, i), 1)
                    call stdlib_cgemv('no transpose', n - i, i - 1, -cone, w(i + 1, 1), ldw, w(1, i) &
                              , 1, cone, w(i + 1, i), 1)
                    call stdlib_cscal(n - i, tau(i), w(i + 1, i), 1)
                    alpha = -chalf*tau(i)*stdlib_cdotc(n - i, w(i + 1, i), 1, a(i + 1, i), 1)
                              
                    call stdlib_caxpy(n - i, alpha, a(i + 1, i), 1, w(i + 1, i), 1)
                 end if
              end do loop_20
           end if
           return
           ! end of stdlib_clatrd
     end subroutine stdlib_clatrd

     ! CLATRS solves one of the triangular systems
     ! A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
     ! with scaling to prevent overflow.  Here A is an upper or lower
     ! triangular matrix, A**T denotes the transpose of A, A**H denotes the
     ! conjugate transpose of A, x and b are n-element vectors, and s is a
     ! scaling factor, usually less than or equal to 1, chosen so that the
     ! components of x will be less than the overflow threshold.  If the
     ! unscaled problem will not cause overflow, the Level 2 BLAS routine
     ! CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
     ! then s is set to 0 and a non-trivial solution to A*x = 0 is returned.

     subroutine stdlib_clatrs(uplo, trans, diag, normin, n, a, lda, x, scale, cnorm, info)
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: diag, normin, trans, uplo
           integer(ilp) :: info, lda, n
           real(sp) :: scale
           ! .. array arguments ..
           real(sp) :: cnorm(*)
           complex(sp) :: a(lda, *), x(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: notran, nounit, upper
           integer(ilp) :: i, imax, j, jfirst, jinc, jlast
           real(sp) :: bignum, grow, rec, smlnum, tjj, tmax, tscal, xbnd, xj, xmax
           complex(sp) :: csumj, tjjs, uscal, zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, cmplx, conjg, max, min, real
           ! .. statement functions ..
           real(sp) :: cabs1, cabs2
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           cabs2(zdum) = abs(real(zdum)/2.) + abs(aimag(zdum)/2.)
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           notran = stdlib_lsame(trans, 'n')
           nounit = stdlib_lsame(diag, 'n')
           ! test the input parameters.
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. notran .and. .not. stdlib_lsame(trans, 't') .and. .not. stdlib_lsame( &
                     trans, 'c')) then
              info = -2
           else if (.not. nounit .and. .not. stdlib_lsame(diag, 'u')) then
              info = -3
           else if (.not. stdlib_lsame(normin, 'y') .and. .not. stdlib_lsame(normin, 'n')) &
                     then
              info = -4
           else if (n < 0) then
              info = -5
           else if (lda < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clatrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine machine dependent parameters to control overflow.
           smlnum = stdlib_slamch('safe minimum')
           bignum = one/smlnum
           call stdlib_slabad(smlnum, bignum)
           smlnum = smlnum/stdlib_slamch('precision')
           bignum = one/smlnum
           scale = one
           if (stdlib_lsame(normin, 'n')) then
              ! compute the 1-norm of each column, not including the diagonal.
              if (upper) then
                 ! a is upper triangular.
                 do j = 1, n
                    cnorm(j) = stdlib_scasum(j - 1, a(1, j), 1)
                 end do
              else
                 ! a is lower triangular.
                 do j = 1, n - 1
                    cnorm(j) = stdlib_scasum(n - j, a(j + 1, j), 1)
                 end do
                 cnorm(n) = zero
              end if
           end if
           ! scale the column norms by tscal if the maximum element in cnorm is
           ! greater than bignum/2.
           imax = stdlib_isamax(n, cnorm, 1)
           tmax = cnorm(imax)
           if (tmax <= bignum*half) then
              tscal = one
           else
              tscal = half/(smlnum*tmax)
              call stdlib_sscal(n, tscal, cnorm, 1)
           end if
           ! compute a bound on the computed solution vector to see if the
           ! level 2 blas routine stdlib_ctrsv can be used.
           xmax = zero
           do j = 1, n
              xmax = max(xmax, cabs2(x(j)))
           end do
           xbnd = xmax
           if (notran) then
              ! compute the growth in a * x = b.
              if (upper) then
                 jfirst = n
                 jlast = 1
                 jinc = -1
              else
                 jfirst = 1
                 jlast = n
                 jinc = 1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 60
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, g(0) = max{x(i), i=1,...,n}.
                 grow = half/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 60
                    tjjs = a(j, j)
                    tjj = cabs1(tjjs)
                    if (tjj >= smlnum) then
                       ! m(j) = g(j-1) / abs(a(j,j))
                       xbnd = min(xbnd, min(one, tjj)*grow)
                    else
                       ! m(j) could overflow, set xbnd to 0.
                       xbnd = zero
                    end if
                    if (tjj + cnorm(j) >= smlnum) then
                       ! g(j) = g(j-1)*( 1 + cnorm(j) / abs(a(j,j)) )
                       grow = grow*(tjj/(tjj + cnorm(j)))
                    else
                       ! g(j) could overflow, set grow to 0.
                       grow = zero
                    end if
                 end do
                 grow = xbnd
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, half/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 60
                    ! g(j) = g(j-1)*( 1 + cnorm(j) )
                    grow = grow*(one/(one + cnorm(j)))
                 end do
              end if
60      continue
           else
              ! compute the growth in a**t * x = b  or  a**h * x = b.
              if (upper) then
                 jfirst = 1
                 jlast = n
                 jinc = 1
              else
                 jfirst = n
                 jlast = 1
                 jinc = -1
              end if
              if (tscal /= one) then
                 grow = zero
                 go to 90
              end if
              if (nounit) then
                 ! a is non-unit triangular.
                 ! compute grow = 1/g(j) and xbnd = 1/m(j).
                 ! initially, m(0) = max{x(i), i=1,...,n}.
                 grow = half/max(xbnd, smlnum)
                 xbnd = grow
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 90
                    ! g(j) = max( g(j-1), m(j-1)*( 1 + cnorm(j) ) )
                    xj = one + cnorm(j)
                    grow = min(grow, xbnd/xj)
                    tjjs = a(j, j)
                    tjj = cabs1(tjjs)
                    if (tjj >= smlnum) then
                       ! m(j) = m(j-1)*( 1 + cnorm(j) ) / abs(a(j,j))
                       if (xj > tjj) xbnd = xbnd*(tjj/xj)
                    else
                       ! m(j) could overflow, set xbnd to 0.
                       xbnd = zero
                    end if
                 end do
                 grow = min(grow, xbnd)
              else
                 ! a is unit triangular.
                 ! compute grow = 1/g(j), where g(0) = max{x(i), i=1,...,n}.
                 grow = min(one, half/max(xbnd, smlnum))
                 do j = jfirst, jlast, jinc
                    ! exit the loop if the growth factor is too small.
                    if (grow <= smlnum) go to 90
                    ! g(j) = ( 1 + cnorm(j) )*g(j-1)
                    xj = one + cnorm(j)
                    grow = grow/xj
                 end do
              end if
90      continue
           end if
           if ((grow*tscal) > smlnum) then
              ! use the level 2 blas solve if the reciprocal of the bound on
              ! elements of x is not too small.
              call stdlib_ctrsv(uplo, trans, diag, n, a, lda, x, 1)
           else
              ! use a level 1 blas solve, scaling intermediate results.
              if (xmax > bignum*half) then
                 ! scale x so that its components are less than or equal to
                 ! bignum in absolute value.
                 scale = (bignum*half)/xmax
                 call stdlib_csscal(n, scale, x, 1)
                 xmax = bignum
              else
                 xmax = xmax*two
              end if
              if (notran) then
                 ! solve a * x = b
                 loop_110: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) / a(j,j), scaling x if necessary.
                    xj = cabs1(x(j))
                    if (nounit) then
                       tjjs = a(j, j)*tscal
                    else
                       tjjs = tscal
                       if (tscal == one) go to 105
                    end if
                       tjj = cabs1(tjjs)
                       if (tjj > smlnum) then
                          ! abs(a(j,j)) > smlnum:
                          if (tjj < one) then
                             if (xj > tjj*bignum) then
                                ! scale x by 1/b(j).
                                rec = one/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                          end if
                          x(j) = stdlib_cladiv(x(j), tjjs)
                          xj = cabs1(x(j))
                       else if (tjj > zero) then
                          ! 0 < abs(a(j,j)) <= smlnum:
                          if (xj > tjj*bignum) then
                             ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum
                             ! to avoid overflow when dividing by a(j,j).
                             rec = (tjj*bignum)/xj
                             if (cnorm(j) > one) then
                                ! scale by 1/cnorm(j) to avoid overflow when
                                ! multiplying x(j) times column j.
                                rec = rec/cnorm(j)
                             end if
                             call stdlib_csscal(n, rec, x, 1)
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                          x(j) = stdlib_cladiv(x(j), tjjs)
                          xj = cabs1(x(j))
                       else
                          ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                          ! scale = 0, and compute a solution to a*x = 0.
                          do i = 1, n
                             x(i) = zero
                          end do
                          x(j) = one
                          xj = one
                          scale = zero
                          xmax = zero
                       end if
105    continue
                    ! scale x if necessary to avoid overflow when adding a
                    ! multiple of column j of a.
                    if (xj > one) then
                       rec = one/xj
                       if (cnorm(j) > (bignum - xmax)*rec) then
                          ! scale x by 1/(2*abs(x(j))).
                          rec = rec*half
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                       end if
                    else if (xj*cnorm(j) > (bignum - xmax)) then
                       ! scale x by 1/2.
                       call stdlib_csscal(n, half, x, 1)
                       scale = scale*half
                    end if
                    if (upper) then
                       if (j > 1) then
                          ! compute the update
                             ! x(1:j-1) := x(1:j-1) - x(j) * a(1:j-1,j)
                          call stdlib_caxpy(j - 1, -x(j)*tscal, a(1, j), 1, x, 1)
                          i = stdlib_icamax(j - 1, x, 1)
                          xmax = cabs1(x(i))
                       end if
                    else
                       if (j < n) then
                          ! compute the update
                             ! x(j+1:n) := x(j+1:n) - x(j) * a(j+1:n,j)
                          call stdlib_caxpy(n - j, -x(j)*tscal, a(j + 1, j), 1, x(j + 1), 1)
                                    
                          i = j + stdlib_icamax(n - j, x(j + 1), 1)
                          xmax = cabs1(x(i))
                       end if
                    end if
                 end do loop_110
              else if (stdlib_lsame(trans, 't')) then
                 ! solve a**t * x = b
                 loop_150: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = cabs1(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = a(j, j)*tscal
                       else
                          tjjs = tscal
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                             rec = min(one, rec*tjj)
                             uscal = stdlib_cladiv(uscal, tjjs)
                          end if
                       if (rec < one) then
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    csumj = zero
                    if (uscal == cmplx(one, KIND=sp)) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_cdotu to perform the dot product.
                       if (upper) then
                          csumj = stdlib_cdotu(j - 1, a(1, j), 1, x, 1)
                       else if (j < n) then
                          csumj = stdlib_cdotu(n - j, a(j + 1, j), 1, x(j + 1), 1)
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          do i = 1, j - 1
                             csumj = csumj + (a(i, j)*uscal)*x(i)
                          end do
                       else if (j < n) then
                          do i = j + 1, n
                             csumj = csumj + (a(i, j)*uscal)*x(i)
                          end do
                       end if
                    end if
                    if (uscal == cmplx(tscal, KIND=sp)) then
                       ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - csumj
                       xj = cabs1(x(j))
                       if (nounit) then
                          tjjs = a(j, j)*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 145
                       end if
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjj = cabs1(tjjs)
                          if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                             if (tjj < one) then
                                if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                   rec = one/xj
                                   call stdlib_csscal(n, rec, x, 1)
                                   scale = scale*rec
                                   xmax = xmax*rec
                                end if
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                             if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                                rec = (tjj*bignum)/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0 and compute a solution to a**t *x = 0.
                             do i = 1, n
                                x(i) = zero
                             end do
                             x(j) = one
                             scale = zero
                             xmax = zero
                          end if
145    continue
                    else
                       ! compute x(j) := x(j) / a(j,j) - csumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = stdlib_cladiv(x(j), tjjs) - csumj
                    end if
                    xmax = max(xmax, cabs1(x(j)))
                 end do loop_150
              else
                 ! solve a**h * x = b
                 loop_190: do j = jfirst, jlast, jinc
                    ! compute x(j) = b(j) - sum a(k,j)*x(k).
                                          ! k<>j
                    xj = cabs1(x(j))
                    uscal = tscal
                    rec = one/max(xmax, one)
                    if (cnorm(j) > (bignum - xj)*rec) then
                       ! if x(j) could overflow, scale x by 1/(2*xmax).
                       rec = rec*half
                       if (nounit) then
                          tjjs = conjg(a(j, j))*tscal
                       else
                          tjjs = tscal
                       end if
                          tjj = cabs1(tjjs)
                          if (tjj > one) then
                             ! divide by a(j,j) when scaling x if a(j,j) > 1.
                             rec = min(one, rec*tjj)
                             uscal = stdlib_cladiv(uscal, tjjs)
                          end if
                       if (rec < one) then
                          call stdlib_csscal(n, rec, x, 1)
                          scale = scale*rec
                          xmax = xmax*rec
                       end if
                    end if
                    csumj = zero
                    if (uscal == cmplx(one, KIND=sp)) then
                       ! if the scaling needed for a in the dot product is 1,
                       ! call stdlib_cdotc to perform the dot product.
                       if (upper) then
                          csumj = stdlib_cdotc(j - 1, a(1, j), 1, x, 1)
                       else if (j < n) then
                          csumj = stdlib_cdotc(n - j, a(j + 1, j), 1, x(j + 1), 1)
                       end if
                    else
                       ! otherwise, use in-line code for the dot product.
                       if (upper) then
                          do i = 1, j - 1
                             csumj = csumj + (conjg(a(i, j))*uscal)*x(i)
                          end do
                       else if (j < n) then
                          do i = j + 1, n
                             csumj = csumj + (conjg(a(i, j))*uscal)*x(i)
                          end do
                       end if
                    end if
                    if (uscal == cmplx(tscal, KIND=sp)) then
                       ! compute x(j) := ( x(j) - csumj ) / a(j,j) if 1/a(j,j)
                       ! was not used to scale the dotproduct.
                       x(j) = x(j) - csumj
                       xj = cabs1(x(j))
                       if (nounit) then
                          tjjs = conjg(a(j, j))*tscal
                       else
                          tjjs = tscal
                          if (tscal == one) go to 185
                       end if
                          ! compute x(j) = x(j) / a(j,j), scaling if necessary.
                          tjj = cabs1(tjjs)
                          if (tjj > smlnum) then
                             ! abs(a(j,j)) > smlnum:
                             if (tjj < one) then
                                if (xj > tjj*bignum) then
                                   ! scale x by 1/abs(x(j)).
                                   rec = one/xj
                                   call stdlib_csscal(n, rec, x, 1)
                                   scale = scale*rec
                                   xmax = xmax*rec
                                end if
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else if (tjj > zero) then
                             ! 0 < abs(a(j,j)) <= smlnum:
                             if (xj > tjj*bignum) then
                                ! scale x by (1/abs(x(j)))*abs(a(j,j))*bignum.
                                rec = (tjj*bignum)/xj
                                call stdlib_csscal(n, rec, x, 1)
                                scale = scale*rec
                                xmax = xmax*rec
                             end if
                             x(j) = stdlib_cladiv(x(j), tjjs)
                          else
                             ! a(j,j) = 0:  set x(1:n) = 0, x(j) = 1, and
                             ! scale = 0 and compute a solution to a**h *x = 0.
                             do i = 1, n
                                x(i) = zero
                             end do
                             x(j) = one
                             scale = zero
                             xmax = zero
                          end if
185    continue
                    else
                       ! compute x(j) := x(j) / a(j,j) - csumj if the dot
                       ! product has already been divided by 1/a(j,j).
                       x(j) = stdlib_cladiv(x(j), tjjs) - csumj
                    end if
                    xmax = max(xmax, cabs1(x(j)))
                 end do loop_190
              end if
              scale = scale/tscal
           end if
           ! scale the column norms by 1/tscal for return.
           if (tscal /= one) then
              call stdlib_sscal(n, one/tscal, cnorm, 1)
           end if
           return
           ! end of stdlib_clatrs
     end subroutine stdlib_clatrs

     ! CLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix
     ! [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R  0 ) * Z by means
     ! of unitary transformations, where  Z is an (M+L)-by-(M+L) unitary
     ! matrix and, R and A1 are M-by-M upper triangular matrices.

     subroutine stdlib_clatrz(m, n, l, a, lda, tau, work)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: l, lda, m, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *), tau(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           complex(sp) :: alpha
     
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           ! quick return if possible
           if (m == 0) then
              return
           else if (m == n) then
              do i = 1, n
                 tau(i) = czero
              end do
              return
           end if
           do i = m, 1, -1
              ! generate elementary reflector h(i) to annihilate
              ! [ a(i,i) a(i,n-l+1:n) ]
              call stdlib_clacgv(l, a(i, n - l + 1), lda)
              alpha = conjg(a(i, i))
              call stdlib_clarfg(l + 1, alpha, a(i, n - l + 1), lda, tau(i))
              tau(i) = conjg(tau(i))
              ! apply h(i) to a(1:i-1,i:n) from the right
              call stdlib_clarz('right', i - 1, n - i + 1, l, a(i, n - l + 1), lda, conjg(tau(i)), a( &
                        1, i), lda, work)
              a(i, i) = conjg(alpha)
           end do
           return
           ! end of stdlib_clatrz
     end subroutine stdlib_clatrz

     ! CLAUNHR_COL_GETRFNP2 computes the modified LU factorization without
     ! pivoting of a complex general M-by-N matrix A. The factorization has
     ! the form:
     ! A - S = L * U,
     ! where:
     ! S is a m-by-n diagonal sign matrix with the diagonal D, so that
     ! D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
     ! as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
     ! i-1 steps of Gaussian elimination. This means that the diagonal
     ! element at each step of "modified" Gaussian elimination is at
     ! least one in absolute value (so that division-by-zero not
     ! possible during the division by the diagonal element);
     ! L is a M-by-N lower triangular matrix with unit diagonal elements
     ! (lower trapezoidal if M > N);
     ! and U is a M-by-N upper triangular matrix
     ! (upper trapezoidal if M < N).
     ! This routine is an auxiliary routine used in the Householder
     ! reconstruction routine CUNHR_COL. In CUNHR_COL, this routine is
     ! applied to an M-by-N matrix A with orthonormal columns, where each
     ! element is bounded by one in absolute value. With the choice of
     ! the matrix S above, one can show that the diagonal element at each
     ! step of Gaussian elimination is the largest (in absolute value) in
     ! the column on or below the diagonal, so that no pivoting is required
     ! for numerical stability [1].
     ! For more details on the Householder reconstruction algorithm,
     ! including the modified LU factorization, see [1].
     ! This is the recursive version of the LU factorization algorithm.
     ! Denote A - S by B. The algorithm divides the matrix B into four
     ! submatrices:
     ! [  B11 | B12  ]  where B11 is n1 by n1,
     ! B = [ -----|----- ]        B21 is (m-n1) by n1,
     ! [  B21 | B22  ]        B12 is n1 by n2,
     ! B22 is (m-n1) by n2,
     ! with n1 = min(m,n)/2, n2 = n-n1.
     ! The subroutine calls itself to factor B11, solves for B21,
     ! solves for B12, updates B22, then calls itself to factor B22.
     ! For more details on the recursive LU algorithm, see [2].
     ! CLAUNHR_COL_GETRFNP2 is called to factorize a block by the blocked
     ! routine CLAUNHR_COL_GETRFNP, which uses blocked code calling
     ! Level 3 BLAS to update the submatrix. However, CLAUNHR_COL_GETRFNP2
     ! is self-sufficient and can be used without CLAUNHR_COL_GETRFNP.
     ! [1] "Reconstructing Householder vectors from tall-skinny QR",
     ! G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
     ! E. Solomonik, J. Parallel Distrib. Comput.,
     ! vol. 85, pp. 3-31, 2015.
     ! [2] "Recursion leads to automatic variable blocking for dense linear
     ! algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
     ! vol. 41, no. 6, pp. 737-755, 1997.

     recursive subroutine stdlib_claunhr_col_getrfnp2(m, n, a, lda, d, info)
     
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, m, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *), d(*)
        ! =====================================================================
           
           ! .. local scalars ..
           real(sp) :: sfmin
           integer(ilp) :: i, iinfo, n1, n2
           complex(sp) :: z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, real, cmplx, aimag, sign, max, min
           ! .. statement functions ..
           real(dp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           if (m < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, m)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_claunhr_col_getrfnp2', -info)
              return
           end if
           ! quick return if possible
           if (min(m, n) == 0) return
           if (m == 1) then
              ! one row case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d(1) = cmplx(-sign(one, real(a(1, 1))))
              ! construct the row of u
              a(1, 1) = a(1, 1) - d(1)
           else if (n == 1) then
              ! one column case, (also recursion termination case),
              ! use unblocked code
              ! transfer the sign
              d(1) = cmplx(-sign(one, real(a(1, 1))))
              ! construct the row of u
              a(1, 1) = a(1, 1) - d(1)
              ! scale the elements 2:m of the column
              ! determine machine safe minimum
              sfmin = stdlib_slamch('s')
              ! construct the subdiagonal elements of l
              if (cabs1(a(1, 1)) >= sfmin) then
                 call stdlib_cscal(m - 1, cone/a(1, 1), a(2, 1), 1)
              else
                 do i = 2, m
                    a(i, 1) = a(i, 1)/a(1, 1)
                 end do
              end if
           else
              ! divide the matrix b into four submatrices
              n1 = min(m, n)/2
              n2 = n - n1
              ! factor b11, recursive call
              call stdlib_claunhr_col_getrfnp2(n1, n1, a, lda, d, iinfo)
              ! solve for b21
              call stdlib_ctrsm('r', 'u', 'n', 'n', m - n1, n1, cone, a, lda, a(n1 + 1, 1), lda)
                        
              ! solve for b12
              call stdlib_ctrsm('l', 'l', 'n', 'u', n1, n2, cone, a, lda, a(1, n1 + 1), lda)
                        
              ! update b22, i.e. compute the schur complement
              ! b22 := b22 - b21*b12
              call stdlib_cgemm('n', 'n', m - n1, n2, n1, -cone, a(n1 + 1, 1), lda, a(1, n1 + 1), &
                        lda, cone, a(n1 + 1, n1 + 1), lda)
              ! factor b22, recursive call
              call stdlib_claunhr_col_getrfnp2(m - n1, n2, a(n1 + 1, n1 + 1), lda, d(n1 + 1), iinfo)
                        
           end if
           return
           ! end of stdlib_claunhr_col_getrfnp2
     end subroutine stdlib_claunhr_col_getrfnp2

     ! CLAUU2 computes the product U * U**H or L**H * L, where the triangular
     ! factor U or L is stored in the upper or lower triangular part of
     ! the array A.
     ! If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
     ! overwriting the factor U in A.
     ! If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
     ! overwriting the factor L in A.
     ! This is the unblocked form of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_clauu2(uplo, n, a, lda, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i
           real(sp) :: aii
     
           ! .. intrinsic functions ..
           intrinsic :: cmplx, max, real
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clauu2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! compute the product u * u**h.
              do i = 1, n
                 aii = real(a(i, i))
                 if (i < n) then
                    a(i, i) = aii*aii + real(stdlib_cdotc(n - i, a(i, i + 1), lda, a(i, i + 1), &
                              lda))
                    call stdlib_clacgv(n - i, a(i, i + 1), lda)
                    call stdlib_cgemv('no transpose', i - 1, n - i, cone, a(1, i + 1), lda, a(i, i + 1 &
                              ), lda, cmplx(aii, KIND=sp), a(1, i), 1)
                    call stdlib_clacgv(n - i, a(i, i + 1), lda)
                 else
                    call stdlib_csscal(i, aii, a(1, i), 1)
                 end if
              end do
           else
              ! compute the product l**h * l.
              do i = 1, n
                 aii = real(a(i, i))
                 if (i < n) then
                    a(i, i) = aii*aii + real(stdlib_cdotc(n - i, a(i + 1, i), 1, a(i + 1, i), 1) &
                               )
                    call stdlib_clacgv(i - 1, a(i, 1), lda)
                    call stdlib_cgemv('conjugate transpose', n - i, i - 1, cone, a(i + 1, 1), lda, a( &
                              i + 1, i), 1, cmplx(aii, KIND=sp), a(i, 1), lda)
                    call stdlib_clacgv(i - 1, a(i, 1), lda)
                 else
                    call stdlib_csscal(i, aii, a(i, 1), lda)
                 end if
              end do
           end if
           return
           ! end of stdlib_clauu2
     end subroutine stdlib_clauu2

     ! CLAUUM computes the product U * U**H or L**H * L, where the triangular
     ! factor U or L is stored in the upper or lower triangular part of
     ! the array A.
     ! If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
     ! overwriting the factor U in A.
     ! If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
     ! overwriting the factor L in A.
     ! This is the blocked form of the algorithm, calling Level 3 BLAS.

     subroutine stdlib_clauum(uplo, n, a, lda, info)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, ib, nb
     
           ! .. intrinsic functions ..
           intrinsic :: max, min
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_clauum', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! determine the block size for this environment.
           nb = stdlib_ilaenv(1, 'stdlib_clauum', uplo, n, -1, -1, -1)
           if (nb <= 1 .or. nb >= n) then
              ! use unblocked code
              call stdlib_clauu2(uplo, n, a, lda, info)
           else
              ! use blocked code
              if (upper) then
                 ! compute the product u * u**h.
                 do i = 1, n, nb
                    ib = min(nb, n - i + 1)
                    call stdlib_ctrmm('right', 'upper', 'conjugate transpose', 'non-unit', i - 1, &
                              ib, cone, a(i, i), lda, a(1, i), lda)
                    call stdlib_clauu2('upper', ib, a(i, i), lda, info)
                    if (i + ib <= n) then
                       call stdlib_cgemm('no transpose', 'conjugate transpose', i - 1, ib, n - i - ib + 1, &
                                  cone, a(1, i + ib), lda, a(i, i + ib), lda, cone, a(1, i), lda)
                       call stdlib_cherk('upper', 'no transpose', ib, n - i - ib + 1, one, a(i, i + ib), &
                                  lda, one, a(i, i), lda)
                    end if
                 end do
              else
                 ! compute the product l**h * l.
                 do i = 1, n, nb
                    ib = min(nb, n - i + 1)
                    call stdlib_ctrmm('left', 'lower', 'conjugate transpose', 'non-unit', ib, i - 1, &
                               cone, a(i, i), lda, a(i, 1), lda)
                    call stdlib_clauu2('lower', ib, a(i, i), lda, info)
                    if (i + ib <= n) then
                       call stdlib_cgemm('conjugate transpose', 'no transpose', ib, i - 1, n - i - ib + 1, &
                                  cone, a(i + ib, i), lda, a(i + ib, 1), lda, cone, a(i, 1), lda)
                       call stdlib_cherk('lower', 'conjugate transpose', ib, n - i - ib + 1, one, a(i + &
                                 ib, i), lda, one, a(i, i), lda)
                    end if
                 end do
              end if
           end if
           return
           ! end of stdlib_clauum
     end subroutine stdlib_clauum

     ! CPBEQU computes row and column scalings intended to equilibrate a
     ! Hermitian positive definite band matrix A and reduce its condition
     ! number (with respect to the two-norm).  S contains the scale factors,
     ! S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
     ! elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
     ! choice of S puts the condition number of B within a factor N of the
     ! smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_cpbequ(uplo, n, kd, ab, ldab, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, j
           real(sp) :: smin
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (ldab < kd + 1) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpbequ', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           if (upper) then
              j = kd + 1
           else
              j = 1
           end if
           ! initialize smin and amax.
           s(1) = real(ab(j, 1))
           smin = s(1)
           amax = s(1)
           ! find the minimum and maximum diagonal elements.
           do i = 2, n
              s(i) = real(ab(j, i))
              smin = min(smin, s(i))
              amax = max(amax, s(i))
           end do
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = one/sqrt(s(i))
              end do
              ! compute scond = min(s(i)) / max(s(i))
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_cpbequ
     end subroutine stdlib_cpbequ

     ! CPBSTF computes a split Cholesky factorization of a complex
     ! Hermitian positive definite band matrix A.
     ! This routine is designed to be used in conjunction with CHBGST.
     ! The factorization has the form  A = S**H*S  where S is a band matrix
     ! of the same bandwidth as A and the following structure:
     ! S = ( U    )
     ! ( M  L )
     ! where U is upper triangular of order m = (n+kd)/2, and L is lower
     ! triangular of order n-m.

     subroutine stdlib_cpbstf(uplo, n, kd, ab, ldab, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, n
           ! .. array arguments ..
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, kld, km, m
           real(sp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (ldab < kd + 1) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpbstf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           kld = max(1, ldab - 1)
           ! set the splitting point m.
           m = (n + kd)/2
           if (upper) then
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real(ab(kd + 1, j))
                 if (ajj <= zero) then
                    ab(kd + 1, j) = ajj
                    go to 50
                 end if
                 ajj = sqrt(ajj)
                 ab(kd + 1, j) = ajj
                 km = min(j - 1, kd)
                 ! compute elements j-km:j-1 of the j-th column and update the
                 ! the leading submatrix within the band.
                 call stdlib_csscal(km, one/ajj, ab(kd + 1 - km, j), 1)
                 call stdlib_cher('upper', km, -one, ab(kd + 1 - km, j), 1, ab(kd + 1, j - km), kld)
                           
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real(ab(kd + 1, j))
                 if (ajj <= zero) then
                    ab(kd + 1, j) = ajj
                    go to 50
                 end if
                 ajj = sqrt(ajj)
                 ab(kd + 1, j) = ajj
                 km = min(kd, m - j)
                 ! compute elements j+1:j+km of the j-th row and update the
                 ! trailing submatrix within the band.
                 if (km > 0) then
                    call stdlib_csscal(km, one/ajj, ab(kd, j + 1), kld)
                    call stdlib_clacgv(km, ab(kd, j + 1), kld)
                    call stdlib_cher('upper', km, -one, ab(kd, j + 1), kld, ab(kd + 1, j + 1), kld)
                              
                    call stdlib_clacgv(km, ab(kd, j + 1), kld)
                 end if
              end do
           else
              ! factorize a(m+1:n,m+1:n) as l**h*l, and update a(1:m,1:m).
              do j = n, m + 1, -1
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real(ab(1, j))
                 if (ajj <= zero) then
                    ab(1, j) = ajj
                    go to 50
                 end if
                 ajj = sqrt(ajj)
                 ab(1, j) = ajj
                 km = min(j - 1, kd)
                 ! compute elements j-km:j-1 of the j-th row and update the
                 ! trailing submatrix within the band.
                 call stdlib_csscal(km, one/ajj, ab(km + 1, j - km), kld)
                 call stdlib_clacgv(km, ab(km + 1, j - km), kld)
                 call stdlib_cher('lower', km, -one, ab(km + 1, j - km), kld, ab(1, j - km), kld)
                           
                 call stdlib_clacgv(km, ab(km + 1, j - km), kld)
              end do
              ! factorize the updated submatrix a(1:m,1:m) as u**h*u.
              do j = 1, m
                 ! compute s(j,j) and test for non-positive-definiteness.
                 ajj = real(ab(1, j))
                 if (ajj <= zero) then
                    ab(1, j) = ajj
                    go to 50
                 end if
                 ajj = sqrt(ajj)
                 ab(1, j) = ajj
                 km = min(kd, m - j)
                 ! compute elements j+1:j+km of the j-th column and update the
                 ! trailing submatrix within the band.
                 if (km > 0) then
                    call stdlib_csscal(km, one/ajj, ab(2, j), 1)
                    call stdlib_cher('lower', km, -one, ab(2, j), 1, ab(1, j + 1), kld)
                 end if
              end do
           end if
           return
50      continue
           info = j
           return
           ! end of stdlib_cpbstf
     end subroutine stdlib_cpbstf

     ! CPBTF2 computes the Cholesky factorization of a complex Hermitian
     ! positive definite band matrix A.
     ! The factorization has the form
     ! A = U**H * U ,  if UPLO = 'U', or
     ! A = L  * L**H,  if UPLO = 'L',
     ! where U is an upper triangular matrix, U**H is the conjugate transpose
     ! of U, and L is lower triangular.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_cpbtf2(uplo, n, kd, ab, ldab, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, n
           ! .. array arguments ..
           complex(sp) :: ab(ldab, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, kld, kn
           real(sp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (ldab < kd + 1) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpbtf2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           kld = max(1, ldab - 1)
           if (upper) then
              ! compute the cholesky factorization a = u**h * u.
              do j = 1, n
                 ! compute u(j,j) and test for non-positive-definiteness.
                 ajj = real(ab(kd + 1, j))
                 if (ajj <= zero) then
                    ab(kd + 1, j) = ajj
                    go to 30
                 end if
                 ajj = sqrt(ajj)
                 ab(kd + 1, j) = ajj
                 ! compute elements j+1:j+kn of row j and update the
                 ! trailing submatrix within the band.
                 kn = min(kd, n - j)
                 if (kn > 0) then
                    call stdlib_csscal(kn, one/ajj, ab(kd, j + 1), kld)
                    call stdlib_clacgv(kn, ab(kd, j + 1), kld)
                    call stdlib_cher('upper', kn, -one, ab(kd, j + 1), kld, ab(kd + 1, j + 1), kld)
                              
                    call stdlib_clacgv(kn, ab(kd, j + 1), kld)
                 end if
              end do
           else
              ! compute the cholesky factorization a = l*l**h.
              do j = 1, n
                 ! compute l(j,j) and test for non-positive-definiteness.
                 ajj = real(ab(1, j))
                 if (ajj <= zero) then
                    ab(1, j) = ajj
                    go to 30
                 end if
                 ajj = sqrt(ajj)
                 ab(1, j) = ajj
                 ! compute elements j+1:j+kn of column j and update the
                 ! trailing submatrix within the band.
                 kn = min(kd, n - j)
                 if (kn > 0) then
                    call stdlib_csscal(kn, one/ajj, ab(2, j), 1)
                    call stdlib_cher('lower', kn, -one, ab(2, j), 1, ab(1, j + 1), kld)
                 end if
              end do
           end if
           return
30      continue
           info = j
           return
           ! end of stdlib_cpbtf2
     end subroutine stdlib_cpbtf2

     ! CPBTRS solves a system of linear equations A*X = B with a Hermitian
     ! positive definite band matrix A using the Cholesky factorization
     ! A = U**H*U or A = L*L**H computed by CPBTRF.

     subroutine stdlib_cpbtrs(uplo, n, kd, nrhs, ab, ldab, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, kd, ldab, ldb, n, nrhs
           ! .. array arguments ..
           complex(sp) :: ab(ldab, *), b(ldb, *)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (kd < 0) then
              info = -3
           else if (nrhs < 0) then
              info = -4
           else if (ldab < kd + 1) then
              info = -6
           else if (ldb < max(1, n)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpbtrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b where a = u**h *u.
              do j = 1, nrhs
                 ! solve u**h *x = b, overwriting b with x.
                 call stdlib_ctbsv('upper', 'conjugate transpose', 'non-unit', n, kd, ab, ldab, b( &
                            1, j), 1)
                 ! solve u*x = b, overwriting b with x.
                 call stdlib_ctbsv('upper', 'no transpose', 'non-unit', n, kd, ab, ldab, b(1, j) &
                           , 1)
              end do
           else
              ! solve a*x = b where a = l*l**h.
              do j = 1, nrhs
                 ! solve l*x = b, overwriting b with x.
                 call stdlib_ctbsv('lower', 'no transpose', 'non-unit', n, kd, ab, ldab, b(1, j) &
                           , 1)
                 ! solve l**h *x = b, overwriting b with x.
                 call stdlib_ctbsv('lower', 'conjugate transpose', 'non-unit', n, kd, ab, ldab, b( &
                            1, j), 1)
              end do
           end if
           return
           ! end of stdlib_cpbtrs
     end subroutine stdlib_cpbtrs

     ! CPOEQU computes row and column scalings intended to equilibrate a
     ! Hermitian positive definite matrix A and reduce its condition number
     ! (with respect to the two-norm).  S contains the scale factors,
     ! S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
     ! elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
     ! choice of S puts the condition number of B within a factor N of the
     ! smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_cpoequ(n, a, lda, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           real(sp) :: smin
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (n < 0) then
              info = -1
           else if (lda < max(1, n)) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpoequ', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           ! find the minimum and maximum diagonal elements.
           s(1) = real(a(1, 1))
           smin = s(1)
           amax = s(1)
           do i = 2, n
              s(i) = real(a(i, i))
              smin = min(smin, s(i))
              amax = max(amax, s(i))
           end do
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = one/sqrt(s(i))
              end do
              ! compute scond = min(s(i)) / max(s(i))
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_cpoequ
     end subroutine stdlib_cpoequ

     ! CPOEQUB computes row and column scalings intended to equilibrate a
     ! Hermitian positive definite matrix A and reduce its condition number
     ! (with respect to the two-norm).  S contains the scale factors,
     ! S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
     ! elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
     ! choice of S puts the condition number of B within a factor N of the
     ! smallest possible condition number over all possible diagonal
     ! scalings.
     ! This routine differs from CPOEQU by restricting the scaling factors
     ! to a power of the radix.  Barring over- and underflow, scaling by
     ! these factors introduces no additional rounding errors.  However, the
     ! scaled diagonal entries are no longer approximately 1 but lie
     ! between sqrt(radix) and 1/sqrt(radix).

     subroutine stdlib_cpoequb(n, a, lda, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
           real(sp) :: s(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i
           real(sp) :: smin, base, tmp
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, sqrt, log, int
           ! .. executable statements ..
           ! test the input parameters.
           ! positive definite only performs 1 pass of equilibration.
           info = 0
           if (n < 0) then
              info = -1
           else if (lda < max(1, n)) then
              info = -3
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpoequb', -info)
              return
           end if
           ! quick return if possible.
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           base = stdlib_slamch('b')
           tmp = -0.5/log(base)
           ! find the minimum and maximum diagonal elements.
           s(1) = real(a(1, 1))
           smin = s(1)
           amax = s(1)
           do i = 2, n
              s(i) = real(a(i, i))
              smin = min(smin, s(i))
              amax = max(amax, s(i))
           end do
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = base**int(tmp*log(s(i)))
              end do
              ! compute scond = min(s(i)) / max(s(i)).
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_cpoequb
     end subroutine stdlib_cpoequb

     ! CPOTF2 computes the Cholesky factorization of a complex Hermitian
     ! positive definite matrix A.
     ! The factorization has the form
     ! A = U**H * U ,  if UPLO = 'U', or
     ! A = L  * L**H,  if UPLO = 'L',
     ! where U is an upper triangular matrix and L is lower triangular.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_cpotf2(uplo, n, a, lda, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j
           real(sp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: max, real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpotf2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! compute the cholesky factorization a = u**h *u.
              do j = 1, n
                 ! compute u(j,j) and test for non-positive-definiteness.
                 ajj = real(real(a(j, j)) - stdlib_cdotc(j - 1, a(1, j), 1, a(1, j), 1))
                           
                 if (ajj <= zero .or. stdlib_sisnan(ajj)) then
                    a(j, j) = ajj
                    go to 30
                 end if
                 ajj = sqrt(ajj)
                 a(j, j) = ajj
                 ! compute elements j+1:n of row j.
                 if (j < n) then
                    call stdlib_clacgv(j - 1, a(1, j), 1)
                    call stdlib_cgemv('transpose', j - 1, n - j, -cone, a(1, j + 1), lda, a(1, j), &
                              1, cone, a(j, j + 1), lda)
                    call stdlib_clacgv(j - 1, a(1, j), 1)
                    call stdlib_csscal(n - j, one/ajj, a(j, j + 1), lda)
                 end if
              end do
           else
              ! compute the cholesky factorization a = l*l**h.
              do j = 1, n
                 ! compute l(j,j) and test for non-positive-definiteness.
                 ajj = real(real(a(j, j)) - stdlib_cdotc(j - 1, a(j, 1), lda, a(j, 1), lda &
                           ))
                 if (ajj <= zero .or. stdlib_sisnan(ajj)) then
                    a(j, j) = ajj
                    go to 30
                 end if
                 ajj = sqrt(ajj)
                 a(j, j) = ajj
                 ! compute elements j+1:n of column j.
                 if (j < n) then
                    call stdlib_clacgv(j - 1, a(j, 1), lda)
                    call stdlib_cgemv('no transpose', n - j, j - 1, -cone, a(j + 1, 1), lda, a(j, 1) &
                              , lda, cone, a(j + 1, j), 1)
                    call stdlib_clacgv(j - 1, a(j, 1), lda)
                    call stdlib_csscal(n - j, one/ajj, a(j + 1, j), 1)
                 end if
              end do
           end if
           go to 40
30      continue
           info = j
40      continue
           return
           ! end of stdlib_cpotf2
     end subroutine stdlib_cpotf2

     ! CPOTRF2 computes the Cholesky factorization of a Hermitian
     ! positive definite matrix A using the recursive algorithm.
     ! The factorization has the form
     ! A = U**H * U,  if UPLO = 'U', or
     ! A = L  * L**H,  if UPLO = 'L',
     ! where U is an upper triangular matrix and L is lower triangular.
     ! This is the recursive version of the algorithm. It divides
     ! the matrix into four submatrices:
     ! [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2
     ! A = [ -----|----- ]  with n1 = n/2
     ! [  A21 | A22  ]       n2 = n-n1
     ! The subroutine calls itself to factor A11. Update and scale A21
     ! or A12, update A22 then calls itself to factor A22.

     recursive subroutine stdlib_cpotrf2(uplo, n, a, lda, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: n1, n2, iinfo
           real(sp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: max, real, sqrt
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpotrf2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! n=1 case
           if (n == 1) then
              ! test for non-positive-definiteness
              ajj = real(a(1, 1))
              if (ajj <= zero .or. stdlib_sisnan(ajj)) then
                 info = 1
                 return
              end if
              ! factor
              a(1, 1) = sqrt(ajj)
           ! use recursive code
           else
              n1 = n/2
              n2 = n - n1
              ! factor a11
              call stdlib_cpotrf2(uplo, n1, a(1, 1), lda, iinfo)
              if (iinfo /= 0) then
                 info = iinfo
                 return
              end if
              ! compute the cholesky factorization a = u**h*u
              if (upper) then
                 ! update and scale a12
                 call stdlib_ctrsm('l', 'u', 'c', 'n', n1, n2, cone, a(1, 1), lda, a(1, n1 + 1), &
                            lda)
                 ! update and factor a22
                 call stdlib_cherk(uplo, 'c', n2, n1, -one, a(1, n1 + 1), lda, one, a(n1 + 1, n1 + 1 &
                           ), lda)
                 call stdlib_cpotrf2(uplo, n2, a(n1 + 1, n1 + 1), lda, iinfo)
                 if (iinfo /= 0) then
                    info = iinfo + n1
                    return
                 end if
              ! compute the cholesky factorization a = l*l**h
              else
                 ! update and scale a21
                 call stdlib_ctrsm('r', 'l', 'c', 'n', n2, n1, cone, a(1, 1), lda, a(n1 + 1, 1), &
                            lda)
                 ! update and factor a22
                 call stdlib_cherk(uplo, 'n', n2, n1, -one, a(n1 + 1, 1), lda, one, a(n1 + 1, n1 + 1 &
                           ), lda)
                 call stdlib_cpotrf2(uplo, n2, a(n1 + 1, n1 + 1), lda, iinfo)
                 if (iinfo /= 0) then
                    info = iinfo + n1
                    return
                 end if
              end if
           end if
           return
           ! end of stdlib_cpotrf2
     end subroutine stdlib_cpotrf2

     ! CPOTRS solves a system of linear equations A*X = B with a Hermitian
     ! positive definite matrix A using the Cholesky factorization
     ! A = U**H*U or A = L*L**H computed by CPOTRF.

     subroutine stdlib_cpotrs(uplo, n, nrhs, a, lda, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           complex(sp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpotrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b where a = u**h *u.
              ! solve u**h *x = b, overwriting b with x.
              call stdlib_ctrsm('left', 'upper', 'conjugate transpose', 'non-unit', n, nrhs, cone, &
                         a, lda, b, ldb)
              ! solve u*x = b, overwriting b with x.
              call stdlib_ctrsm('left', 'upper', 'no transpose', 'non-unit', n, nrhs, cone, a, &
                        lda, b, ldb)
           else
              ! solve a*x = b where a = l*l**h.
              ! solve l*x = b, overwriting b with x.
              call stdlib_ctrsm('left', 'lower', 'no transpose', 'non-unit', n, nrhs, cone, a, &
                        lda, b, ldb)
              ! solve l**h *x = b, overwriting b with x.
              call stdlib_ctrsm('left', 'lower', 'conjugate transpose', 'non-unit', n, nrhs, cone, &
                         a, lda, b, ldb)
           end if
           return
           ! end of stdlib_cpotrs
     end subroutine stdlib_cpotrs

     ! CPPEQU computes row and column scalings intended to equilibrate a
     ! Hermitian positive definite matrix A in packed storage and reduce
     ! its condition number (with respect to the two-norm).  S contains the
     ! scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix
     ! B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal.
     ! This choice of S puts the condition number of B within a factor N of
     ! the smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_cppequ(uplo, n, ap, s, scond, amax, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           real(sp) :: amax, scond
           ! .. array arguments ..
           real(sp) :: s(*)
           complex(sp) :: ap(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, jj
           real(sp) :: smin
     
           ! .. intrinsic functions ..
           intrinsic :: max, min, real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cppequ', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) then
              scond = one
              amax = zero
              return
           end if
           ! initialize smin and amax.
           s(1) = real(ap(1))
           smin = s(1)
           amax = s(1)
           if (upper) then
              ! uplo = 'u':  upper triangle of a is stored.
              ! find the minimum and maximum diagonal elements.
              jj = 1
              do i = 2, n
                 jj = jj + i
                 s(i) = real(ap(jj))
                 smin = min(smin, s(i))
                 amax = max(amax, s(i))
              end do
           else
              ! uplo = 'l':  lower triangle of a is stored.
              ! find the minimum and maximum diagonal elements.
              jj = 1
              do i = 2, n
                 jj = jj + n - i + 2
                 s(i) = real(ap(jj))
                 smin = min(smin, s(i))
                 amax = max(amax, s(i))
              end do
           end if
           if (smin <= zero) then
              ! find the first non-positive diagonal element and return.
              do i = 1, n
                 if (s(i) <= zero) then
                    info = i
                    return
                 end if
              end do
           else
              ! set the scale factors to the reciprocals
              ! of the diagonal elements.
              do i = 1, n
                 s(i) = one/sqrt(s(i))
              end do
              ! compute scond = min(s(i)) / max(s(i))
              scond = sqrt(smin)/sqrt(amax)
           end if
           return
           ! end of stdlib_cppequ
     end subroutine stdlib_cppequ

     ! CPPTRF computes the Cholesky factorization of a complex Hermitian
     ! positive definite matrix A stored in packed format.
     ! The factorization has the form
     ! A = U**H * U,  if UPLO = 'U', or
     ! A = L  * L**H,  if UPLO = 'L',
     ! where U is an upper triangular matrix and L is lower triangular.

     subroutine stdlib_cpptrf(uplo, n, ap, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           complex(sp) :: ap(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, jc, jj
           real(sp) :: ajj
     
           ! .. intrinsic functions ..
           intrinsic :: real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpptrf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! compute the cholesky factorization a = u**h * u.
              jj = 0
              do j = 1, n
                 jc = jj + 1
                 jj = jj + j
                 ! compute elements 1:j-1 of column j.
                 if (j > 1) call stdlib_ctpsv('upper', 'conjugate transpose', 'non-unit', j - 1, ap, &
                           ap(jc), 1)
                 ! compute u(j,j) and test for non-positive-definiteness.
                 ajj = real(real(ap(jj)) - stdlib_cdotc(j - 1, ap(jc), 1, ap(jc), 1))
                           
                 if (ajj <= zero) then
                    ap(jj) = ajj
                    go to 30
                 end if
                 ap(jj) = sqrt(ajj)
              end do
           else
              ! compute the cholesky factorization a = l * l**h.
              jj = 1
              do j = 1, n
                 ! compute l(j,j) and test for non-positive-definiteness.
                 ajj = real(ap(jj))
                 if (ajj <= zero) then
                    ap(jj) = ajj
                    go to 30
                 end if
                 ajj = sqrt(ajj)
                 ap(jj) = ajj
                 ! compute elements j+1:n of column j and update the trailing
                 ! submatrix.
                 if (j < n) then
                    call stdlib_csscal(n - j, one/ajj, ap(jj + 1), 1)
                    call stdlib_chpr('lower', n - j, -one, ap(jj + 1), 1, ap(jj + n - j + 1))
                    jj = jj + n - j + 1
                 end if
              end do
           end if
           go to 40
30      continue
           info = j
40      continue
           return
           ! end of stdlib_cpptrf
     end subroutine stdlib_cpptrf

     ! CPPTRS solves a system of linear equations A*X = B with a Hermitian
     ! positive definite matrix A in packed storage using the Cholesky
     ! factorization A = U**H*U or A = L*L**H computed by CPPTRF.

     subroutine stdlib_cpptrs(uplo, n, nrhs, ap, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, ldb, n, nrhs
           ! .. array arguments ..
           complex(sp) :: ap(*), b(ldb, *)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (ldb < max(1, n)) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpptrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b where a = u**h * u.
              do i = 1, nrhs
                 ! solve u**h *x = b, overwriting b with x.
                 call stdlib_ctpsv('upper', 'conjugate transpose', 'non-unit', n, ap, b(1, i), &
                           1)
                 ! solve u*x = b, overwriting b with x.
                 call stdlib_ctpsv('upper', 'no transpose', 'non-unit', n, ap, b(1, i), 1)
                           
              end do
           else
              ! solve a*x = b where a = l * l**h.
              do i = 1, nrhs
                 ! solve l*y = b, overwriting b with x.
                 call stdlib_ctpsv('lower', 'no transpose', 'non-unit', n, ap, b(1, i), 1)
                           
                 ! solve l**h *x = y, overwriting b with x.
                 call stdlib_ctpsv('lower', 'conjugate transpose', 'non-unit', n, ap, b(1, i), &
                           1)
              end do
           end if
           return
           ! end of stdlib_cpptrs
     end subroutine stdlib_cpptrs

     ! CPSTF2 computes the Cholesky factorization with complete
     ! pivoting of a complex Hermitian positive semidefinite matrix A.
     ! The factorization has the form
     ! P**T * A * P = U**H * U ,  if UPLO = 'U',
     ! P**T * A * P = L  * L**H,  if UPLO = 'L',
     ! where U is an upper triangular matrix and L is lower triangular, and
     ! P is stored as vector PIV.
     ! This algorithm does not attempt to check that A is positive
     ! semidefinite. This version of the algorithm calls level 2 BLAS.

     subroutine stdlib_cpstf2(uplo, n, a, lda, piv, rank, tol, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           real(sp) :: tol
           integer(ilp) :: info, lda, n, rank
           character :: uplo
           ! .. array arguments ..
           complex(sp) :: a(lda, *)
           real(sp) :: work(2*n)
           integer(ilp) :: piv(n)
        ! =====================================================================
           
           ! .. local scalars ..
           complex(sp) :: ctemp
           real(sp) :: ajj, sstop, stemp
           integer(ilp) :: i, itemp, j, pvt
           logical(lk) :: upper
     
           ! .. intrinsic functions ..
           intrinsic :: conjg, max, real, sqrt
           ! .. executable statements ..
           ! test the input parameters
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cpstf2', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! initialize piv
           do i = 1, n
              piv(i) = i
           end do
           ! compute stopping value
           do i = 1, n
              work(i) = real(a(i, i))
           end do
           pvt = maxloc(work(1:n), 1)
           ajj = real(a(pvt, pvt))
           if (ajj <= zero .or. stdlib_sisnan(ajj)) then
              rank = 0
              info = 1
              go to 200
           end if
           ! compute stopping value if not supplied
           if (tol < zero) then
              sstop = n*stdlib_slamch('epsilon')*ajj
           else
              sstop = tol
           end if
           ! set first chalf of work to zero, holds dot products
           do i = 1, n
              work(i) = 0
           end do
           if (upper) then
              ! compute the cholesky factorization p**t * a * p = u**h * u
              loop_150: do j = 1, n
              ! find pivot, test for exit, else swap rows and columns
              ! update dot products, compute possible pivots which are
              ! stored in the second chalf of work
                 do i = j, n
                    if (j > 1) then
                       work(i) = work(i) + real(conjg(a(j - 1, i))*a(j - 1, i))
                    end if
                    work(n + i) = real(a(i, i)) - work(i)
                 end do
                 if (j > 1) then
                    itemp = maxloc(work((n + j):(2*n)), 1)
                    pvt = itemp + j - 1
                    ajj = work(n + pvt)
                    if (ajj <= sstop .or. stdlib_sisnan(ajj)) then
                       a(j, j) = ajj
                       go to 190
                    end if
                 end if
                 if (j /= pvt) then
                    ! pivot ok, so can now swap pivot rows and columns
                    a(pvt, pvt) = a(j, j)
                    call stdlib_cswap(j - 1, a(1, j), 1, a(1, pvt), 1)
                    if (pvt < n) call stdlib_cswap(n - pvt, a(j, pvt + 1), lda, a(pvt, pvt + 1), lda)
                              
                    do i = j + 1, pvt - 1
                       ctemp = conjg(a(j, i))
                       a(j, i) = conjg(a(i, pvt))
                       a(i, pvt) = ctemp
                    end do
                    a(j, pvt) = conjg(a(j, pvt))
                    ! swap dot products and piv
                    stemp = work(j)
                    work(j) = work(pvt)
                    work(pvt) = stemp
                    itemp = piv(pvt)
                    piv(pvt) = piv(j)
                    piv(j) = itemp
                 end if
                 ajj = sqrt(ajj)
                 a(j, j) = ajj
                 ! compute elements j+1:n of row j
                 if (j < n) then
                    call stdlib_clacgv(j - 1, a(1, j), 1)
                    call stdlib_cgemv('trans', j - 1, n - j, -cone, a(1, j + 1), lda, a(1, j), 1, &
                              cone, a(j, j + 1), lda)
                    call stdlib_clacgv(j - 1, a(1, j), 1)
                    call stdlib_csscal(n - j, one/ajj, a(j, j + 1), lda)
                 end if
              end do loop_150
           else
              ! compute the cholesky factorization p**t * a * p = l * l**h
              loop_180: do j = 1, n
              ! find pivot, test for exit, else swap rows and columns
              ! update dot products, compute possible pivots which are
              ! stored in the second chalf of work
                 do i = j, n
                    if (j > 1) then
                       work(i) = work(i) + real(conjg(a(i, j - 1))*a(i, j - 1))
                    end if
                    work(n + i) = real(a(i, i)) - work(i)
                 end do
                 if (j > 1) then
                    itemp = maxloc(work((n + j):(2*n)), 1)
                    pvt = itemp + j - 1
                    ajj = work(n + pvt)
                    if (ajj <= sstop .or. stdlib_sisnan(ajj)) then
                       a(j, j) = ajj
                       go to 190
                    end if
                 end if
                 if (j /= pvt) then
                    ! pivot ok, so can now swap pivot rows and columns
                    a(pvt, pvt) = a(j, j)
                    call stdlib_cswap(j - 1, a(j, 1), lda, a(pvt, 1), lda)
                    if (pvt < n) call stdlib_cswap(n - pvt, a(pvt + 1, j), 1, a(pvt + 1, pvt), 1)
                              
                    do i = j + 1, pvt - 1
                       ctemp = conjg(a(i, j))
                       a(i, j) = conjg(a(pvt, i))
                       a(pvt, i) = ctemp
                    end do
                    a(pvt, j) = conjg(a(pvt, j))
                    ! swap dot products and piv
                    stemp = work(j)
                    work(j) = work(pvt)
                    work(pvt) = stemp
                    itemp = piv(pvt)
                    piv(pvt) = piv(j)
                    piv(j) = itemp
                 end if
                 ajj = sqrt(ajj)
                 a(j, j) = ajj
                 ! compute elements j+1:n of column j
                 if (j < n) then
                    call stdlib_clacgv(j - 1, a(j, 1), lda)
                    call stdlib_cgemv('no trans', n - j, j - 1, -cone, a(j + 1, 1), lda, a(j, 1), &
                              lda, cone, a(j + 1, j), 1)
                    call stdlib_clacgv(j - 1, a(j, 1), lda)
                    call stdlib_csscal(n - j, one/ajj, a(j + 1, j), 1)
                 end if
              end do loop_180
           end if
           ! ran to completion, a has full rank
           rank = n
           go to 200
190    continue
           ! rank is number of steps completed.  set info = 1 to signal
           ! that the factorization cannot be used to solve a system.
           rank = j - 1
           info = 1
200    continue
           return
           ! end of stdlib_cpstf2
     end subroutine stdlib_cpstf2

     ! CPTCON computes the reciprocal of the condition number (in the
     ! 1-norm) of a complex Hermitian positive definite tridiagonal matrix
     ! using the factorization A = L*D*L**H or A = U**H*D*U computed by
     ! CPTTRF.
     ! Norm(inv(A)) is computed by a direct method, and the reciprocal of
     ! the condition number is computed as
     ! RCOND = 1 / (ANORM * norm(inv(A))).

     subroutine stdlib_cptcon(n, d, e, anorm, rcond, rwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, n
           real(sp) :: anorm, rcond
           ! .. array arguments ..
           real(sp) :: d(*), rwork(*)
           complex(sp) :: e(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, ix
           real(sp) :: ainvnm
     
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! test the input arguments.
           info = 0
           if (n < 0) then
              info = -1
           else if (anorm < zero) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cptcon', -info)
              return
           end if
           ! quick return if possible
           rcond = zero
           if (n == 0) then
              rcond = one
              return
           else if (anorm == zero) then
              return
           end if
           ! check that d(1:n) is positive.
           do i = 1, n
              if (d(i) <= zero) return
           end do
           ! solve m(a) * x = e, where m(a) = (m(i,j)) is given by
              ! m(i,j) =  abs(a(i,j)), i = j,
              ! m(i,j) = -abs(a(i,j)), i .ne. j,
           ! and e = [ 1, 1, ..., 1 ]**t.  note m(a) = m(l)*d*m(l)**h.
           ! solve m(l) * x = e.
           rwork(1) = one
           do i = 2, n
              rwork(i) = one + rwork(i - 1)*abs(e(i - 1))
           end do
           ! solve d * m(l)**h * x = b.
           rwork(n) = rwork(n)/d(n)
           do i = n - 1, 1, -1
              rwork(i) = rwork(i)/d(i) + rwork(i + 1)*abs(e(i))
           end do
           ! compute ainvnm = max(x(i)), 1<=i<=n.
           ix = stdlib_isamax(n, rwork, 1)
           ainvnm = abs(rwork(ix))
           ! compute the reciprocal condition number.
           if (ainvnm /= zero) rcond = (one/ainvnm)/anorm
           return
           ! end of stdlib_cptcon
     end subroutine stdlib_cptcon

     ! CPTTRF computes the L*D*L**H factorization of a complex Hermitian
     ! positive definite tridiagonal matrix A.  The factorization may also
     ! be regarded as having the form A = U**H *D*U.

     subroutine stdlib_cpttrf(n, d, e, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, n
           ! .. array arguments ..
           real(sp) :: d(*)
           complex(sp) :: e(*)
        ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, i4
           real(sp) :: eii, eir, f, g
     
           ! .. intrinsic functions ..
           intrinsic :: aimag, cmplx, mod, real
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (n < 0) then
              info = -1
              call stdlib_xerbla('stdlib_cpttrf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! compute the l*d*l**h (or u**h *d*u) factorization of a.
           i4 = mod(n - 1, 4)
           do i = 1, i4
              if (d(i) <= zero) then
                 info = i
                 go to 20
              end if
              eir = real(e(i))
              eii = aimag(e(i))
              f = eir/d(i)
              g = eii/d(i)
              e(i) = cmplx(f, g, KIND=sp)
              d(i + 1) = d(i + 1) - f*eir - g*eii
           end do
           loop_110: do i = i4 + 1, n - 4, 4
              ! drop out of the loop if d(i) <= 0: the matrix is not positive
              ! definite.
              if (d(i) <= zero) then
                 info = i
                 go to 20
              end if
              ! solve for e(i) and d(i+1).
              eir = real(e(i))
              eii = aimag(e(i))
              f = eir/d(i)
              g = eii/d(i)
              e(i) = cmplx(f, g, KIND=sp)
              d(i + 1) = d(i + 1) - f*eir - g*eii
              if (d(i + 1) <= zero) then
                 info = i + 1
                 go to 20
              end if
              ! solve for e(i+1) and d(i+2).
              eir = real(e(i + 1))
              eii = aimag(e(i + 1))
              f = eir/d(i + 1)
              g = eii/d(i + 1)
              e(i + 1) = cmplx(f, g, KIND=sp)
              d(i + 2) = d(i + 2) - f*eir - g*eii
              if (d(i + 2) <= zero) then
                 info = i + 2
                 go to 20
              end if
              ! solve for e(i+2) and d(i+3).
              eir = real(e(i + 2))
              eii = aimag(e(i + 2))
              f = eir/d(i + 2)
              g = eii/d(i + 2)
              e(i + 2) = cmplx(f, g, KIND=sp)
              d(i + 3) = d(i + 3) - f*eir - g*eii
              if (d(i + 3) <= zero) then
                 info = i + 3
                 go to 20
              end if
              ! solve for e(i+3) and d(i+4).
              eir = real(e(i + 3))
              eii = aimag(e(i + 3))
              f = eir/d(i + 3)
              g = eii/d(i + 3)
              e(i + 3) = cmplx(f, g, KIND=sp)
              d(i + 4) = d(i + 4) - f*eir - g*eii
           end do loop_110
           ! check d(n) for positive definiteness.
           if (d(n) <= zero) info = n
20      continue
           return
           ! end of stdlib_cpttrf
     end subroutine stdlib_cpttrf

     ! CPTTS2 solves a tridiagonal system of the form
     ! A * X = B
     ! using the factorization A = U**H*D*U or A = L*D*L**H computed by CPTTRF.
     ! D is a diagonal matrix specified in the vector D, U (or L) is a unit
     ! bidiagonal matrix whose superdiagonal (subdiagonal) is specified in
     ! the vector E, and X and B are N by NRHS matrices.

     subroutine stdlib_cptts2(iuplo, n, nrhs, d, e, b, ldb)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: iuplo, ldb, n, nrhs
           ! .. array arguments ..
           real(sp) :: d(*)
           complex(sp) :: b(ldb, *), e(*)
        ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, j
     
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 1) then
              if (n == 1) call stdlib_csscal(nrhs, 1./d(1), b, ldb)
              return
           end if
           if (iuplo == 1) then
              ! solve a * x = b using the factorization a = u**h *d*u,
              ! overwriting each right hand side vector with its solution.
              if (nrhs <= 2) then
                 j = 1
5        continue
                 ! solve u**h * x = b.
                 do i = 2, n
                    b(i, j) = b(i, j) - b(i - 1, j)*conjg(e(i - 1))
                 end do
                 ! solve d * u * x = b.
                 do i = 1, n
                    b(i, j) = b(i, j)/d(i)
                 end do
                 do i = n - 1, 1, -1
                    b(i, j) = b(i, j) - b(i + 1, j)*e(i)
                 end do
                 if (j < nrhs) then
                    j = j + 1
                    go to 5
                 end if
              else
                 do j = 1, nrhs
                    ! solve u**h * x = b.
                    do i = 2, n
                       b(i, j) = b(i, j) - b(i - 1, j)*conjg(e(i - 1))
                    end do
                    ! solve d * u * x = b.
                    b(n, j) = b(n, j)/d(n)
                    do i = n - 1, 1, -1
                       b(i, j) = b(i, j)/d(i) - b(i + 1, j)*e(i)
                    end do
                 end do
              end if
           else
              ! solve a * x = b using the factorization a = l*d*l**h,
              ! overwriting each right hand side vector with its solution.
              if (nrhs <= 2) then
                 j = 1
65      continue
                 ! solve l * x = b.
                 do i = 2, n
                    b(i, j) = b(i, j) - b(i - 1, j)*e(i - 1)
                 end do
                 ! solve d * l**h * x = b.
                 do i = 1, n
                    b(i, j) = b(i, j)/d(i)
                 end do
                 do i = n - 1, 1, -1
                    b(i, j) = b(i, j) - b(i + 1, j)*conjg(e(i))
                 end do
                 if (j < nrhs) then
                    j = j + 1
                    go to 65
                 end if
              else
                 do j = 1, nrhs
                    ! solve l * x = b.
                    do i = 2, n
                       b(i, j) = b(i, j) - b(i - 1, j)*e(i - 1)
                    end do
                    ! solve d * l**h * x = b.
                    b(n, j) = b(n, j)/d(n)
                    do i = n - 1, 1, -1
                       b(i, j) = b(i, j)/d(i) - b(i + 1, j)*conjg(e(i))
                    end do
                 end do
              end if
           end if
           return
           ! end of stdlib_cptts2
     end subroutine stdlib_cptts2

     ! CROT   applies a plane rotation, where the cos (C) is real and the
     ! sin (S) is complex, and the vectors CX and CY are complex.

     subroutine stdlib_crot(n, cx, incx, cy, incy, c, s)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, incy, n
           real(sp) :: c
           complex(sp) :: s
           ! .. array arguments ..
           complex(sp) :: cx(*), cy(*)
       ! =====================================================================
           ! .. local scalars ..
           integer(ilp) :: i, ix, iy
           complex(sp) :: stemp
           ! .. intrinsic functions ..
           intrinsic :: conjg
           ! .. executable statements ..
           if (n <= 0) return
           if (incx == 1 .and. incy == 1) go to 20
           ! code for unequal increments or equal increments not equal to 1
           ix = 1
           iy = 1
           if (incx < 0) ix = (-n + 1)*incx + 1
           if (incy < 0) iy = (-n + 1)*incy + 1
           do i = 1, n
              stemp = c*cx(ix) + s*cy(iy)
              cy(iy) = c*cy(iy) - conjg(s)*cx(ix)
              cx(ix) = stemp
              ix = ix + incx
              iy = iy + incy
           end do
           return
           ! code for both increments equal to 1
20      continue
           do i = 1, n
              stemp = c*cx(i) + s*cy(i)
              cy(i) = c*cy(i) - conjg(s)*cx(i)
              cx(i) = stemp
           end do
           return
     end subroutine stdlib_crot

     ! CSPMV  performs the matrix-vector operation
     ! y := alpha*A*x + beta*y,
     ! where alpha and beta are scalars, x and y are n element vectors and
     ! A is an n by n symmetric matrix, supplied in packed form.

     subroutine stdlib_cspmv(uplo, n, alpha, ap, x, incx, beta, y, incy)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: incx, incy, n
           complex(sp) :: alpha, beta
           ! .. array arguments ..
           complex(sp) :: ap(*), x(*), y(*)
       ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, ix, iy, j, jx, jy, k, kk, kx, ky
           complex(sp) :: temp1, temp2
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. stdlib_lsame(uplo, 'u') .and. .not. stdlib_lsame(uplo, 'l')) then
              info = 1
           else if (n < 0) then
              info = 2
           else if (incx == 0) then
              info = 6
           else if (incy == 0) then
              info = 9
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cspmv ', info)
              return
           end if
           ! quick return if possible.
           if ((n == 0) .or. ((alpha == czero) .and. (beta == cone))) return
           ! set up the start points in  x  and  y.
           if (incx > 0) then
              kx = 1
           else
              kx = 1 - (n - 1)*incx
           end if
           if (incy > 0) then
              ky = 1
           else
              ky = 1 - (n - 1)*incy
           end if
           ! start the operations. in this version the elements of the array ap
           ! are accessed sequentially with cone pass through ap.
           ! first form  y := beta*y.
           if (beta /= cone) then
              if (incy == 1) then
                 if (beta == czero) then
                    do i = 1, n
                       y(i) = czero
                    end do
                 else
                    do i = 1, n
                       y(i) = beta*y(i)
                    end do
                 end if
              else
                 iy = ky
                 if (beta == czero) then
                    do i = 1, n
                       y(iy) = czero
                       iy = iy + incy
                    end do
                 else
                    do i = 1, n
                       y(iy) = beta*y(iy)
                       iy = iy + incy
                    end do
                 end if
              end if
           end if
           if (alpha == czero) return
           kk = 1
           if (stdlib_lsame(uplo, 'u')) then
              ! form  y  when ap contains the upper triangle.
              if ((incx == 1) .and. (incy == 1)) then
                 do j = 1, n
                    temp1 = alpha*x(j)
                    temp2 = czero
                    k = kk
                    do i = 1, j - 1
                       y(i) = y(i) + temp1*ap(k)
                       temp2 = temp2 + ap(k)*x(i)
                       k = k + 1
                    end do
                    y(j) = y(j) + temp1*ap(kk + j - 1) + alpha*temp2
                    kk = kk + j
                 end do
              else
                 jx = kx
                 jy = ky
                 do j = 1, n
                    temp1 = alpha*x(jx)
                    temp2 = czero
                    ix = kx
                    iy = ky
                    do k = kk, kk + j - 2
                       y(iy) = y(iy) + temp1*ap(k)
                       temp2 = temp2 + ap(k)*x(ix)
                       ix = ix + incx
                       iy = iy + incy
                    end do
                    y(jy) = y(jy) + temp1*ap(kk + j - 1) + alpha*temp2
                    jx = jx + incx
                    jy = jy + incy
                    kk = kk + j
                 end do
              end if
           else
              ! form  y  when ap contains the lower triangle.
              if ((incx == 1) .and. (incy == 1)) then
                 do j = 1, n
                    temp1 = alpha*x(j)
                    temp2 = czero
                    y(j) = y(j) + temp1*ap(kk)
                    k = kk + 1
                    do i = j + 1, n
                       y(i) = y(i) + temp1*ap(k)
                       temp2 = temp2 + ap(k)*x(i)
                       k = k + 1
                    end do
                    y(j) = y(j) + alpha*temp2
                    kk = kk + (n - j + 1)
                 end do
              else
                 jx = kx
                 jy = ky
                 do j = 1, n
                    temp1 = alpha*x(jx)
                    temp2 = czero
                    y(jy) = y(jy) + temp1*ap(kk)
                    ix = jx
                    iy = jy
                    do k = kk + 1, kk + n - j
                       ix = ix + incx
                       iy = iy + incy
                       y(iy) = y(iy) + temp1*ap(k)
                       temp2 = temp2 + ap(k)*x(ix)
                    end do
                    y(jy) = y(jy) + alpha*temp2
                    jx = jx + incx
                    jy = jy + incy
                    kk = kk + (n - j + 1)
                 end do
              end if
           end if
           return
           ! end of stdlib_cspmv
     end subroutine stdlib_cspmv

     ! CSPR    performs the symmetric rank 1 operation
     ! A := alpha*x*x**H + A,
     ! where alpha is a complex scalar, x is an n element vector and A is an
     ! n by n symmetric matrix, supplied in packed form.

     subroutine stdlib_cspr(uplo, n, alpha, x, incx, ap)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: incx, n
           complex(sp) :: alpha
           ! .. array arguments ..
           complex(sp) :: ap(*), x(*)
       ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, ix, j, jx, k, kk, kx
           complex(sp) :: temp
     
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. stdlib_lsame(uplo, 'u') .and. .not. stdlib_lsame(uplo, 'l')) then
              info = 1
           else if (n < 0) then
              info = 2
           else if (incx == 0) then
              info = 5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cspr  ', info)
              return
           end if
           ! quick return if possible.
           if ((n == 0) .or. (alpha == czero)) return
           ! set the start point in x if the increment is not unity.
           if (incx <= 0) then
              kx = 1 - (n - 1)*incx
           else if (incx /= 1) then
              kx = 1
           end if
           ! start the operations. in this version the elements of the array ap
           ! are accessed sequentially with cone pass through ap.
           kk = 1
           if (stdlib_lsame(uplo, 'u')) then
              ! form  a  when upper triangle is stored in ap.
              if (incx == 1) then
                 do j = 1, n
                    if (x(j) /= czero) then
                       temp = alpha*x(j)
                       k = kk
                       do i = 1, j - 1
                          ap(k) = ap(k) + x(i)*temp
                          k = k + 1
                       end do
                       ap(kk + j - 1) = ap(kk + j - 1) + x(j)*temp
                    else
                       ap(kk + j - 1) = ap(kk + j - 1)
                    end if
                    kk = kk + j
                 end do
              else
                 jx = kx
                 do j = 1, n
                    if (x(jx) /= czero) then
                       temp = alpha*x(jx)
                       ix = kx
                       do k = kk, kk + j - 2
                          ap(k) = ap(k) + x(ix)*temp
                          ix = ix + incx
                       end do
                       ap(kk + j - 1) = ap(kk + j - 1) + x(jx)*temp
                    else
                       ap(kk + j - 1) = ap(kk + j - 1)
                    end if
                    jx = jx + incx
                    kk = kk + j
                 end do
              end if
           else
              ! form  a  when lower triangle is stored in ap.
              if (incx == 1) then
                 do j = 1, n
                    if (x(j) /= czero) then
                       temp = alpha*x(j)
                       ap(kk) = ap(kk) + temp*x(j)
                       k = kk + 1
                       do i = j + 1, n
                          ap(k) = ap(k) + x(i)*temp
                          k = k + 1
                       end do
                    else
                       ap(kk) = ap(kk)
                    end if
                    kk = kk + n - j + 1
                 end do
              else
                 jx = kx
                 do j = 1, n
                    if (x(jx) /= czero) then
                       temp = alpha*x(jx)
                       ap(kk) = ap(kk) + temp*x(jx)
                       ix = jx
                       do k = kk + 1, kk + n - j
                          ix = ix + incx
                          ap(k) = ap(k) + x(ix)*temp
                       end do
                    else
                       ap(kk) = ap(kk)
                    end if
                    jx = jx + incx
                    kk = kk + n - j + 1
                 end do
              end if
           end if
           return
           ! end of stdlib_cspr
     end subroutine stdlib_cspr

     ! CSPTRF computes the factorization of a complex symmetric matrix A
     ! stored in packed format using the Bunch-Kaufman diagonal pivoting
     ! method:
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, and D is symmetric and block diagonal with
     ! 1-by-1 and 2-by-2 diagonal blocks.

     subroutine stdlib_csptrf(uplo, n, ap, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: ap(*)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, imax, j, jmax, k, kc, kk, knc, kp, kpc, kstep, kx, npp
           real(sp) :: absakk, alpha, colmax, rowmax
           complex(sp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csptrf', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
              kc = (n - 1)*n/2 + 1
10      continue
              knc = kc
              ! if k < 1, exit from loop
              if (k < 1) go to 110
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(ap(kc + k - 1))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, ap(kc), 1)
                 colmax = cabs1(ap(kc + imax - 1))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    rowmax = zero
                    jmax = imax
                    kx = imax*(imax + 1)/2 + imax
                    do j = imax + 1, k
                       if (cabs1(ap(kx)) > rowmax) then
                          rowmax = cabs1(ap(kx))
                          jmax = j
                       end if
                       kx = kx + j
                    end do
                    kpc = (imax - 1)*imax/2 + 1
                    if (imax > 1) then
                       jmax = stdlib_icamax(imax - 1, ap(kpc), 1)
                       rowmax = max(rowmax, cabs1(ap(kpc + jmax - 1)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (cabs1(ap(kpc + imax - 1)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k - kstep + 1
                 if (kstep == 2) knc = knc - k + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    call stdlib_cswap(kp - 1, ap(knc), 1, ap(kpc), 1)
                    kx = kpc + kp - 1
                    do j = kp + 1, kk - 1
                       kx = kx + j - 1
                       t = ap(knc + j - 1)
                       ap(knc + j - 1) = ap(kx)
                       ap(kx) = t
                    end do
                    t = ap(knc + kk - 1)
                    ap(knc + kk - 1) = ap(kpc + kp - 1)
                    ap(kpc + kp - 1) = t
                    if (kstep == 2) then
                       t = ap(kc + k - 2)
                       ap(kc + k - 2) = ap(kc + kp - 1)
                       ap(kc + kp - 1) = t
                    end if
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! perform a rank-1 update of a(1:k-1,1:k-1) as
                    ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t
                    r1 = cone/ap(kc + k - 1)
                    call stdlib_cspr(uplo, k - 1, -r1, ap(kc), 1, ap)
                    ! store u(k) in column k
                    call stdlib_cscal(k - 1, r1, ap(kc), 1)
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t
                    if (k > 2) then
                       d12 = ap(k - 1 + (k - 1)*k/2)
                       d22 = ap(k - 1 + (k - 2)*(k - 1)/2)/d12
                       d11 = ap(k + (k - 1)*k/2)/d12
                       t = cone/(d11*d22 - cone)
                       d12 = t/d12
                       do j = k - 2, 1, -1
                          wkm1 = d12*(d11*ap(j + (k - 2)*(k - 1)/2) - ap(j + (k - 1)*k/2))
                                    
                          wk = d12*(d22*ap(j + (k - 1)*k/2) - ap(j + (k - 2)*(k - 1)/2))
                                    
                          do i = j, 1, -1
                             ap(i + (j - 1)*j/2) = ap(i + (j - 1)*j/2) - ap(i + (k - 1)*k/2) &
                                       *wk - ap(i + (k - 2)*(k - 1)/2)*wkm1
                          end do
                          ap(j + (k - 1)*k/2) = wk
                          ap(j + (k - 2)*(k - 1)/2) = wkm1
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              kc = knc - k
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
              kc = 1
              npp = n*(n + 1)/2
60      continue
              knc = kc
              ! if k > n, exit from loop
              if (k > n) go to 110
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(ap(kc))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, ap(kc + 1), 1)
                 colmax = cabs1(ap(kc + imax - k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero) then
                 ! column k is zero: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    rowmax = zero
                    kx = kc + imax - k
                    do j = k, imax - 1
                       if (cabs1(ap(kx)) > rowmax) then
                          rowmax = cabs1(ap(kx))
                          jmax = j
                       end if
                       kx = kx + n - j
                    end do
                    kpc = npp - (n - imax + 1)*(n - imax + 2)/2 + 1
                    if (imax < n) then
                       jmax = imax + stdlib_icamax(n - imax, ap(kpc + 1), 1)
                       rowmax = max(rowmax, cabs1(ap(kpc + jmax - imax)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (cabs1(ap(kpc)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k + kstep - 1
                 if (kstep == 2) knc = knc + n - k + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_cswap(n - kp, ap(knc + kp - kk + 1), 1, ap(kpc + 1), 1)
                              
                    kx = knc + kp - kk
                    do j = kk + 1, kp - 1
                       kx = kx + n - j + 1
                       t = ap(knc + j - kk)
                       ap(knc + j - kk) = ap(kx)
                       ap(kx) = t
                    end do
                    t = ap(knc)
                    ap(knc) = ap(kpc)
                    ap(kpc) = t
                    if (kstep == 2) then
                       t = ap(kc + 1)
                       ap(kc + 1) = ap(kc + kp - k)
                       ap(kc + kp - k) = t
                    end if
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                       ! perform a rank-1 update of a(k+1:n,k+1:n) as
                       ! a := a - l(k)*d(k)*l(k)**t = a - w(k)*(1/d(k))*w(k)**t
                       r1 = cone/ap(kc)
                       call stdlib_cspr(uplo, n - k, -r1, ap(kc + 1), 1, ap(kc + n - k + 1))
                       ! store l(k) in column k
                       call stdlib_cscal(n - k, r1, ap(kc + 1), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    if (k < n - 1) then
                       ! perform a rank-2 update of a(k+2:n,k+2:n) as
                       ! a := a - ( l(k) l(k+1) )*d(k)*( l(k) l(k+1) )**t
                          ! = a - ( w(k) w(k+1) )*inv(d(k))*( w(k) w(k+1) )**t
                       ! where l(k) and l(k+1) are the k-th and (k+1)-th
                       ! columns of l
                       d21 = ap(k + 1 + (k - 1)*(2*n - k)/2)
                       d11 = ap(k + 1 + k*(2*n - k - 1)/2)/d21
                       d22 = ap(k + (k - 1)*(2*n - k)/2)/d21
                       t = cone/(d11*d22 - cone)
                       d21 = t/d21
                       do j = k + 2, n
                          wk = d21*(d11*ap(j + (k - 1)*(2*n - k)/2) - ap(j + k*(2*n - k - 1)/2))
                                    
                          wkp1 = d21*(d22*ap(j + k*(2*n - k - 1)/2) - ap(j + (k - 1)*(2*n - k)/2) &
                                     )
                          do i = j, n
                             ap(i + (j - 1)*(2*n - j)/2) = ap(i + (j - 1)*(2*n - j)/2) - ap( &
                                       i + (k - 1)*(2*n - k)/2)*wk - ap(i + k*(2*n - k - 1)/2)*wkp1
                          end do
                          ap(j + (k - 1)*(2*n - k)/2) = wk
                          ap(j + k*(2*n - k - 1)/2) = wkp1
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              kc = knc + n - k + 2
              go to 60
           end if
110    continue
           return
           ! end of stdlib_csptrf
     end subroutine stdlib_csptrf

     ! CSPTRI computes the inverse of a complex symmetric indefinite matrix
     ! A in packed storage using the factorization A = U*D*U**T or
     ! A = L*D*L**T computed by CSPTRF.

     subroutine stdlib_csptri(uplo, n, ap, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: ap(*), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kc, kcnext, kp, kpc, kstep, kx, npp
           complex(sp) :: ak, akkp1, akp1, d, t, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csptri', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              kp = n*(n + 1)/2
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. ap(kp) == czero) return
                 kp = kp - info
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              kp = 1
              do info = 1, n
                 if (ipiv(info) > 0 .and. ap(kp) == czero) return
                 kp = kp + n - info + 1
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
              kc = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              kcnext = kc + k
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 ap(kc + k - 1) = cone/ap(kc + k - 1)
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, ap(kc), 1, work, 1)
                    call stdlib_cspmv(uplo, k - 1, -cone, ap, work, 1, czero, ap(kc), 1)
                    ap(kc + k - 1) = ap(kc + k - 1) - stdlib_cdotu(k - 1, work, 1, ap(kc), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = ap(kcnext + k - 1)
                 ak = ap(kc + k - 1)/t
                 akp1 = ap(kcnext + k)/t
                 akkp1 = ap(kcnext + k - 1)/t
                 d = t*(ak*akp1 - cone)
                 ap(kc + k - 1) = akp1/d
                 ap(kcnext + k) = ak/d
                 ap(kcnext + k - 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, ap(kc), 1, work, 1)
                    call stdlib_cspmv(uplo, k - 1, -cone, ap, work, 1, czero, ap(kc), 1)
                    ap(kc + k - 1) = ap(kc + k - 1) - stdlib_cdotu(k - 1, work, 1, ap(kc), 1)
                    ap(kcnext + k - 1) = ap(kcnext + k - 1) - stdlib_cdotu(k - 1, ap(kc), 1, ap( &
                              kcnext), 1)
                    call stdlib_ccopy(k - 1, ap(kcnext), 1, work, 1)
                    call stdlib_cspmv(uplo, k - 1, -cone, ap, work, 1, czero, ap(kcnext), 1)
                              
                    ap(kcnext + k) = ap(kcnext + k) - stdlib_cdotu(k - 1, work, 1, ap(kcnext), 1)
                              
                 end if
                 kstep = 2
                 kcnext = kcnext + k + 1
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 kpc = (kp - 1)*kp/2 + 1
                 call stdlib_cswap(kp - 1, ap(kc), 1, ap(kpc), 1)
                 kx = kpc + kp - 1
                 do j = kp + 1, k - 1
                    kx = kx + j - 1
                    temp = ap(kc + j - 1)
                    ap(kc + j - 1) = ap(kx)
                    ap(kx) = temp
                 end do
                 temp = ap(kc + k - 1)
                 ap(kc + k - 1) = ap(kpc + kp - 1)
                 ap(kpc + kp - 1) = temp
                 if (kstep == 2) then
                    temp = ap(kc + k + k - 1)
                    ap(kc + k + k - 1) = ap(kc + k + kp - 1)
                    ap(kc + k + kp - 1) = temp
                 end if
              end if
              k = k + kstep
              kc = kcnext
              go to 30
50      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              npp = n*(n + 1)/2
              k = n
              kc = npp
60      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 80
              kcnext = kc - (n - k + 2)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 ap(kc) = cone/ap(kc)
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, ap(kc + 1), 1, work, 1)
                    call stdlib_cspmv(uplo, n - k, -cone, ap(kc + n - k + 1), work, 1, czero, ap(kc + 1) &
                              , 1)
                    ap(kc) = ap(kc) - stdlib_cdotu(n - k, work, 1, ap(kc + 1), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = ap(kcnext + 1)
                 ak = ap(kcnext)/t
                 akp1 = ap(kc)/t
                 akkp1 = ap(kcnext + 1)/t
                 d = t*(ak*akp1 - cone)
                 ap(kcnext) = akp1/d
                 ap(kc) = ak/d
                 ap(kcnext + 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, ap(kc + 1), 1, work, 1)
                    call stdlib_cspmv(uplo, n - k, -cone, ap(kc + (n - k + 1)), work, 1, czero, ap( &
                              kc + 1), 1)
                    ap(kc) = ap(kc) - stdlib_cdotu(n - k, work, 1, ap(kc + 1), 1)
                    ap(kcnext + 1) = ap(kcnext + 1) - stdlib_cdotu(n - k, ap(kc + 1), 1, ap(kcnext + &
                              2), 1)
                    call stdlib_ccopy(n - k, ap(kcnext + 2), 1, work, 1)
                    call stdlib_cspmv(uplo, n - k, -cone, ap(kc + (n - k + 1)), work, 1, czero, ap( &
                              kcnext + 2), 1)
                    ap(kcnext) = ap(kcnext) - stdlib_cdotu(n - k, work, 1, ap(kcnext + 2), 1)
                              
                 end if
                 kstep = 2
                 kcnext = kcnext - (n - k + 3)
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 kpc = npp - (n - kp + 1)*(n - kp + 2)/2 + 1
                 if (kp < n) call stdlib_cswap(n - kp, ap(kc + kp - k + 1), 1, ap(kpc + 1), 1)
                 kx = kc + kp - k
                 do j = k + 1, kp - 1
                    kx = kx + n - j + 1
                    temp = ap(kc + j - k)
                    ap(kc + j - k) = ap(kx)
                    ap(kx) = temp
                 end do
                 temp = ap(kc)
                 ap(kc) = ap(kpc)
                 ap(kpc) = temp
                 if (kstep == 2) then
                    temp = ap(kc - n + k - 1)
                    ap(kc - n + k - 1) = ap(kc - n + kp - 1)
                    ap(kc - n + kp - 1) = temp
                 end if
              end if
              k = k - kstep
              kc = kcnext
              go to 60
80      continue
           end if
           return
           ! end of stdlib_csptri
     end subroutine stdlib_csptri

     ! CSPTRS solves a system of linear equations A*X = B with a complex
     ! symmetric matrix A stored in packed format using the factorization
     ! A = U*D*U**T or A = L*D*L**T computed by CSPTRF.

     subroutine stdlib_csptrs(uplo, n, nrhs, ap, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: ap(*), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kc, kp
           complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (ldb < max(1, n)) then
              info = -7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csptrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b, where a = u*d*u**t.
              ! first solve u*d*x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
              kc = n*(n + 1)/2 + 1
10      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 30
              kc = kc - k
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_cgeru(k - 1, nrhs, -cone, ap(kc), 1, b(k, 1), ldb, b(1, 1), ldb)
                           
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_cscal(nrhs, cone/ap(kc + k - 1), b(k, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k-1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k - 1) call stdlib_cswap(nrhs, b(k - 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in columns k-1 and k of a.
                 call stdlib_cgeru(k - 2, nrhs, -cone, ap(kc), 1, b(k, 1), ldb, b(1, 1), ldb)
                           
                 call stdlib_cgeru(k - 2, nrhs, -cone, ap(kc - (k - 1)), 1, b(k - 1, 1), ldb, b(1, &
                           1), ldb)
                 ! multiply by the inverse of the diagonal block.
                 akm1k = ap(kc + k - 2)
                 akm1 = ap(kc - 1)/akm1k
                 ak = ap(kc + k - 1)/akm1k
                 denom = akm1*ak - cone
                 do j = 1, nrhs
                    bkm1 = b(k - 1, j)/akm1k
                    bk = b(k, j)/akm1k
                    b(k - 1, j) = (ak*bkm1 - bk)/denom
                    b(k, j) = (akm1*bk - bkm1)/denom
                 end do
                 kc = kc - k + 1
                 k = k - 2
              end if
              go to 10
30      continue
              ! next solve u**t*x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
              kc = 1
40      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(u**t(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_cgemv('transpose', k - 1, nrhs, -cone, b, ldb, ap(kc), 1, cone, b(k, &
                            1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kc = kc + k
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation
                 ! stored in columns k and k+1 of a.
                 call stdlib_cgemv('transpose', k - 1, nrhs, -cone, b, ldb, ap(kc), 1, cone, b(k, &
                            1), ldb)
                 call stdlib_cgemv('transpose', k - 1, nrhs, -cone, b, ldb, ap(kc + k), 1, cone, b( &
                           k + 1, 1), ldb)
                 ! interchange rows k and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kc = kc + 2*k + 1
                 k = k + 2
              end if
              go to 40
50      continue
           else
              ! solve a*x = b, where a = l*d*l**t.
              ! first solve l*d*x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
              kc = 1
60      continue
              ! if k > n, exit from loop.
              if (k > n) go to 80
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_cgeru(n - k, nrhs, -cone, ap(kc + 1), 1, b(k, 1), ldb, b(k + &
                           1, 1), ldb)
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_cscal(nrhs, cone/ap(kc), b(k, 1), ldb)
                 kc = kc + n - k + 1
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k+1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k + 1) call stdlib_cswap(nrhs, b(k + 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(l(k)), where l(k) is the transformation
                 ! stored in columns k and k+1 of a.
                 if (k < n - 1) then
                    call stdlib_cgeru(n - k - 1, nrhs, -cone, ap(kc + 2), 1, b(k, 1), ldb, b(k + 2, &
                              1), ldb)
                    call stdlib_cgeru(n - k - 1, nrhs, -cone, ap(kc + n - k + 2), 1, b(k + 1, 1), ldb, b( &
                              k + 2, 1), ldb)
                 end if
                 ! multiply by the inverse of the diagonal block.
                 akm1k = ap(kc + 1)
                 akm1 = ap(kc)/akm1k
                 ak = ap(kc + n - k + 1)/akm1k
                 denom = akm1*ak - cone
                 do j = 1, nrhs
                    bkm1 = b(k, j)/akm1k
                    bk = b(k + 1, j)/akm1k
                    b(k, j) = (ak*bkm1 - bk)/denom
                    b(k + 1, j) = (akm1*bk - bkm1)/denom
                 end do
                 kc = kc + 2*(n - k) + 1
                 k = k + 2
              end if
              go to 60
80      continue
              ! next solve l**t*x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
              kc = n*(n + 1)/2 + 1
90      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 100
              kc = kc - (n - k + 1)
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(l**t(k)), where l(k) is the transformation
                 ! stored in column k of a.
                 if (k < n) call stdlib_cgemv('transpose', n - k, nrhs, -cone, b(k + 1, 1), ldb, ap( &
                           kc + 1), 1, cone, b(k, 1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(l**t(k-1)), where l(k-1) is the transformation
                 ! stored in columns k-1 and k of a.
                 if (k < n) then
                    call stdlib_cgemv('transpose', n - k, nrhs, -cone, b(k + 1, 1), ldb, ap(kc + 1), &
                               1, cone, b(k, 1), ldb)
                    call stdlib_cgemv('transpose', n - k, nrhs, -cone, b(k + 1, 1), ldb, ap(kc - (n - &
                              k)), 1, cone, b(k - 1, 1), ldb)
                 end if
                 ! interchange rows k and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 kc = kc - (n - k + 2)
                 k = k - 2
              end if
              go to 90
100    continue
           end if
           return
           ! end of stdlib_csptrs
     end subroutine stdlib_csptrs

     ! CSRSCL multiplies an n-element complex vector x by the real scalar
     ! 1/a.  This is done without overflow or underflow as long as
     ! the final result x/a does not overflow or underflow.

     subroutine stdlib_csrscl(n, sa, sx, incx)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: incx, n
           real(sp) :: sa
           ! .. array arguments ..
           complex(sp) :: sx(*)
       ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: done
           real(sp) :: bignum, cden, cden1, cnum, cnum1, mul, smlnum
     
           ! .. intrinsic functions ..
           intrinsic :: abs
           ! .. executable statements ..
           ! quick return if possible
           if (n <= 0) return
           ! get machine parameters
           smlnum = stdlib_slamch('s')
           bignum = one/smlnum
           call stdlib_slabad(smlnum, bignum)
           ! initialize the denominator to sa and the numerator to 1.
           cden = sa
           cnum = one
10      continue
           cden1 = cden*smlnum
           cnum1 = cnum/bignum
           if (abs(cden1) > abs(cnum) .and. cnum /= zero) then
              ! pre-multiply x by smlnum if cden is large compared to cnum.
              mul = smlnum
              done = .false.
              cden = cden1
           else if (abs(cnum1) > abs(cden)) then
              ! pre-multiply x by bignum if cden is small compared to cnum.
              mul = bignum
              done = .false.
              cnum = cnum1
           else
              ! multiply x by cnum / cden and return.
              mul = cnum/cden
              done = .true.
           end if
           ! scale the vector x by mul
           call stdlib_csscal(n, mul, sx, incx)
           if (.not. done) go to 10
           return
           ! end of stdlib_csrscl
     end subroutine stdlib_csrscl

     ! CSTEIN computes the eigenvectors of a real symmetric tridiagonal
     ! matrix T corresponding to specified eigenvalues, using inverse
     ! iteration.
     ! The maximum number of iterations allowed for each eigenvector is
     ! specified by an internal parameter MAXITS (currently set to 5).
     ! Although the eigenvectors are real, they are stored in a complex
     ! array, which may be passed to CUNMTR or CUPMTR for back
     ! transformation to the eigenvectors of a complex Hermitian matrix
     ! which was reduced to tridiagonal form.

     subroutine stdlib_cstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info)
               
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, ldz, m, n
           ! .. array arguments ..
           integer(ilp) :: iblock(*), ifail(*), isplit(*), iwork(*)
           real(sp) :: d(*), e(*), w(*), work(*)
           complex(sp) :: z(ldz, *)
       ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: odm3 = 1.0e-3
           real(sp), parameter :: odm1 = 1.0e-1
           integer(ilp), parameter :: maxits = 5
           integer(ilp), parameter :: extra = 2
           
           ! .. local scalars ..
           integer(ilp) :: b1, blksiz, bn, gpind, i, iinfo, indrv1, indrv2, indrv3, indrv4, indrv5, &
                      its, j, j1, jblk, jmax, jr, nblk, nrmchk
           real(sp) :: ctr, eps, eps1, nrm, onenrm, ortol, pertol, scl, sep, stpcrt, tol, xj, &
                     xjm
           ! .. local arrays ..
           integer(ilp) :: iseed(4)
     
           ! .. intrinsic functions ..
           intrinsic :: abs, cmplx, max, real, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           do i = 1, m
              ifail(i) = 0
           end do
           if (n < 0) then
              info = -1
           else if (m < 0 .or. m > n) then
              info = -4
           else if (ldz < max(1, n)) then
              info = -9
           else
              do j = 2, m
                 if (iblock(j) < iblock(j - 1)) then
                    info = -6
                    go to 30
                 end if
                 if (iblock(j) == iblock(j - 1) .and. w(j) < w(j - 1)) then
                    info = -5
                    go to 30
                 end if
              end do
30      continue
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_cstein', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. m == 0) then
              return
           else if (n == 1) then
              z(1, 1) = cone
              return
           end if
           ! get machine constants.
           eps = stdlib_slamch('precision')
           ! initialize seed for random number generator stdlib_slarnv.
           do i = 1, 4
              iseed(i) = 1
           end do
           ! initialize pointers.
           indrv1 = 0
           indrv2 = indrv1 + n
           indrv3 = indrv2 + n
           indrv4 = indrv3 + n
           indrv5 = indrv4 + n
           ! compute eigenvectors of matrix blocks.
           j1 = 1
           loop_180: do nblk = 1, iblock(m)
              ! find starting and ending indices of block nblk.
              if (nblk == 1) then
                 b1 = 1
              else
                 b1 = isplit(nblk - 1) + 1
              end if
              bn = isplit(nblk)
              blksiz = bn - b1 + 1
              if (blksiz == 1) go to 60
              gpind = j1
              ! compute reorthogonalization criterion and stopping criterion.
              onenrm = abs(d(b1)) + abs(e(b1))
              onenrm = max(onenrm, abs(d(bn)) + abs(e(bn - 1)))
              do i = b1 + 1, bn - 1
                 onenrm = max(onenrm, abs(d(i)) + abs(e(i - 1)) + abs(e(i)))
              end do
              ortol = odm3*onenrm
              stpcrt = sqrt(odm1/blksiz)
              ! loop through eigenvalues of block nblk.
60      continue
              jblk = 0
              loop_170: do j = j1, m
                 if (iblock(j) /= nblk) then
                    j1 = j
                    cycle loop_180
                 end if
                 jblk = jblk + 1
                 xj = w(j)
                 ! skip all the work if the block size is one.
                 if (blksiz == 1) then
                    work(indrv1 + 1) = one
                    go to 140
                 end if
                 ! if eigenvalues j and j-1 are too close, add a relatively
                 ! small perturbation.
                 if (jblk > 1) then
                    eps1 = abs(eps*xj)
                    pertol = ten*eps1
                    sep = xj - xjm
                    if (sep < pertol) xj = xjm + pertol
                 end if
                 its = 0
                 nrmchk = 0
                 ! get random starting vector.
                 call stdlib_slarnv(2, iseed, blksiz, work(indrv1 + 1))
                 ! copy the matrix t so it won't be destroyed in factorization.
                 call stdlib_scopy(blksiz, d(b1), 1, work(indrv4 + 1), 1)
                 call stdlib_scopy(blksiz - 1, e(b1), 1, work(indrv2 + 2), 1)
                 call stdlib_scopy(blksiz - 1, e(b1), 1, work(indrv3 + 1), 1)
                 ! compute lu factors with partial pivoting  ( pt = lu )
                 tol = zero
                 call stdlib_slagtf(blksiz, work(indrv4 + 1), xj, work(indrv2 + 2), work(indrv3 + &
                           1), tol, work(indrv5 + 1), iwork, iinfo)
                 ! update iteration count.
70      continue
                 its = its + 1
                 if (its > maxits) go to 120
                 ! normalize and scale the righthand side vector pb.
                 jmax = stdlib_isamax(blksiz, work(indrv1 + 1), 1)
                 scl = blksiz*onenrm*max(eps, abs(work(indrv4 + blksiz)))/abs(work(indrv1 + &
                           jmax))
                 call stdlib_sscal(blksiz, scl, work(indrv1 + 1), 1)
                 ! solve the system lu = pb.
                 call stdlib_slagts(-1, blksiz, work(indrv4 + 1), work(indrv2 + 2), work(indrv3 + &
                           1), work(indrv5 + 1), iwork, work(indrv1 + 1), tol, iinfo)
                 ! reorthogonalize by modified gram-schmidt if eigenvalues are
                 ! close enough.
                 if (jblk == 1) go to 110
                 if (abs(xj - xjm) > ortol) gpind = j
                 if (gpind /= j) then
                    do i = gpind, j - 1
                       ctr = zero
                       do jr = 1, blksiz
                          ctr = ctr + work(indrv1 + jr)*real(z(b1 - 1 + jr, i))
                       end do
                       do jr = 1, blksiz
                          work(indrv1 + jr) = work(indrv1 + jr) - ctr*real(z(b1 - 1 + jr, i))
                                    
                       end do
                    end do
                 end if
                 ! check the infinity norm of the iterate.
110    continue
                 jmax = stdlib_isamax(blksiz, work(indrv1 + 1), 1)
                 nrm = abs(work(indrv1 + jmax))
                 ! continue for additional iterations after norm reaches
                 ! stopping criterion.
                 if (nrm < stpcrt) go to 70
                 nrmchk = nrmchk + 1
                 if (nrmchk < extra + 1) go to 70
                 go to 130
                 ! if stopping criterion was not satisfied, update info and
                 ! store eigenvector number in array ifail.
120    continue
                 info = info + 1
                 ifail(info) = j
                 ! accept iterate as jth eigenvector.
130    continue
                 scl = one/stdlib_snrm2(blksiz, work(indrv1 + 1), 1)
                 jmax = stdlib_isamax(blksiz, work(indrv1 + 1), 1)
                 if (work(indrv1 + jmax) < zero) scl = -scl
                 call stdlib_sscal(blksiz, scl, work(indrv1 + 1), 1)
140    continue
                 do i = 1, n
                    z(i, j) = czero
                 end do
                 do i = 1, blksiz
                    z(b1 + i - 1, j) = cmplx(work(indrv1 + i), zero, KIND=sp)
                 end do
                 ! save the shift to check eigenvalue spacing at next
                 ! iteration.
                 xjm = xj
              end do loop_170
           end do loop_180
           return
           ! end of stdlib_cstein
     end subroutine stdlib_cstein

     ! CSTEQR computes all eigenvalues and, optionally, eigenvectors of a
     ! symmetric tridiagonal matrix using the implicit QL or QR method.
     ! The eigenvectors of a full or band complex Hermitian matrix can also
     ! be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this
     ! matrix to tridiagonal form.

     subroutine stdlib_csteqr(compz, n, d, e, z, ldz, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: compz
           integer(ilp) :: info, ldz, n
           ! .. array arguments ..
           real(sp) :: d(*), e(*), work(*)
           complex(sp) :: z(ldz, *)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: maxit = 30
           
           ! .. local scalars ..
           integer(ilp) :: i, icompz, ii, iscale, j, jtot, k, l, l1, lend, lendm1, lendp1, lendsv, &
                     lm1, lsv, m, mm, mm1, nm1, nmaxit
           real(sp) :: anorm, b, c, eps, eps2, f, g, p, r, rt1, rt2, s, safmax, safmin, ssfmax, &
                     ssfmin, tst
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sign, sqrt
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (stdlib_lsame(compz, 'n')) then
              icompz = 0
           else if (stdlib_lsame(compz, 'v')) then
              icompz = 1
           else if (stdlib_lsame(compz, 'i')) then
              icompz = 2
           else
              icompz = -1
           end if
           if (icompz < 0) then
              info = -1
           else if (n < 0) then
              info = -2
           else if ((ldz < 1) .or. (icompz > 0 .and. ldz < max(1, n))) then
              info = -6
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csteqr', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (n == 1) then
              if (icompz == 2) z(1, 1) = cone
              return
           end if
           ! determine the unit roundoff and over/underflow thresholds.
           eps = stdlib_slamch('e')
           eps2 = eps**2
           safmin = stdlib_slamch('s')
           safmax = one/safmin
           ssfmax = sqrt(safmax)/three
           ssfmin = sqrt(safmin)/eps2
           ! compute the eigenvalues and eigenvectors of the tridiagonal
           ! matrix.
           if (icompz == 2) call stdlib_claset('full', n, n, czero, cone, z, ldz)
           nmaxit = n*maxit
           jtot = 0
           ! determine where the matrix splits and choose ql or qr iteration
           ! for each block, according to whether top or bottom diagonal
           ! element is smaller.
           l1 = 1
           nm1 = n - 1
10      continue
           if (l1 > n) go to 160
           if (l1 > 1) e(l1 - 1) = zero
           if (l1 <= nm1) then
              do m = l1, nm1
                 tst = abs(e(m))
                 if (tst == zero) go to 30
                 if (tst <= (sqrt(abs(d(m)))*sqrt(abs(d(m + 1))))*eps) then
                    e(m) = zero
                    go to 30
                 end if
              end do
           end if
           m = n
30      continue
           l = l1
           lsv = l
           lend = m
           lendsv = lend
           l1 = m + 1
           if (lend == l) go to 10
           ! scale submatrix in rows and columns l to lend
           anorm = stdlib_slanst('i', lend - l + 1, d(l), e(l))
           iscale = 0
           if (anorm == zero) go to 10
           if (anorm > ssfmax) then
              iscale = 1
              call stdlib_slascl('g', 0, 0, anorm, ssfmax, lend - l + 1, 1, d(l), n, info)
              call stdlib_slascl('g', 0, 0, anorm, ssfmax, lend - l, 1, e(l), n, info)
           else if (anorm < ssfmin) then
              iscale = 2
              call stdlib_slascl('g', 0, 0, anorm, ssfmin, lend - l + 1, 1, d(l), n, info)
              call stdlib_slascl('g', 0, 0, anorm, ssfmin, lend - l, 1, e(l), n, info)
           end if
           ! choose between ql and qr iteration
           if (abs(d(lend)) < abs(d(l))) then
              lend = lsv
              l = lendsv
           end if
           if (lend > l) then
              ! ql iteration
              ! look for small subdiagonal element.
40      continue
              if (l /= lend) then
                 lendm1 = lend - 1
                 do m = l, lendm1
                    tst = abs(e(m))**2
                    if (tst <= (eps2*abs(d(m)))*abs(d(m + 1)) + safmin) go to 60
                 end do
              end if
              m = lend
60      continue
              if (m < lend) e(m) = zero
              p = d(l)
              if (m == l) go to 80
              ! if remaining matrix is 2-by-2, use stdlib_slae2 or stdlib_slaev2
              ! to compute its eigensystem.
              if (m == l + 1) then
                 if (icompz > 0) then
                    call stdlib_slaev2(d(l), e(l), d(l + 1), rt1, rt2, c, s)
                    work(l) = c
                    work(n - 1 + l) = s
                    call stdlib_clasr('r', 'v', 'b', n, 2, work(l), work(n - 1 + l), z(1, l), &
                              ldz)
                 else
                    call stdlib_slae2(d(l), e(l), d(l + 1), rt1, rt2)
                 end if
                 d(l) = rt1
                 d(l + 1) = rt2
                 e(l) = zero
                 l = l + 2
                 if (l <= lend) go to 40
                 go to 140
              end if
              if (jtot == nmaxit) go to 140
              jtot = jtot + 1
              ! form shift.
              g = (d(l + 1) - p)/(two*e(l))
              r = stdlib_slapy2(g, one)
              g = d(m) - p + (e(l)/(g + sign(r, g)))
              s = one
              c = one
              p = zero
              ! inner loop
              mm1 = m - 1
              do i = mm1, l, -1
                 f = s*e(i)
                 b = c*e(i)
                 call stdlib_slartg(g, f, c, s, r)
                 if (i /= m - 1) e(i + 1) = r
                 g = d(i + 1) - p
                 r = (d(i) - g)*s + two*c*b
                 p = s*r
                 d(i + 1) = g + p
                 g = c*r - b
                 ! if eigenvectors are desired, then save rotations.
                 if (icompz > 0) then
                    work(i) = c
                    work(n - 1 + i) = -s
                 end if
              end do
              ! if eigenvectors are desired, then apply saved rotations.
              if (icompz > 0) then
                 mm = m - l + 1
                 call stdlib_clasr('r', 'v', 'b', n, mm, work(l), work(n - 1 + l), z(1, l), ldz &
                           )
              end if
              d(l) = d(l) - p
              e(l) = g
              go to 40
              ! eigenvalue found.
80      continue
              d(l) = p
              l = l + 1
              if (l <= lend) go to 40
              go to 140
           else
              ! qr iteration
              ! look for small superdiagonal element.
90      continue
              if (l /= lend) then
                 lendp1 = lend + 1
                 do m = l, lendp1, -1
                    tst = abs(e(m - 1))**2
                    if (tst <= (eps2*abs(d(m)))*abs(d(m - 1)) + safmin) go to 110
                 end do
              end if
              m = lend
110    continue
              if (m > lend) e(m - 1) = zero
              p = d(l)
              if (m == l) go to 130
              ! if remaining matrix is 2-by-2, use stdlib_slae2 or stdlib_slaev2
              ! to compute its eigensystem.
              if (m == l - 1) then
                 if (icompz > 0) then
                    call stdlib_slaev2(d(l - 1), e(l - 1), d(l), rt1, rt2, c, s)
                    work(m) = c
                    work(n - 1 + m) = s
                    call stdlib_clasr('r', 'v', 'f', n, 2, work(m), work(n - 1 + m), z(1, l - 1), &
                              ldz)
                 else
                    call stdlib_slae2(d(l - 1), e(l - 1), d(l), rt1, rt2)
                 end if
                 d(l - 1) = rt1
                 d(l) = rt2
                 e(l - 1) = zero
                 l = l - 2
                 if (l >= lend) go to 90
                 go to 140
              end if
              if (jtot == nmaxit) go to 140
              jtot = jtot + 1
              ! form shift.
              g = (d(l - 1) - p)/(two*e(l - 1))
              r = stdlib_slapy2(g, one)
              g = d(m) - p + (e(l - 1)/(g + sign(r, g)))
              s = one
              c = one
              p = zero
              ! inner loop
              lm1 = l - 1
              do i = m, lm1
                 f = s*e(i)
                 b = c*e(i)
                 call stdlib_slartg(g, f, c, s, r)
                 if (i /= m) e(i - 1) = r
                 g = d(i) - p
                 r = (d(i + 1) - g)*s + two*c*b
                 p = s*r
                 d(i) = g + p
                 g = c*r - b
                 ! if eigenvectors are desired, then save rotations.
                 if (icompz > 0) then
                    work(i) = c
                    work(n - 1 + i) = s
                 end if
              end do
              ! if eigenvectors are desired, then apply saved rotations.
              if (icompz > 0) then
                 mm = l - m + 1
                 call stdlib_clasr('r', 'v', 'f', n, mm, work(m), work(n - 1 + m), z(1, m), ldz &
                           )
              end if
              d(l) = d(l) - p
              e(lm1) = g
              go to 90
              ! eigenvalue found.
130    continue
              d(l) = p
              l = l - 1
              if (l >= lend) go to 90
              go to 140
           end if
           ! undo scaling if necessary
140    continue
           if (iscale == 1) then
              call stdlib_slascl('g', 0, 0, ssfmax, anorm, lendsv - lsv + 1, 1, d(lsv), n, info)
                        
              call stdlib_slascl('g', 0, 0, ssfmax, anorm, lendsv - lsv, 1, e(lsv), n, info)
                        
           else if (iscale == 2) then
              call stdlib_slascl('g', 0, 0, ssfmin, anorm, lendsv - lsv + 1, 1, d(lsv), n, info)
                        
              call stdlib_slascl('g', 0, 0, ssfmin, anorm, lendsv - lsv, 1, e(lsv), n, info)
                        
           end if
           ! check for no convergence to an eigenvalue after a total
           ! of n*maxit iterations.
           if (jtot == nmaxit) then
              do i = 1, n - 1
                 if (e(i) /= zero) info = info + 1
              end do
              return
           end if
           go to 10
           ! order eigenvalues and eigenvectors.
160    continue
           if (icompz == 0) then
              ! use quick sort
              call stdlib_slasrt('i', n, d, info)
           else
              ! use selection sort to minimize swaps of eigenvectors
              do ii = 2, n
                 i = ii - 1
                 k = i
                 p = d(i)
                 do j = ii, n
                    if (d(j) < p) then
                       k = j
                       p = d(j)
                    end if
                 end do
                 if (k /= i) then
                    d(k) = d(i)
                    d(i) = p
                    call stdlib_cswap(n, z(1, i), 1, z(1, k), 1)
                 end if
              end do
           end if
           return
           ! end of stdlib_csteqr
     end subroutine stdlib_csteqr

     ! CSYCONV convert A given by TRF into L and D and vice-versa.
     ! Get Non-diag elements of D (returned in workspace) and
     ! apply or reverse permutation done in TRF.

     subroutine stdlib_csyconv(uplo, way, n, a, lda, ipiv, e, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, way
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), e(*)
        ! =====================================================================
           
           ! .. external subroutines ..
     
           logical(lk) :: upper, convert
           integer(ilp) :: i, ip, j
           complex(sp) :: temp
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           convert = stdlib_lsame(way, 'c')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. convert .and. .not. stdlib_lsame(way, 'r')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csyconv', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
            ! a is upper
            ! convert a (a is upper)
              ! convert value
              if (convert) then
                 i = n
                 e(1) = czero
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       e(i) = a(i - 1, i)
                       e(i - 1) = czero
                       a(i - 1, i) = czero
                       i = i - 1
                    else
                       e(i) = czero
                    end if
                    i = i - 1
                 end do
              ! convert permutations
              i = n
              do while (i >= 1)
                 if (ipiv(i) > 0) then
                    ip = ipiv(i)
                    if (i < n) then
                       do j = i + 1, n
                         temp = a(ip, j)
                         a(ip, j) = a(i, j)
                         a(i, j) = temp
                       end do
                    end if
                 else
                   ip = -ipiv(i)
                    if (i < n) then
                  do j = i + 1, n
                      temp = a(ip, j)
                      a(ip, j) = a(i - 1, j)
                      a(i - 1, j) = temp
                  end do
                     end if
                     i = i - 1
                end if
                i = i - 1
             end do
              else
            ! revert a (a is upper)
              ! revert permutations
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ip = ipiv(i)
                       if (i < n) then
                       do j = i + 1, n
                         temp = a(ip, j)
                         a(ip, j) = a(i, j)
                         a(i, j) = temp
                       end do
                       end if
                    else
                      ip = -ipiv(i)
                      i = i + 1
                      if (i < n) then
                         do j = i + 1, n
                            temp = a(ip, j)
                            a(ip, j) = a(i - 1, j)
                            a(i - 1, j) = temp
                         end do
                      end if
                    end if
                    i = i + 1
                 end do
              ! revert value
                 i = n
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       a(i - 1, i) = e(i)
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              end if
           else
            ! a is lower
              if (convert) then
            ! convert a (a is lower)
              ! convert value
                 i = 1
                 e(n) = czero
                 do while (i <= n)
                    if (i < n .and. ipiv(i) < 0) then
                       e(i) = a(i + 1, i)
                       e(i + 1) = czero
                       a(i + 1, i) = czero
                       i = i + 1
                    else
                       e(i) = czero
                    end if
                    i = i + 1
                 end do
              ! convert permutations
              i = 1
              do while (i <= n)
                 if (ipiv(i) > 0) then
                    ip = ipiv(i)
                    if (i > 1) then
                    do j = 1, i - 1
                      temp = a(ip, j)
                      a(ip, j) = a(i, j)
                      a(i, j) = temp
                    end do
                    end if
                 else
                   ip = -ipiv(i)
                   if (i > 1) then
                   do j = 1, i - 1
                      temp = a(ip, j)
                      a(ip, j) = a(i + 1, j)
                      a(i + 1, j) = temp
                   end do
                   end if
                   i = i + 1
                end if
                i = i + 1
             end do
              else
            ! revert a (a is lower)
              ! revert permutations
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ip = ipiv(i)
                       if (i > 1) then
                          do j = 1, i - 1
                             temp = a(i, j)
                             a(i, j) = a(ip, j)
                             a(ip, j) = temp
                          end do
                       end if
                    else
                       ip = -ipiv(i)
                       i = i - 1
                       if (i > 1) then
                          do j = 1, i - 1
                             temp = a(i + 1, j)
                             a(i + 1, j) = a(ip, j)
                             a(ip, j) = temp
                          end do
                       end if
                    end if
                    i = i - 1
                 end do
              ! revert value
                 i = 1
                 do while (i <= n - 1)
                    if (ipiv(i) < 0) then
                       a(i + 1, i) = e(i)
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              end if
           end if
           return
           ! end of stdlib_csyconv
     end subroutine stdlib_csyconv

     ! If parameter WAY = 'C':
     ! CSYCONVF converts the factorization output format used in
     ! CSYTRF provided on entry in parameter A into the factorization
     ! output format used in CSYTRF_RK (or CSYTRF_BK) that is stored
     ! on exit in parameters A and E. It also converts in place details of
     ! the intechanges stored in IPIV from the format used in CSYTRF into
     ! the format used in CSYTRF_RK (or CSYTRF_BK).
     ! If parameter WAY = 'R':
     ! CSYCONVF performs the conversion in reverse direction, i.e.
     ! converts the factorization output format used in CSYTRF_RK
     ! (or CSYTRF_BK) provided on entry in parameters A and E into
     ! the factorization output format used in CSYTRF that is stored
     ! on exit in parameter A. It also converts in place details of
     ! the intechanges stored in IPIV from the format used in CSYTRF_RK
     ! (or CSYTRF_BK) into the format used in CSYTRF.
     ! CSYCONVF can also convert in Hermitian matrix case, i.e. between
     ! formats used in CHETRF and CHETRF_RK (or CHETRF_BK).

     subroutine stdlib_csyconvf(uplo, way, n, a, lda, e, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, way
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), e(*)
        ! =====================================================================
           
           ! .. external subroutines ..
     
           logical(lk) :: upper, convert
           integer(ilp) :: i, ip
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           convert = stdlib_lsame(way, 'c')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. convert .and. .not. stdlib_lsame(way, 'r')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csyconvf', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! begin a is upper
              if (convert) then
                 ! convert a (a is upper)
                 ! convert value
                 ! assign superdiagonal entries of d to array e and czero out
                 ! corresponding entries in input storage a
                 i = n
                 e(1) = czero
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       e(i) = a(i - 1, i)
                       e(i - 1) = czero
                       a(i - 1, i) = czero
                       i = i - 1
                    else
                       e(i) = czero
                    end if
                    i = i - 1
                 end do
                 ! convert permutations and ipiv
                 ! apply permutations to submatrices of upper part of a
                 ! in factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_cswap(n - i, a(i, i + 1), lda, a(ip, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i-1 and ipiv(i) in a(1:i,n-i:n)
                       ip = -ipiv(i)
                       if (i < n) then
                          if (ip /= (i - 1)) then
                             call stdlib_cswap(n - i, a(i - 1, i + 1), lda, a(ip, i + 1), lda)
                                       
                          end if
                       end if
                       ! convert ipiv
                       ! there is no interchnge of rows i and and ipiv(i),
                       ! so this should be reflected in ipiv format for
                       ! *sytrf_rk ( or *sytrf_bk)
                       ipiv(i) = i
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              else
                 ! revert a (a is upper)
                 ! revert permutations and ipiv
                 ! apply permutations to submatrices of upper part of a
                 ! in reverse factorization order where i increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_cswap(n - i, a(ip, i + 1), lda, a(i, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i-1 and ipiv(i) in a(1:i,n-i:n)
                       i = i + 1
                       ip = -ipiv(i)
                       if (i < n) then
                          if (ip /= (i - 1)) then
                             call stdlib_cswap(n - i, a(ip, i + 1), lda, a(i - 1, i + 1), lda)
                                       
                          end if
                       end if
                       ! convert ipiv
                       ! there is cone interchange of rows i-1 and ipiv(i-1),
                       ! so this should be recorded in two consecutive entries
                       ! in ipiv format for *sytrf
                       ipiv(i) = ipiv(i - 1)
                    end if
                    i = i + 1
                 end do
                 ! revert value
                 ! assign superdiagonal entries of d from array e to
                 ! superdiagonal entries of a.
                 i = n
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       a(i - 1, i) = e(i)
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              ! end a is upper
              end if
           else
              ! begin a is lower
              if (convert) then
                 ! convert a (a is lower)
                 ! convert value
                 ! assign subdiagonal entries of d to array e and czero out
                 ! corresponding entries in input storage a
                 i = 1
                 e(n) = czero
                 do while (i <= n)
                    if (i < n .and. ipiv(i) < 0) then
                       e(i) = a(i + 1, i)
                       e(i + 1) = czero
                       a(i + 1, i) = czero
                       i = i + 1
                    else
                       e(i) = czero
                    end if
                    i = i + 1
                 end do
                 ! convert permutations and ipiv
                 ! apply permutations to submatrices of lower part of a
                 ! in factorization order where k increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_cswap(i - 1, a(i, 1), lda, a(ip, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1)
                       ip = -ipiv(i)
                       if (i > 1) then
                          if (ip /= (i + 1)) then
                             call stdlib_cswap(i - 1, a(i + 1, 1), lda, a(ip, 1), lda)
                          end if
                       end if
                       ! convert ipiv
                       ! there is no interchnge of rows i and and ipiv(i),
                       ! so this should be reflected in ipiv format for
                       ! *sytrf_rk ( or *sytrf_bk)
                       ipiv(i) = i
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              else
                 ! revert a (a is lower)
                 ! revert permutations and ipiv
                 ! apply permutations to submatrices of lower part of a
                 ! in reverse factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_cswap(i - 1, a(ip, 1), lda, a(i, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i+1 and ipiv(i) in a(i:n,1:i-1)
                       i = i - 1
                       ip = -ipiv(i)
                       if (i > 1) then
                          if (ip /= (i + 1)) then
                             call stdlib_cswap(i - 1, a(ip, 1), lda, a(i + 1, 1), lda)
                          end if
                       end if
                       ! convert ipiv
                       ! there is cone interchange of rows i+1 and ipiv(i+1),
                       ! so this should be recorded in consecutive entries
                       ! in ipiv format for *sytrf
                       ipiv(i) = ipiv(i + 1)
                    end if
                    i = i - 1
                 end do
                 ! revert value
                 ! assign subdiagonal entries of d from array e to
                 ! subgiagonal entries of a.
                 i = 1
                 do while (i <= n - 1)
                    if (ipiv(i) < 0) then
                       a(i + 1, i) = e(i)
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              end if
              ! end a is lower
           end if
           return
           ! end of stdlib_csyconvf
     end subroutine stdlib_csyconvf

     ! If parameter WAY = 'C':
     ! CSYCONVF_ROOK converts the factorization output format used in
     ! CSYTRF_ROOK provided on entry in parameter A into the factorization
     ! output format used in CSYTRF_RK (or CSYTRF_BK) that is stored
     ! on exit in parameters A and E. IPIV format for CSYTRF_ROOK and
     ! CSYTRF_RK (or CSYTRF_BK) is the same and is not converted.
     ! If parameter WAY = 'R':
     ! CSYCONVF_ROOK performs the conversion in reverse direction, i.e.
     ! converts the factorization output format used in CSYTRF_RK
     ! (or CSYTRF_BK) provided on entry in parameters A and E into
     ! the factorization output format used in CSYTRF_ROOK that is stored
     ! on exit in parameter A. IPIV format for CSYTRF_ROOK and
     ! CSYTRF_RK (or CSYTRF_BK) is the same and is not converted.
     ! CSYCONVF_ROOK can also convert in Hermitian matrix case, i.e. between
     ! formats used in CHETRF_ROOK and CHETRF_RK (or CHETRF_BK).

     subroutine stdlib_csyconvf_rook(uplo, way, n, a, lda, e, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo, way
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), e(*)
        ! =====================================================================
           
           ! .. external subroutines ..
     
           logical(lk) :: upper, convert
           integer(ilp) :: i, ip, ip2
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           convert = stdlib_lsame(way, 'c')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (.not. convert .and. .not. stdlib_lsame(way, 'r')) then
              info = -2
           else if (n < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csyconvf_rook', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           if (upper) then
              ! begin a is upper
              if (convert) then
                 ! convert a (a is upper)
                 ! convert value
                 ! assign superdiagonal entries of d to array e and czero out
                 ! corresponding entries in input storage a
                 i = n
                 e(1) = czero
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       e(i) = a(i - 1, i)
                       e(i - 1) = czero
                       a(i - 1, i) = czero
                       i = i - 1
                    else
                       e(i) = czero
                    end if
                    i = i - 1
                 end do
                 ! convert permutations
                 ! apply permutations to submatrices of upper part of a
                 ! in factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_cswap(n - i, a(i, i + 1), lda, a(ip, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i and ipiv(i) and i-1 and ipiv(i-1)
                       ! in a(1:i,n-i:n)
                       ip = -ipiv(i)
                       ip2 = -ipiv(i - 1)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_cswap(n - i, a(i, i + 1), lda, a(ip, i + 1), lda)
                          end if
                          if (ip2 /= (i - 1)) then
                             call stdlib_cswap(n - i, a(i - 1, i + 1), lda, a(ip2, i + 1), lda)
                                       
                          end if
                       end if
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              else
                 ! revert a (a is upper)
                 ! revert permutations
                 ! apply permutations to submatrices of upper part of a
                 ! in reverse factorization order where i increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(1:i,n-i:n)
                       ip = ipiv(i)
                       if (i < n) then
                          if (ip /= i) then
                             call stdlib_cswap(n - i, a(ip, i + 1), lda, a(i, i + 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i-1 and ipiv(i-1) and i and ipiv(i)
                       ! in a(1:i,n-i:n)
                       i = i + 1
                       ip = -ipiv(i)
                       ip2 = -ipiv(i - 1)
                       if (i < n) then
                          if (ip2 /= (i - 1)) then
                             call stdlib_cswap(n - i, a(ip2, i + 1), lda, a(i - 1, i + 1), lda)
                                       
                          end if
                          if (ip /= i) then
                             call stdlib_cswap(n - i, a(ip, i + 1), lda, a(i, i + 1), lda)
                          end if
                       end if
                    end if
                    i = i + 1
                 end do
                 ! revert value
                 ! assign superdiagonal entries of d from array e to
                 ! superdiagonal entries of a.
                 i = n
                 do while (i > 1)
                    if (ipiv(i) < 0) then
                       a(i - 1, i) = e(i)
                       i = i - 1
                    end if
                    i = i - 1
                 end do
              ! end a is upper
              end if
           else
              ! begin a is lower
              if (convert) then
                 ! convert a (a is lower)
                 ! convert value
                 ! assign subdiagonal entries of d to array e and czero out
                 ! corresponding entries in input storage a
                 i = 1
                 e(n) = czero
                 do while (i <= n)
                    if (i < n .and. ipiv(i) < 0) then
                       e(i) = a(i + 1, i)
                       e(i + 1) = czero
                       a(i + 1, i) = czero
                       i = i + 1
                    else
                       e(i) = czero
                    end if
                    i = i + 1
                 end do
                 ! convert permutations
                 ! apply permutations to submatrices of lower part of a
                 ! in factorization order where i increases from 1 to n
                 i = 1
                 do while (i <= n)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_cswap(i - 1, a(i, 1), lda, a(ip, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i and ipiv(i) and i+1 and ipiv(i+1)
                       ! in a(i:n,1:i-1)
                       ip = -ipiv(i)
                       ip2 = -ipiv(i + 1)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_cswap(i - 1, a(i, 1), lda, a(ip, 1), lda)
                          end if
                          if (ip2 /= (i + 1)) then
                             call stdlib_cswap(i - 1, a(i + 1, 1), lda, a(ip2, 1), lda)
                          end if
                       end if
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              else
                 ! revert a (a is lower)
                 ! revert permutations
                 ! apply permutations to submatrices of lower part of a
                 ! in reverse factorization order where i decreases from n to 1
                 i = n
                 do while (i >= 1)
                    if (ipiv(i) > 0) then
                       ! 1-by-1 pivot interchange
                       ! swap rows i and ipiv(i) in a(i:n,1:i-1)
                       ip = ipiv(i)
                       if (i > 1) then
                          if (ip /= i) then
                             call stdlib_cswap(i - 1, a(ip, 1), lda, a(i, 1), lda)
                          end if
                       end if
                    else
                       ! 2-by-2 pivot interchange
                       ! swap rows i+1 and ipiv(i+1) and i and ipiv(i)
                       ! in a(i:n,1:i-1)
                       i = i - 1
                       ip = -ipiv(i)
                       ip2 = -ipiv(i + 1)
                       if (i > 1) then
                          if (ip2 /= (i + 1)) then
                             call stdlib_cswap(i - 1, a(ip2, 1), lda, a(i + 1, 1), lda)
                          end if
                          if (ip /= i) then
                             call stdlib_cswap(i - 1, a(ip, 1), lda, a(i, 1), lda)
                          end if
                       end if
                    end if
                    i = i - 1
                 end do
                 ! revert value
                 ! assign subdiagonal entries of d from array e to
                 ! subgiagonal entries of a.
                 i = 1
                 do while (i <= n - 1)
                    if (ipiv(i) < 0) then
                       a(i + 1, i) = e(i)
                       i = i + 1
                    end if
                    i = i + 1
                 end do
              end if
              ! end a is lower
           end if
           return
           ! end of stdlib_csyconvf_rook
     end subroutine stdlib_csyconvf_rook

     ! CSYEQUB computes row and column scalings intended to equilibrate a
     ! symmetric matrix A (with respect to the Euclidean norm) and reduce
     ! its condition number. The scale factors S are computed by the BIN
     ! algorithm (see references) so that the scaled matrix B with elements
     ! B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of
     ! the smallest possible condition number over all possible diagonal
     ! scalings.

     subroutine stdlib_csyequb(uplo, n, a, lda, s, scond, amax, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           integer(ilp) :: info, lda, n
           real(sp) :: amax, scond
           character :: uplo
           ! .. array arguments ..
           complex(sp) :: a(lda, *), work(*)
           real(sp) :: s(*)
        ! =====================================================================
           ! .. parameters ..
           integer(ilp), parameter :: max_iter = 100
           
           ! .. local scalars ..
           integer(ilp) :: i, j, iter
           real(sp) :: avg, std, tol, c0, c1, c2, t, u, si, d, base, smin, smax, smlnum, bignum, &
                     scale, sumsq
           logical(lk) :: up
           complex(sp) :: zdum
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, int, log, max, min, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. (stdlib_lsame(uplo, 'u') .or. stdlib_lsame(uplo, 'l'))) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csyequb', -info)
              return
           end if
           up = stdlib_lsame(uplo, 'u')
           amax = zero
           ! quick return if possible.
           if (n == 0) then
              scond = one
              return
           end if
           do i = 1, n
              s(i) = zero
           end do
           amax = zero
           if (up) then
              do j = 1, n
                 do i = 1, j - 1
                    s(i) = max(s(i), cabs1(a(i, j)))
                    s(j) = max(s(j), cabs1(a(i, j)))
                    amax = max(amax, cabs1(a(i, j)))
                 end do
                 s(j) = max(s(j), cabs1(a(j, j)))
                 amax = max(amax, cabs1(a(j, j)))
              end do
           else
              do j = 1, n
                 s(j) = max(s(j), cabs1(a(j, j)))
                 amax = max(amax, cabs1(a(j, j)))
                 do i = j + 1, n
                    s(i) = max(s(i), cabs1(a(i, j)))
                    s(j) = max(s(j), cabs1(a(i, j)))
                    amax = max(amax, cabs1(a(i, j)))
                 end do
              end do
           end if
           do j = 1, n
              s(j) = 1.0/s(j)
           end do
           tol = one/sqrt(2.0e0*n)
           do iter = 1, max_iter
              scale = 0.0e0
              sumsq = 0.0e0
              ! beta = |a|s
              do i = 1, n
                  work(i) = zero
              end do
              if (up) then
                 do j = 1, n
                    do i = 1, j - 1
                       work(i) = work(i) + cabs1(a(i, j))*s(j)
                       work(j) = work(j) + cabs1(a(i, j))*s(i)
                    end do
                    work(j) = work(j) + cabs1(a(j, j))*s(j)
                 end do
              else
                 do j = 1, n
                    work(j) = work(j) + cabs1(a(j, j))*s(j)
                    do i = j + 1, n
                       work(i) = work(i) + cabs1(a(i, j))*s(j)
                       work(j) = work(j) + cabs1(a(i, j))*s(i)
                    end do
                 end do
              end if
              ! avg = s^t beta / n
              avg = 0.0e0
              do i = 1, n
                 avg = avg + real(s(i)*work(i))
              end do
              avg = avg/n
              std = 0.0e0
              do i = n + 1, 2*n
                 work(i) = s(i - n)*work(i - n) - avg
              end do
              call stdlib_classq(n, work(n + 1), 1, scale, sumsq)
              std = scale*sqrt(sumsq/n)
              if (std < tol*avg) goto 999
              do i = 1, n
                 t = cabs1(a(i, i))
                 si = s(i)
                 c2 = (n - 1)*t
                 c1 = real(n - 2)*(real(work(i)) - t*si)
                 c0 = -(t*si)*si + 2*real(work(i))*si - n*avg
                 d = c1*c1 - 4*c0*c2
                 if (d <= 0) then
                    info = -1
                    return
                 end if
                 si = -2*c0/(c1 + sqrt(d))
                 d = si - s(i)
                 u = zero
                 if (up) then
                    do j = 1, i
                       t = cabs1(a(j, i))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                    do j = i + 1, n
                       t = cabs1(a(i, j))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                 else
                    do j = 1, i
                       t = cabs1(a(i, j))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                    do j = i + 1, n
                       t = cabs1(a(j, i))
                       u = u + s(j)*t
                       work(j) = work(j) + d*t
                    end do
                 end if
                 avg = avg + (u + real(work(i)))*d/n
                 s(i) = si
              end do
           end do
999   continue
           smlnum = stdlib_slamch('safemin')
           bignum = one/smlnum
           smin = bignum
           smax = zero
           t = one/sqrt(avg)
           base = stdlib_slamch('b')
           u = one/log(base)
           do i = 1, n
              s(i) = base**int(u*log(s(i)*t))
              smin = min(smin, s(i))
              smax = max(smax, s(i))
           end do
           scond = max(smin, smlnum)/min(smax, bignum)
     end subroutine stdlib_csyequb

     ! CSYMV  performs the matrix-vector  operation
     ! y := alpha*A*x + beta*y,
     ! where alpha and beta are scalars, x and y are n element vectors and
     ! A is an n by n symmetric matrix.

     subroutine stdlib_csymv(uplo, n, alpha, a, lda, x, incx, beta, y, incy)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: incx, incy, lda, n
           complex(sp) :: alpha, beta
           ! .. array arguments ..
           complex(sp) :: a(lda, *), x(*), y(*)
       ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, ix, iy, j, jx, jy, kx, ky
           complex(sp) :: temp1, temp2
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. stdlib_lsame(uplo, 'u') .and. .not. stdlib_lsame(uplo, 'l')) then
              info = 1
           else if (n < 0) then
              info = 2
           else if (lda < max(1, n)) then
              info = 5
           else if (incx == 0) then
              info = 7
           else if (incy == 0) then
              info = 10
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csymv ', info)
              return
           end if
           ! quick return if possible.
           if ((n == 0) .or. ((alpha == czero) .and. (beta == cone))) return
           ! set up the start points in  x  and  y.
           if (incx > 0) then
              kx = 1
           else
              kx = 1 - (n - 1)*incx
           end if
           if (incy > 0) then
              ky = 1
           else
              ky = 1 - (n - 1)*incy
           end if
           ! start the operations. in this version the elements of a are
           ! accessed sequentially with cone pass through the triangular part
           ! of a.
           ! first form  y := beta*y.
           if (beta /= cone) then
              if (incy == 1) then
                 if (beta == czero) then
                    do i = 1, n
                       y(i) = czero
                    end do
                 else
                    do i = 1, n
                       y(i) = beta*y(i)
                    end do
                 end if
              else
                 iy = ky
                 if (beta == czero) then
                    do i = 1, n
                       y(iy) = czero
                       iy = iy + incy
                    end do
                 else
                    do i = 1, n
                       y(iy) = beta*y(iy)
                       iy = iy + incy
                    end do
                 end if
              end if
           end if
           if (alpha == czero) return
           if (stdlib_lsame(uplo, 'u')) then
              ! form  y  when a is stored in upper triangle.
              if ((incx == 1) .and. (incy == 1)) then
                 do j = 1, n
                    temp1 = alpha*x(j)
                    temp2 = czero
                    do i = 1, j - 1
                       y(i) = y(i) + temp1*a(i, j)
                       temp2 = temp2 + a(i, j)*x(i)
                    end do
                    y(j) = y(j) + temp1*a(j, j) + alpha*temp2
                 end do
              else
                 jx = kx
                 jy = ky
                 do j = 1, n
                    temp1 = alpha*x(jx)
                    temp2 = czero
                    ix = kx
                    iy = ky
                    do i = 1, j - 1
                       y(iy) = y(iy) + temp1*a(i, j)
                       temp2 = temp2 + a(i, j)*x(ix)
                       ix = ix + incx
                       iy = iy + incy
                    end do
                    y(jy) = y(jy) + temp1*a(j, j) + alpha*temp2
                    jx = jx + incx
                    jy = jy + incy
                 end do
              end if
           else
              ! form  y  when a is stored in lower triangle.
              if ((incx == 1) .and. (incy == 1)) then
                 do j = 1, n
                    temp1 = alpha*x(j)
                    temp2 = czero
                    y(j) = y(j) + temp1*a(j, j)
                    do i = j + 1, n
                       y(i) = y(i) + temp1*a(i, j)
                       temp2 = temp2 + a(i, j)*x(i)
                    end do
                    y(j) = y(j) + alpha*temp2
                 end do
              else
                 jx = kx
                 jy = ky
                 do j = 1, n
                    temp1 = alpha*x(jx)
                    temp2 = czero
                    y(jy) = y(jy) + temp1*a(j, j)
                    ix = jx
                    iy = jy
                    do i = j + 1, n
                       ix = ix + incx
                       iy = iy + incy
                       y(iy) = y(iy) + temp1*a(i, j)
                       temp2 = temp2 + a(i, j)*x(ix)
                    end do
                    y(jy) = y(jy) + alpha*temp2
                    jx = jx + incx
                    jy = jy + incy
                 end do
              end if
           end if
           return
           ! end of stdlib_csymv
     end subroutine stdlib_csymv

     ! CSYR   performs the symmetric rank 1 operation
     ! A := alpha*x*x**H + A,
     ! where alpha is a complex scalar, x is an n element vector and A is an
     ! n by n symmetric matrix.

     subroutine stdlib_csyr(uplo, n, alpha, x, incx, a, lda)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: incx, lda, n
           complex(sp) :: alpha
           ! .. array arguments ..
           complex(sp) :: a(lda, *), x(*)
       ! =====================================================================
           
           ! .. local scalars ..
           integer(ilp) :: i, info, ix, j, jx, kx
           complex(sp) :: temp
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           if (.not. stdlib_lsame(uplo, 'u') .and. .not. stdlib_lsame(uplo, 'l')) then
              info = 1
           else if (n < 0) then
              info = 2
           else if (incx == 0) then
              info = 5
           else if (lda < max(1, n)) then
              info = 7
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csyr  ', info)
              return
           end if
           ! quick return if possible.
           if ((n == 0) .or. (alpha == czero)) return
           ! set the start point in x if the increment is not unity.
           if (incx <= 0) then
              kx = 1 - (n - 1)*incx
           else if (incx /= 1) then
              kx = 1
           end if
           ! start the operations. in this version the elements of a are
           ! accessed sequentially with cone pass through the triangular part
           ! of a.
           if (stdlib_lsame(uplo, 'u')) then
              ! form  a  when a is stored in upper triangle.
              if (incx == 1) then
                 do j = 1, n
                    if (x(j) /= czero) then
                       temp = alpha*x(j)
                       do i = 1, j
                          a(i, j) = a(i, j) + x(i)*temp
                       end do
                    end if
                 end do
              else
                 jx = kx
                 do j = 1, n
                    if (x(jx) /= czero) then
                       temp = alpha*x(jx)
                       ix = kx
                       do i = 1, j
                          a(i, j) = a(i, j) + x(ix)*temp
                          ix = ix + incx
                       end do
                    end if
                    jx = jx + incx
                 end do
              end if
           else
              ! form  a  when a is stored in lower triangle.
              if (incx == 1) then
                 do j = 1, n
                    if (x(j) /= czero) then
                       temp = alpha*x(j)
                       do i = j, n
                          a(i, j) = a(i, j) + x(i)*temp
                       end do
                    end if
                 end do
              else
                 jx = kx
                 do j = 1, n
                    if (x(jx) /= czero) then
                       temp = alpha*x(jx)
                       ix = jx
                       do i = j, n
                          a(i, j) = a(i, j) + x(ix)*temp
                          ix = ix + incx
                       end do
                    end if
                    jx = jx + incx
                 end do
              end if
           end if
           return
           ! end of stdlib_csyr
     end subroutine stdlib_csyr

     ! CSYSWAPR applies an elementary permutation on the rows and the columns of
     ! a symmetric matrix.

     subroutine stdlib_csyswapr(uplo, n, a, lda, i1, i2)
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: i1, i2, lda, n
           ! .. array arguments ..
           complex(sp) :: a(lda, n)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i
           complex(sp) :: tmp
     
           ! .. executable statements ..
           upper = stdlib_lsame(uplo, 'u')
           if (upper) then
               ! upper
               ! first swap
                ! - swap column i1 and i2 from i1 to i1-1
              call stdlib_cswap(i1 - 1, a(1, i1), 1, a(1, i2), 1)
                ! second swap :
                ! - swap a(i1,i1) and a(i2,i2)
                ! - swap row i1 from i1+1 to i2-1 with col i2 from i1+1 to i2-1
              tmp = a(i1, i1)
              a(i1, i1) = a(i2, i2)
              a(i2, i2) = tmp
              do i = 1, i2 - i1 - 1
                 tmp = a(i1, i1 + i)
                 a(i1, i1 + i) = a(i1 + i, i2)
                 a(i1 + i, i2) = tmp
              end do
                ! third swap
                ! - swap row i1 and i2 from i2+1 to n
              do i = i2 + 1, n
                 tmp = a(i1, i)
                 a(i1, i) = a(i2, i)
                 a(i2, i) = tmp
              end do
             else
               ! lower
               ! first swap
                ! - swap row i1 and i2 from i1 to i1-1
              call stdlib_cswap(i1 - 1, a(i1, 1), lda, a(i2, 1), lda)
               ! second swap :
                ! - swap a(i1,i1) and a(i2,i2)
                ! - swap col i1 from i1+1 to i2-1 with row i2 from i1+1 to i2-1
               tmp = a(i1, i1)
               a(i1, i1) = a(i2, i2)
               a(i2, i2) = tmp
               do i = 1, i2 - i1 - 1
                  tmp = a(i1 + i, i1)
                  a(i1 + i, i1) = a(i2, i1 + i)
                  a(i2, i1 + i) = tmp
               end do
               ! third swap
                ! - swap col i1 and i2 from i2+1 to n
               do i = i2 + 1, n
                  tmp = a(i, i1)
                  a(i, i1) = a(i, i2)
                  a(i, i2) = tmp
               end do
           end if
     end subroutine stdlib_csyswapr

     ! CSYTF2 computes the factorization of a complex symmetric matrix A
     ! using the Bunch-Kaufman diagonal pivoting method:
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, U**T is the transpose of U, and D is symmetric and
     ! block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_csytf2(uplo, n, a, lda, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: i, imax, j, jmax, k, kk, kp, kstep
           real(sp) :: absakk, alpha, colmax, rowmax
           complex(sp) :: d11, d12, d21, d22, r1, t, wk, wkm1, wkp1, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, aimag, max, real, sqrt
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytf2', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 70
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, a(1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero .or. stdlib_sisnan(absakk)) then
                 ! column k is zero or underflow, or contains a nan:
                 ! set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = imax + stdlib_icamax(k - imax, a(imax, imax + 1), lda)
                    rowmax = cabs1(a(imax, jmax))
                    if (imax > 1) then
                       jmax = stdlib_icamax(imax - 1, a(1, imax), 1)
                       rowmax = max(rowmax, cabs1(a(jmax, imax)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (cabs1(a(imax, imax)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k-1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k - kstep + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    call stdlib_cswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    call stdlib_cswap(kk - kp - 1, a(kp + 1, kk), 1, a(kp, kp + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    ! perform a rank-1 update of a(1:k-1,1:k-1) as
                    ! a := a - u(k)*d(k)*u(k)**t = a - w(k)*1/d(k)*w(k)**t
                    r1 = cone/a(k, k)
                    call stdlib_csyr(uplo, k - 1, -r1, a(1, k), 1, a, lda)
                    ! store u(k) in column k
                    call stdlib_cscal(k - 1, r1, a(1, k), 1)
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( w(k-1) w(k) )*inv(d(k))*( w(k-1) w(k) )**t
                    if (k > 2) then
                       d12 = a(k - 1, k)
                       d22 = a(k - 1, k - 1)/d12
                       d11 = a(k, k)/d12
                       t = cone/(d11*d22 - cone)
                       d12 = t/d12
                       do j = k - 2, 1, -1
                          wkm1 = d12*(d11*a(j, k - 1) - a(j, k))
                          wk = d12*(d22*a(j, k) - a(j, k - 1))
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - a(i, k)*wk - a(i, k - 1)*wkm1
                          end do
                          a(j, k) = wk
                          a(j, k - 1) = wkm1
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
40      continue
              ! if k > n, exit from loop
              if (k > n) go to 70
              kstep = 1
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, a(k + 1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if (max(absakk, colmax) == zero .or. stdlib_sisnan(absakk)) then
                 ! column k is zero or underflow, or contains a nan:
                 ! set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 if (absakk >= alpha*colmax) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    ! jmax is the column-index of the largest off-diagonal
                    ! element in row imax, and rowmax is its absolute value
                    jmax = k - 1 + stdlib_icamax(imax - k, a(imax, k), lda)
                    rowmax = cabs1(a(imax, jmax))
                    if (imax < n) then
                       jmax = imax + stdlib_icamax(n - imax, a(imax + 1, imax), 1)
                       rowmax = max(rowmax, cabs1(a(jmax, imax)))
                    end if
                    if (absakk >= alpha*colmax*(colmax/rowmax)) then
                       ! no interchange, use 1-by-1 pivot block
                       kp = k
                    else if (cabs1(a(imax, imax)) >= alpha*rowmax) then
                       ! interchange rows and columns k and imax, use 1-by-1
                       ! pivot block
                       kp = imax
                    else
                       ! interchange rows and columns k+1 and imax, use 2-by-2
                       ! pivot block
                       kp = imax
                       kstep = 2
                    end if
                 end if
                 kk = k + kstep - 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    call stdlib_cswap(kp - kk - 1, a(kk + 1, kk), 1, a(kp, kk + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                       ! perform a rank-1 update of a(k+1:n,k+1:n) as
                       ! a := a - l(k)*d(k)*l(k)**t = a - w(k)*(1/d(k))*w(k)**t
                       r1 = cone/a(k, k)
                       call stdlib_csyr(uplo, n - k, -r1, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                 
                       ! store l(k) in column k
                       call stdlib_cscal(n - k, r1, a(k + 1, k), 1)
                    end if
                 else
                    ! 2-by-2 pivot block d(k)
                    if (k < n - 1) then
                       ! perform a rank-2 update of a(k+2:n,k+2:n) as
                       ! a := a - ( l(k) l(k+1) )*d(k)*( l(k) l(k+1) )**t
                          ! = a - ( w(k) w(k+1) )*inv(d(k))*( w(k) w(k+1) )**t
                       ! where l(k) and l(k+1) are the k-th and (k+1)-th
                       ! columns of l
                       d21 = a(k + 1, k)
                       d11 = a(k + 1, k + 1)/d21
                       d22 = a(k, k)/d21
                       t = cone/(d11*d22 - cone)
                       d21 = t/d21
                       do j = k + 2, n
                          wk = d21*(d11*a(j, k) - a(j, k + 1))
                          wkp1 = d21*(d22*a(j, k + 1) - a(j, k))
                          do i = j, n
                             a(i, j) = a(i, j) - a(i, k)*wk - a(i, k + 1)*wkp1
                          end do
                          a(j, k) = wk
                          a(j, k + 1) = wkp1
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -kp
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 40
           end if
70      continue
           return
           ! end of stdlib_csytf2
     end subroutine stdlib_csytf2

     ! CSYTF2_RK computes the factorization of a complex symmetric matrix A
     ! using the bounded Bunch-Kaufman (rook) diagonal pivoting method:
     ! A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
     ! where U (or L) is unit upper (or lower) triangular matrix,
     ! U**T (or L**T) is the transpose of U (or L), P is a permutation
     ! matrix, P**T is the transpose of P, and D is symmetric and block
     ! diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.
     ! For more information see Further Details section.

     subroutine stdlib_csytf2_rk(uplo, n, a, lda, e, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), e(*)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: upper, done
           integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii
           real(sp) :: absakk, alpha, colmax, rowmax, stemp, sfmin
           complex(sp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sqrt, aimag, real
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytf2_rk', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! initialize the first entry of array e, where superdiagonal
              ! elements of d are stored
              e(1) = czero
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 34
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, a(1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 ! set e( k ) to zero
                 if (k > 1) e(k) = czero
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange,
                    ! use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, a(imax, imax + 1), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, a(1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! abs( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (cabs1(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the leading
                    ! submatrix a(1:k,1:k) if we have a 2-by-2 pivot
                    if (p > 1) call stdlib_cswap(p - 1, a(1, k), 1, a(1, p), 1)
                    if (p < (k - 1)) call stdlib_cswap(k - p - 1, a(p + 1, k), 1, a(p, p + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                    ! convert upper triangle of a into u form by applying
                    ! the interchanges in columns k+1:n.
                    if (k < n) call stdlib_cswap(n - k, a(k, k + 1), lda, a(p, k + 1), lda)
                 end if
                 ! second swap
                 kk = k - kstep + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    if ((kk > 1) .and. (kp < (kk - 1))) call stdlib_cswap(kk - kp - 1, a(kp + 1, kk), &
                              1, a(kp, kp + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                    ! convert upper triangle of a into u form by applying
                    ! the interchanges in columns k+1:n.
                    if (k < n) call stdlib_cswap(n - k, a(kk, k + 1), lda, a(kp, k + 1), lda)
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    if (k > 1) then
                       ! perform a rank-1 update of a(1:k-1,1:k-1) and
                       ! store u(k) in column k
                       if (cabs1(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(1:k-1,1:k-1) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*1/d(k)*w(k)**t
                          d11 = cone/a(k, k)
                          call stdlib_csyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                          ! store u(k) in column k
                          call stdlib_cscal(k - 1, d11, a(1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_csyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                       end if
                       ! store the superdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k > 2) then
                       d12 = a(k - 1, k)
                       d22 = a(k - 1, k - 1)/d12
                       d11 = a(k, k)/d12
                       t = cone/(d11*d22 - cone)
                       do j = k - 2, 1, -1
                          wkm1 = t*(d11*a(j, k - 1) - a(j, k))
                          wk = t*(d22*a(j, k) - a(j, k - 1))
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - (a(i, k)/d12)*wk - (a(i, k - 1)/d12) &
                                       *wkm1
                          end do
                          ! store u(k) and u(k-1) in cols k and k-1 for row j
                          a(j, k) = wk/d12
                          a(j, k - 1) = wkm1/d12
                       end do
                    end if
                    ! copy superdiagonal elements of d(k) to e(k) and
                    ! zero out superdiagonal entry of a
                    e(k) = a(k - 1, k)
                    e(k - 1) = czero
                    a(k - 1, k) = czero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
34      continue
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! initialize the unused last entry of the subdiagonal array e.
              e(n) = czero
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
40      continue
              ! if k > n, exit from loop
              if (k > n) go to 64
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, a(k + 1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
                 ! set e( k ) to zero
                 if (k < n) e(k) = czero
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
42      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, a(imax, k), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, a(imax + 1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! abs( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (cabs1(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 42
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the trailing
                    ! submatrix a(k:n,k:n) if we have a 2-by-2 pivot
                    if (p < n) call stdlib_cswap(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    if (p > (k + 1)) call stdlib_cswap(p - k - 1, a(k + 1, k), 1, a(p, k + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                    ! convert lower triangle of a into l form by applying
                    ! the interchanges in columns 1:k-1.
                    if (k > 1) call stdlib_cswap(k - 1, a(k, 1), lda, a(p, 1), lda)
                 end if
                 ! second swap
                 kk = k + kstep - 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    if ((kk < n) .and. (kp > (kk + 1))) call stdlib_cswap(kp - kk - 1, a(kk + 1, kk), &
                              1, a(kp, kk + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                    ! convert lower triangle of a into l form by applying
                    ! the interchanges in columns 1:k-1.
                    if (k > 1) call stdlib_cswap(k - 1, a(kk, 1), lda, a(kp, 1), lda)
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                    ! perform a rank-1 update of a(k+1:n,k+1:n) and
                    ! store l(k) in column k
                       if (cabs1(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                          d11 = cone/a(k, k)
                          call stdlib_csyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                          ! store l(k) in column k
                          call stdlib_cscal(n - k, d11, a(k + 1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_csyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                       end if
                       ! store the subdiagonal element of d in array e
                       e(k) = czero
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! perform a rank-2 update of a(k+2:n,k+2:n) as
                    ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t
                       ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k < n - 1) then
                       d21 = a(k + 1, k)
                       d11 = a(k + 1, k + 1)/d21
                       d22 = a(k, k)/d21
                       t = cone/(d11*d22 - cone)
                       do j = k + 2, n
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wk = t*(d11*a(j, k) - a(j, k + 1))
                          wkp1 = t*(d22*a(j, k + 1) - a(j, k))
                          ! perform a rank-2 update of a(k+2:n,k+2:n)
                          do i = j, n
                             a(i, j) = a(i, j) - (a(i, k)/d21)*wk - (a(i, k + 1)/d21) &
                                       *wkp1
                          end do
                          ! store l(k) and l(k+1) in cols k and k+1 for row j
                          a(j, k) = wk/d21
                          a(j, k + 1) = wkp1/d21
                       end do
                    end if
                    ! copy subdiagonal elements of d(k) to e(k) and
                    ! zero out subdiagonal entry of a
                    e(k) = a(k + 1, k)
                    e(k + 1) = czero
                    a(k + 1, k) = czero
                 end if
                 ! end column k is nonsingular
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 40
64      continue
           end if
           return
           ! end of stdlib_csytf2_rk
     end subroutine stdlib_csytf2_rk

     ! CSYTF2_ROOK computes the factorization of a complex symmetric matrix A
     ! using the bounded Bunch-Kaufman ("rook") diagonal pivoting method:
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, U**T is the transpose of U, and D is symmetric and
     ! block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the unblocked version of the algorithm, calling Level 2 BLAS.

     subroutine stdlib_csytf2_rook(uplo, n, a, lda, ipiv, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *)
        ! =====================================================================
           ! .. parameters ..
           real(sp), parameter :: sevten = 17.0_sp
           
           ! .. local scalars ..
           logical(lk) :: upper, done
           integer(ilp) :: i, imax, j, jmax, itemp, k, kk, kp, kstep, p, ii
           real(sp) :: absakk, alpha, colmax, rowmax, stemp, sfmin
           complex(sp) :: d11, d12, d21, d22, t, wk, wkm1, wkp1, z
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max, sqrt, aimag, real
           ! .. statement functions ..
           real(sp) :: cabs1
           ! .. statement function definitions ..
           cabs1(z) = abs(real(z)) + abs(aimag(z))
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytf2_rook', -info)
              return
           end if
           ! initialize alpha for use in choosing pivot block size.
           alpha = (one + sqrt(sevten))/eight
           ! compute machine safe minimum
           sfmin = stdlib_slamch('s')
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 70
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k > 1) then
                 imax = stdlib_icamax(k - 1, a(1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange,
                    ! use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
12      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = imax + stdlib_icamax(k - imax, a(imax, imax + 1), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax > 1) then
                          itemp = stdlib_icamax(imax - 1, a(1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! cabs1( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (cabs1(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 12
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the leading
                    ! submatrix a(1:k,1:k) if we have a 2-by-2 pivot
                    if (p > 1) call stdlib_cswap(p - 1, a(1, k), 1, a(1, p), 1)
                    if (p < (k - 1)) call stdlib_cswap(k - p - 1, a(p + 1, k), 1, a(p, p + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                 end if
                 ! second swap
                 kk = k - kstep + 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the leading
                    ! submatrix a(1:k,1:k)
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, kk), 1, a(1, kp), 1)
                    if ((kk > 1) .and. (kp < (kk - 1))) call stdlib_cswap(kk - kp - 1, a(kp + 1, kk), &
                              1, a(kp, kp + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k - 1, k)
                       a(k - 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 end if
                 ! update the leading submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = u(k)*d(k)
                    ! where u(k) is the k-th column of u
                    if (k > 1) then
                       ! perform a rank-1 update of a(1:k-1,1:k-1) and
                       ! store u(k) in column k
                       if (cabs1(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(1:k-1,1:k-1) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*1/d(k)*w(k)**t
                          d11 = cone/a(k, k)
                          call stdlib_csyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                          ! store u(k) in column k
                          call stdlib_cscal(k - 1, d11, a(1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = 1, k - 1
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - u(k)*d(k)*u(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_csyr(uplo, k - 1, -d11, a(1, k), 1, a, lda)
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k-1 now hold
                    ! ( w(k-1) w(k) ) = ( u(k-1) u(k) )*d(k)
                    ! where u(k) and u(k-1) are the k-th and (k-1)-th columns
                    ! of u
                    ! perform a rank-2 update of a(1:k-2,1:k-2) as
                    ! a := a - ( u(k-1) u(k) )*d(k)*( u(k-1) u(k) )**t
                       ! = a - ( ( a(k-1)a(k) )*inv(d(k)) ) * ( a(k-1)a(k) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k > 2) then
                       d12 = a(k - 1, k)
                       d22 = a(k - 1, k - 1)/d12
                       d11 = a(k, k)/d12
                       t = cone/(d11*d22 - cone)
                       do j = k - 2, 1, -1
                          wkm1 = t*(d11*a(j, k - 1) - a(j, k))
                          wk = t*(d22*a(j, k) - a(j, k - 1))
                          do i = j, 1, -1
                             a(i, j) = a(i, j) - (a(i, k)/d12)*wk - (a(i, k - 1)/d12) &
                                       *wkm1
                          end do
                          ! store u(k) and u(k-1) in cols k and k-1 for row j
                          a(j, k) = wk/d12
                          a(j, k - 1) = wkm1/d12
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k - 1) = -kp
              end if
              ! decrease k and return to the start of the main loop
              k = k - kstep
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2
              k = 1
40      continue
              ! if k > n, exit from loop
              if (k > n) go to 70
              kstep = 1
              p = k
              ! determine rows and columns to be interchanged and whether
              ! a 1-by-1 or 2-by-2 pivot block will be used
              absakk = cabs1(a(k, k))
              ! imax is the row-index of the largest off-diagonal element in
              ! column k, and colmax is its absolute value.
              ! determine both colmax and imax.
              if (k < n) then
                 imax = k + stdlib_icamax(n - k, a(k + 1, k), 1)
                 colmax = cabs1(a(imax, k))
              else
                 colmax = zero
              end if
              if ((max(absakk, colmax) == zero)) then
                 ! column k is zero or underflow: set info and continue
                 if (info == 0) info = k
                 kp = k
              else
                 ! test for interchange
                 ! equivalent to testing for (used to handle nan and inf)
                 ! absakk>=alpha*colmax
                 if (.not. (absakk < alpha*colmax)) then
                    ! no interchange, use 1-by-1 pivot block
                    kp = k
                 else
                    done = .false.
                    ! loop until pivot found
42      continue
                       ! begin pivot search loop body
                       ! jmax is the column-index of the largest off-diagonal
                       ! element in row imax, and rowmax is its absolute value.
                       ! determine both rowmax and jmax.
                       if (imax /= k) then
                          jmax = k - 1 + stdlib_icamax(imax - k, a(imax, k), lda)
                          rowmax = cabs1(a(imax, jmax))
                       else
                          rowmax = zero
                       end if
                       if (imax < n) then
                          itemp = imax + stdlib_icamax(n - imax, a(imax + 1, imax), 1)
                          stemp = cabs1(a(itemp, imax))
                          if (stemp > rowmax) then
                             rowmax = stemp
                             jmax = itemp
                          end if
                       end if
                       ! equivalent to testing for (used to handle nan and inf)
                       ! cabs1( a( imax, imax ) )>=alpha*rowmax
                       if (.not. (cabs1(a(imax, imax)) < alpha*rowmax)) then
                          ! interchange rows and columns k and imax,
                          ! use 1-by-1 pivot block
                          kp = imax
                          done = .true.
                       ! equivalent to testing for rowmax == colmax,
                       ! used to handle nan and inf
                       else if ((p == jmax) .or. (rowmax <= colmax)) then
                          ! interchange rows and columns k+1 and imax,
                          ! use 2-by-2 pivot block
                          kp = imax
                          kstep = 2
                          done = .true.
                       else
                          ! pivot not found, set variables and repeat
                          p = imax
                          colmax = rowmax
                          imax = jmax
                       end if
                       ! end pivot search loop body
                    if (.not. done) goto 42
                 end if
                 ! swap two rows and two columns
                 ! first swap
                 if ((kstep == 2) .and. (p /= k)) then
                    ! interchange rows and column k and p in the trailing
                    ! submatrix a(k:n,k:n) if we have a 2-by-2 pivot
                    if (p < n) call stdlib_cswap(n - p, a(p + 1, k), 1, a(p + 1, p), 1)
                    if (p > (k + 1)) call stdlib_cswap(p - k - 1, a(k + 1, k), 1, a(p, k + 1), lda)
                              
                    t = a(k, k)
                    a(k, k) = a(p, p)
                    a(p, p) = t
                 end if
                 ! second swap
                 kk = k + kstep - 1
                 if (kp /= kk) then
                    ! interchange rows and columns kk and kp in the trailing
                    ! submatrix a(k:n,k:n)
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, kk), 1, a(kp + 1, kp), 1)
                              
                    if ((kk < n) .and. (kp > (kk + 1))) call stdlib_cswap(kp - kk - 1, a(kk + 1, kk), &
                              1, a(kp, kk + 1), lda)
                    t = a(kk, kk)
                    a(kk, kk) = a(kp, kp)
                    a(kp, kp) = t
                    if (kstep == 2) then
                       t = a(k + 1, k)
                       a(k + 1, k) = a(kp, k)
                       a(kp, k) = t
                    end if
                 end if
                 ! update the trailing submatrix
                 if (kstep == 1) then
                    ! 1-by-1 pivot block d(k): column k now holds
                    ! w(k) = l(k)*d(k)
                    ! where l(k) is the k-th column of l
                    if (k < n) then
                    ! perform a rank-1 update of a(k+1:n,k+1:n) and
                    ! store l(k) in column k
                       if (cabs1(a(k, k)) >= sfmin) then
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                          d11 = cone/a(k, k)
                          call stdlib_csyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                          ! store l(k) in column k
                          call stdlib_cscal(n - k, d11, a(k + 1, k), 1)
                       else
                          ! store l(k) in column k
                          d11 = a(k, k)
                          do ii = k + 1, n
                             a(ii, k) = a(ii, k)/d11
                          end do
                          ! perform a rank-1 update of a(k+1:n,k+1:n) as
                          ! a := a - l(k)*d(k)*l(k)**t
                             ! = a - w(k)*(1/d(k))*w(k)**t
                             ! = a - (w(k)/d(k))*(d(k))*(w(k)/d(k))**t
                          call stdlib_csyr(uplo, n - k, -d11, a(k + 1, k), 1, a(k + 1, k + 1), lda)
                                    
                       end if
                    end if
                 else
                    ! 2-by-2 pivot block d(k): columns k and k+1 now hold
                    ! ( w(k) w(k+1) ) = ( l(k) l(k+1) )*d(k)
                    ! where l(k) and l(k+1) are the k-th and (k+1)-th columns
                    ! of l
                    ! perform a rank-2 update of a(k+2:n,k+2:n) as
                    ! a := a - ( l(k) l(k+1) ) * d(k) * ( l(k) l(k+1) )**t
                       ! = a - ( ( a(k)a(k+1) )*inv(d(k) ) * ( a(k)a(k+1) )**t
                    ! and store l(k) and l(k+1) in columns k and k+1
                    if (k < n - 1) then
                       d21 = a(k + 1, k)
                       d11 = a(k + 1, k + 1)/d21
                       d22 = a(k, k)/d21
                       t = cone/(d11*d22 - cone)
                       do j = k + 2, n
                          ! compute  d21 * ( w(k)w(k+1) ) * inv(d(k)) for row j
                          wk = t*(d11*a(j, k) - a(j, k + 1))
                          wkp1 = t*(d22*a(j, k + 1) - a(j, k))
                          ! perform a rank-2 update of a(k+2:n,k+2:n)
                          do i = j, n
                             a(i, j) = a(i, j) - (a(i, k)/d21)*wk - (a(i, k + 1)/d21) &
                                       *wkp1
                          end do
                          ! store l(k) and l(k+1) in cols k and k+1 for row j
                          a(j, k) = wk/d21
                          a(j, k + 1) = wkp1/d21
                       end do
                    end if
                 end if
              end if
              ! store details of the interchanges in ipiv
              if (kstep == 1) then
                 ipiv(k) = kp
              else
                 ipiv(k) = -p
                 ipiv(k + 1) = -kp
              end if
              ! increase k and return to the start of the main loop
              k = k + kstep
              go to 40
           end if
70      continue
           return
           ! end of stdlib_csytf2_rook
     end subroutine stdlib_csytf2_rook

     ! CSYTRF computes the factorization of a complex symmetric matrix A
     ! using the Bunch-Kaufman diagonal pivoting method.  The form of the
     ! factorization is
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, and D is symmetric and block diagonal with
     ! 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the blocked version of the algorithm, calling Level 3 BLAS.

     subroutine stdlib_csytrf(uplo, n, a, lda, ipiv, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, lwork, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), work(*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: lquery, upper
           integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           lquery = (lwork == -1)
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           else if (lwork < 1 .and. .not. lquery) then
              info = -7
           end if
           if (info == 0) then
              ! determine the block size
              nb = stdlib_ilaenv(1, 'stdlib_csytrf', uplo, n, -1, -1, -1)
              lwkopt = n*nb
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytrf', -info)
              return
           else if (lquery) then
              return
           end if
           nbmin = 2
           ldwork = n
           if (nb > 1 .and. nb < n) then
              iws = ldwork*nb
              if (lwork < iws) then
                 nb = max(lwork/ldwork, 1)
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_csytrf', uplo, n, -1, -1, -1))
                           
              end if
           else
              iws = 1
           end if
           if (nb < nbmin) nb = n
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! kb, where kb is the number of columns factorized by stdlib_clasyf;
              ! kb is either nb or nb-1, or k for the last block
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 40
              if (k > nb) then
                 ! factorize columns k-kb+1:k of a and use blocked code to
                 ! update columns 1:k-kb
                 call stdlib_clasyf(uplo, k, nb, kb, a, lda, ipiv, work, n, iinfo)
              else
                 ! use unblocked code to factorize columns 1:k of a
                 call stdlib_csytf2(uplo, k, a, lda, ipiv, iinfo)
                 kb = k
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo
              ! decrease k and return to the start of the main loop
              k = k - kb
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! kb, where kb is the number of columns factorized by stdlib_clasyf;
              ! kb is either nb or nb-1, or n-k+1 for the last block
              k = 1
20      continue
              ! if k > n, exit from loop
              if (k > n) go to 40
              if (k <= n - nb) then
                 ! factorize columns k:k+kb-1 of a and use blocked code to
                 ! update columns k+kb:n
                 call stdlib_clasyf(uplo, n - k + 1, nb, kb, a(k, k), lda, ipiv(k), work, n, &
                           iinfo)
              else
                 ! use unblocked code to factorize columns k:n of a
                 call stdlib_csytf2(uplo, n - k + 1, a(k, k), lda, ipiv(k), iinfo)
                 kb = n - k + 1
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo + k - 1
              ! adjust ipiv
              do j = k, k + kb - 1
                 if (ipiv(j) > 0) then
                    ipiv(j) = ipiv(j) + k - 1
                 else
                    ipiv(j) = ipiv(j) - k + 1
                 end if
              end do
              ! increase k and return to the start of the main loop
              k = k + kb
              go to 20
           end if
40      continue
           work(1) = lwkopt
           return
           ! end of stdlib_csytrf
     end subroutine stdlib_csytrf

     ! CSYTRF_RK computes the factorization of a complex symmetric matrix A
     ! using the bounded Bunch-Kaufman (rook) diagonal pivoting method:
     ! A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
     ! where U (or L) is unit upper (or lower) triangular matrix,
     ! U**T (or L**T) is the transpose of U (or L), P is a permutation
     ! matrix, P**T is the transpose of P, and D is symmetric and block
     ! diagonal with 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the blocked version of the algorithm, calling Level 3 BLAS.
     ! For more information see Further Details section.

     subroutine stdlib_csytrf_rk(uplo, n, a, lda, e, ipiv, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, lwork, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), e(*), work(*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: lquery, upper
           integer(ilp) :: i, iinfo, ip, iws, k, kb, ldwork, lwkopt, nb, nbmin
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           lquery = (lwork == -1)
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           else if (lwork < 1 .and. .not. lquery) then
              info = -8
           end if
           if (info == 0) then
              ! determine the block size
              nb = stdlib_ilaenv(1, 'stdlib_csytrf_rk', uplo, n, -1, -1, -1)
              lwkopt = n*nb
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytrf_rk', -info)
              return
           else if (lquery) then
              return
           end if
           nbmin = 2
           ldwork = n
           if (nb > 1 .and. nb < n) then
              iws = ldwork*nb
              if (lwork < iws) then
                 nb = max(lwork/ldwork, 1)
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_csytrf_rk', uplo, n, -1, -1, -1))
                           
              end if
           else
              iws = 1
           end if
           if (nb < nbmin) nb = n
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! kb, where kb is the number of columns factorized by stdlib_clasyf_rk;
              ! kb is either nb or nb-1, or k for the last block
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 15
              if (k > nb) then
                 ! factorize columns k-kb+1:k of a and use blocked code to
                 ! update columns 1:k-kb
                 call stdlib_clasyf_rk(uplo, k, nb, kb, a, lda, e, ipiv, work, ldwork, iinfo)
                           
              else
                 ! use unblocked code to factorize columns 1:k of a
                 call stdlib_csytf2_rk(uplo, k, a, lda, e, ipiv, iinfo)
                 kb = k
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo
              ! no need to adjust ipiv
              ! apply permutations to the leading panel 1:k-1
              ! read ipiv from the last block factored, i.e.
              ! indices  k-kb+1:k and apply row permutations to the
              ! last k+1 colunms k+1:n after that block
              ! (we can do the simple loop over ipiv with decrement -1,
              ! since the abs value of ipiv( i ) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              if (k < n) then
                 do i = k, (k - kb + 1), -1
                    ip = abs(ipiv(i))
                    if (ip /= i) then
                       call stdlib_cswap(n - k, a(i, k + 1), lda, a(ip, k + 1), lda)
                    end if
                 end do
              end if
              ! decrease k and return to the start of the main loop
              k = k - kb
              go to 10
              ! this label is the exit from main loop over k decreasing
              ! from n to 1 in steps of kb
15      continue
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! kb, where kb is the number of columns factorized by stdlib_clasyf_rk;
              ! kb is either nb or nb-1, or n-k+1 for the last block
              k = 1
20      continue
              ! if k > n, exit from loop
              if (k > n) go to 35
              if (k <= n - nb) then
                 ! factorize columns k:k+kb-1 of a and use blocked code to
                 ! update columns k+kb:n
                 call stdlib_clasyf_rk(uplo, n - k + 1, nb, kb, a(k, k), lda, e(k), ipiv(k), &
                           work, ldwork, iinfo)
              else
                 ! use unblocked code to factorize columns k:n of a
                 call stdlib_csytf2_rk(uplo, n - k + 1, a(k, k), lda, e(k), ipiv(k), iinfo)
                           
                 kb = n - k + 1
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo + k - 1
              ! adjust ipiv
              do i = k, k + kb - 1
                 if (ipiv(i) > 0) then
                    ipiv(i) = ipiv(i) + k - 1
                 else
                    ipiv(i) = ipiv(i) - k + 1
                 end if
              end do
              ! apply permutations to the leading panel 1:k-1
              ! read ipiv from the last block factored, i.e.
              ! indices  k:k+kb-1 and apply row permutations to the
              ! first k-1 colunms 1:k-1 before that block
              ! (we can do the simple loop over ipiv with increment 1,
              ! since the abs value of ipiv( i ) represents the row index
              ! of the interchange with row i in both 1x1 and 2x2 pivot cases)
              if (k > 1) then
                 do i = k, (k + kb - 1), 1
                    ip = abs(ipiv(i))
                    if (ip /= i) then
                       call stdlib_cswap(k - 1, a(i, 1), lda, a(ip, 1), lda)
                    end if
                 end do
              end if
              ! increase k and return to the start of the main loop
              k = k + kb
              go to 20
              ! this label is the exit from main loop over k increasing
              ! from 1 to n in steps of kb
35      continue
           ! end lower
           end if
           work(1) = lwkopt
           return
           ! end of stdlib_csytrf_rk
     end subroutine stdlib_csytrf_rk

     ! CSYTRF_ROOK computes the factorization of a complex symmetric matrix A
     ! using the bounded Bunch-Kaufman ("rook") diagonal pivoting method.
     ! The form of the factorization is
     ! A = U*D*U**T  or  A = L*D*L**T
     ! where U (or L) is a product of permutation and unit upper (lower)
     ! triangular matrices, and D is symmetric and block diagonal with
     ! 1-by-1 and 2-by-2 diagonal blocks.
     ! This is the blocked version of the algorithm, calling Level 3 BLAS.

     subroutine stdlib_csytrf_rook(uplo, n, a, lda, ipiv, work, lwork, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, lwork, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), work(*)
        ! =====================================================================
           ! .. local scalars ..
           logical(lk) :: lquery, upper
           integer(ilp) :: iinfo, iws, j, k, kb, ldwork, lwkopt, nb, nbmin
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           lquery = (lwork == -1)
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           else if (lwork < 1 .and. .not. lquery) then
              info = -7
           end if
           if (info == 0) then
              ! determine the block size
              nb = stdlib_ilaenv(1, 'stdlib_csytrf_rook', uplo, n, -1, -1, -1)
              lwkopt = max(1, n*nb)
              work(1) = lwkopt
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytrf_rook', -info)
              return
           else if (lquery) then
              return
           end if
           nbmin = 2
           ldwork = n
           if (nb > 1 .and. nb < n) then
              iws = ldwork*nb
              if (lwork < iws) then
                 nb = max(lwork/ldwork, 1)
                 nbmin = max(2, stdlib_ilaenv(2, 'stdlib_csytrf_rook', uplo, n, -1, -1, -1))
                           
              end if
           else
              iws = 1
           end if
           if (nb < nbmin) nb = n
           if (upper) then
              ! factorize a as u*d*u**t using the upper triangle of a
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! kb, where kb is the number of columns factorized by stdlib_clasyf_rook;
              ! kb is either nb or nb-1, or k for the last block
              k = n
10      continue
              ! if k < 1, exit from loop
              if (k < 1) go to 40
              if (k > nb) then
                 ! factorize columns k-kb+1:k of a and use blocked code to
                 ! update columns 1:k-kb
                 call stdlib_clasyf_rook(uplo, k, nb, kb, a, lda, ipiv, work, ldwork, iinfo)
                           
              else
                 ! use unblocked code to factorize columns 1:k of a
                 call stdlib_csytf2_rook(uplo, k, a, lda, ipiv, iinfo)
                 kb = k
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo
              ! no need to adjust ipiv
              ! decrease k and return to the start of the main loop
              k = k - kb
              go to 10
           else
              ! factorize a as l*d*l**t using the lower triangle of a
              ! k is the main loop index, increasing from 1 to n in steps of
              ! kb, where kb is the number of columns factorized by stdlib_clasyf_rook;
              ! kb is either nb or nb-1, or n-k+1 for the last block
              k = 1
20      continue
              ! if k > n, exit from loop
              if (k > n) go to 40
              if (k <= n - nb) then
                 ! factorize columns k:k+kb-1 of a and use blocked code to
                 ! update columns k+kb:n
                 call stdlib_clasyf_rook(uplo, n - k + 1, nb, kb, a(k, k), lda, ipiv(k), work, &
                           ldwork, iinfo)
              else
                 ! use unblocked code to factorize columns k:n of a
                 call stdlib_csytf2_rook(uplo, n - k + 1, a(k, k), lda, ipiv(k), iinfo)
                 kb = n - k + 1
              end if
              ! set info on the first occurrence of a zero pivot
              if (info == 0 .and. iinfo > 0) info = iinfo + k - 1
              ! adjust ipiv
              do j = k, k + kb - 1
                 if (ipiv(j) > 0) then
                    ipiv(j) = ipiv(j) + k - 1
                 else
                    ipiv(j) = ipiv(j) - k + 1
                 end if
              end do
              ! increase k and return to the start of the main loop
              k = k + kb
              go to 20
           end if
40      continue
           work(1) = lwkopt
           return
           ! end of stdlib_csytrf_rook
     end subroutine stdlib_csytrf_rook

     ! CSYTRI computes the inverse of a complex symmetric indefinite matrix
     ! A using the factorization A = U*D*U**T or A = L*D*L**T computed by
     ! CSYTRF.

     subroutine stdlib_csytri(uplo, n, a, lda, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: k, kp, kstep
           complex(sp) :: ak, akkp1, akp1, d, t, temp
     
           ! .. intrinsic functions ..
           intrinsic :: abs, max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytri', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              do info = 1, n
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 40
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = cone/a(k, k)
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_csymv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_cdotu(k - 1, work, 1, a(1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = a(k, k + 1)
                 ak = a(k, k)/t
                 akp1 = a(k + 1, k + 1)/t
                 akkp1 = a(k, k + 1)/t
                 d = t*(ak*akp1 - cone)
                 a(k, k) = akp1/d
                 a(k + 1, k + 1) = ak/d
                 a(k, k + 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_csymv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_cdotu(k - 1, work, 1, a(1, k), 1)
                    a(k, k + 1) = a(k, k + 1) - stdlib_cdotu(k - 1, a(1, k), 1, a(1, k + 1), 1)
                              
                    call stdlib_ccopy(k - 1, a(1, k + 1), 1, work, 1)
                    call stdlib_csymv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k + 1), 1)
                              
                    a(k + 1, k + 1) = a(k + 1, k + 1) - stdlib_cdotu(k - 1, work, 1, a(1, k + 1), 1)
                              
                 end if
                 kstep = 2
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                 call stdlib_cswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                 temp = a(k, k)
                 a(k, k) = a(kp, kp)
                 a(kp, kp) = temp
                 if (kstep == 2) then
                    temp = a(k, k + 1)
                    a(k, k + 1) = a(kp, k + 1)
                    a(kp, k + 1) = temp
                 end if
              end if
              k = k + kstep
              go to 30
40      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
50      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 60
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = cone/a(k, k)
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_csymv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k), 1)
                    a(k, k) = a(k, k) - stdlib_cdotu(n - k, work, 1, a(k + 1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = a(k, k - 1)
                 ak = a(k - 1, k - 1)/t
                 akp1 = a(k, k)/t
                 akkp1 = a(k, k - 1)/t
                 d = t*(ak*akp1 - cone)
                 a(k - 1, k - 1) = akp1/d
                 a(k, k) = ak/d
                 a(k, k - 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_csymv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k), 1)
                    a(k, k) = a(k, k) - stdlib_cdotu(n - k, work, 1, a(k + 1, k), 1)
                    a(k, k - 1) = a(k, k - 1) - stdlib_cdotu(n - k, a(k + 1, k), 1, a(k + 1, k - 1), 1 &
                              )
                    call stdlib_ccopy(n - k, a(k + 1, k - 1), 1, work, 1)
                    call stdlib_csymv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + &
                              1, k - 1), 1)
                    a(k - 1, k - 1) = a(k - 1, k - 1) - stdlib_cdotu(n - k, work, 1, a(k + 1, k - 1), 1)
                              
                 end if
                 kstep = 2
              end if
              kp = abs(ipiv(k))
              if (kp /= k) then
                 ! interchange rows and columns k and kp in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                 call stdlib_cswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                 temp = a(k, k)
                 a(k, k) = a(kp, kp)
                 a(kp, kp) = temp
                 if (kstep == 2) then
                    temp = a(k, k - 1)
                    a(k, k - 1) = a(kp, k - 1)
                    a(kp, k - 1) = temp
                 end if
              end if
              k = k - kstep
              go to 50
60      continue
           end if
           return
           ! end of stdlib_csytri
     end subroutine stdlib_csytri

     ! CSYTRI_ROOK computes the inverse of a complex symmetric
     ! matrix A using the factorization A = U*D*U**T or A = L*D*L**T
     ! computed by CSYTRF_ROOK.

     subroutine stdlib_csytri_rook(uplo, n, a, lda, ipiv, work, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, n
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), work(*)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: k, kp, kstep
           complex(sp) :: ak, akkp1, akp1, d, t, temp
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           ! test the input parameters.
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (lda < max(1, n)) then
              info = -4
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytri_rook', -info)
              return
           end if
           ! quick return if possible
           if (n == 0) return
           ! check that the diagonal matrix d is nonsingular.
           if (upper) then
              ! upper triangular storage: examine d from bottom to top
              do info = n, 1, -1
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           else
              ! lower triangular storage: examine d from top to bottom.
              do info = 1, n
                 if (ipiv(info) > 0 .and. a(info, info) == czero) return
              end do
           end if
           info = 0
           if (upper) then
              ! compute inv(a) from the factorization a = u*d*u**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
30      continue
              ! if k > n, exit from loop.
              if (k > n) go to 40
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = cone/a(k, k)
                 ! compute column k of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_csymv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_cdotu(k - 1, work, 1, a(1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = a(k, k + 1)
                 ak = a(k, k)/t
                 akp1 = a(k + 1, k + 1)/t
                 akkp1 = a(k, k + 1)/t
                 d = t*(ak*akp1 - cone)
                 a(k, k) = akp1/d
                 a(k + 1, k + 1) = ak/d
                 a(k, k + 1) = -akkp1/d
                 ! compute columns k and k+1 of the inverse.
                 if (k > 1) then
                    call stdlib_ccopy(k - 1, a(1, k), 1, work, 1)
                    call stdlib_csymv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k), 1)
                              
                    a(k, k) = a(k, k) - stdlib_cdotu(k - 1, work, 1, a(1, k), 1)
                    a(k, k + 1) = a(k, k + 1) - stdlib_cdotu(k - 1, a(1, k), 1, a(1, k + 1), 1)
                              
                    call stdlib_ccopy(k - 1, a(1, k + 1), 1, work, 1)
                    call stdlib_csymv(uplo, k - 1, -cone, a, lda, work, 1, czero, a(1, k + 1), 1)
                              
                    a(k + 1, k + 1) = a(k + 1, k + 1) - stdlib_cdotu(k - 1, work, 1, a(1, k + 1), 1)
                              
                 end if
                 kstep = 2
              end if
              if (kstep == 1) then
                 ! interchange rows and columns k and ipiv(k) in the leading
                 ! submatrix a(1:k+1,1:k+1)
                 kp = ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    call stdlib_cswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              else
                 ! interchange rows and columns k and k+1 with -ipiv(k) and
                 ! -ipiv(k+1)in the leading submatrix a(1:k+1,1:k+1)
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    call stdlib_cswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                    temp = a(k, k + 1)
                    a(k, k + 1) = a(kp, k + 1)
                    a(kp, k + 1) = temp
                 end if
                 k = k + 1
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp > 1) call stdlib_cswap(kp - 1, a(1, k), 1, a(1, kp), 1)
                    call stdlib_cswap(k - kp - 1, a(kp + 1, k), 1, a(kp, kp + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              end if
              k = k + 1
              go to 30
40      continue
           else
              ! compute inv(a) from the factorization a = l*d*l**t.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
50      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 60
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! invert the diagonal block.
                 a(k, k) = cone/a(k, k)
                 ! compute column k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_csymv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + 1, &
                               k), 1)
                    a(k, k) = a(k, k) - stdlib_cdotu(n - k, work, 1, a(k + 1, k), 1)
                 end if
                 kstep = 1
              else
                 ! 2 x 2 diagonal block
                 ! invert the diagonal block.
                 t = a(k, k - 1)
                 ak = a(k - 1, k - 1)/t
                 akp1 = a(k, k)/t
                 akkp1 = a(k, k - 1)/t
                 d = t*(ak*akp1 - cone)
                 a(k - 1, k - 1) = akp1/d
                 a(k, k) = ak/d
                 a(k, k - 1) = -akkp1/d
                 ! compute columns k-1 and k of the inverse.
                 if (k < n) then
                    call stdlib_ccopy(n - k, a(k + 1, k), 1, work, 1)
                    call stdlib_csymv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + 1, &
                               k), 1)
                    a(k, k) = a(k, k) - stdlib_cdotu(n - k, work, 1, a(k + 1, k), 1)
                    a(k, k - 1) = a(k, k - 1) - stdlib_cdotu(n - k, a(k + 1, k), 1, a(k + 1, k - 1), 1 &
                              )
                    call stdlib_ccopy(n - k, a(k + 1, k - 1), 1, work, 1)
                    call stdlib_csymv(uplo, n - k, -cone, a(k + 1, k + 1), lda, work, 1, czero, a(k + 1, &
                               k - 1), 1)
                    a(k - 1, k - 1) = a(k - 1, k - 1) - stdlib_cdotu(n - k, work, 1, a(k + 1, k - 1), 1)
                              
                 end if
                 kstep = 2
              end if
              if (kstep == 1) then
                 ! interchange rows and columns k and ipiv(k) in the trailing
                 ! submatrix a(k-1:n,k-1:n)
                 kp = ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    call stdlib_cswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              else
                 ! interchange rows and columns k and k-1 with -ipiv(k) and
                 ! -ipiv(k-1) in the trailing submatrix a(k-1:n,k-1:n)
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    call stdlib_cswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                    temp = a(k, k - 1)
                    a(k, k - 1) = a(kp, k - 1)
                    a(kp, k - 1) = temp
                 end if
                 k = k - 1
                 kp = -ipiv(k)
                 if (kp /= k) then
                    if (kp < n) call stdlib_cswap(n - kp, a(kp + 1, k), 1, a(kp + 1, kp), 1)
                    call stdlib_cswap(kp - k - 1, a(k + 1, k), 1, a(kp, k + 1), lda)
                    temp = a(k, k)
                    a(k, k) = a(kp, kp)
                    a(kp, kp) = temp
                 end if
              end if
              k = k - 1
              go to 50
60      continue
           end if
           return
           ! end of stdlib_csytri_rook
     end subroutine stdlib_csytri_rook

     ! CSYTRS solves a system of linear equations A*X = B with a complex
     ! symmetric matrix A using the factorization A = U*D*U**T or
     ! A = L*D*L**T computed by CSYTRF.

     subroutine stdlib_csytrs(uplo, n, nrhs, a, lda, ipiv, b, ldb, info)
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           ! .. scalar arguments ..
           character :: uplo
           integer(ilp) :: info, lda, ldb, n, nrhs
           ! .. array arguments ..
           integer(ilp) :: ipiv(*)
           complex(sp) :: a(lda, *), b(ldb, *)
        ! =====================================================================
           
           ! .. local scalars ..
           logical(lk) :: upper
           integer(ilp) :: j, k, kp
           complex(sp) :: ak, akm1, akm1k, bk, bkm1, denom
     
           ! .. intrinsic functions ..
           intrinsic :: max
           ! .. executable statements ..
           info = 0
           upper = stdlib_lsame(uplo, 'u')
           if (.not. upper .and. .not. stdlib_lsame(uplo, 'l')) then
              info = -1
           else if (n < 0) then
              info = -2
           else if (nrhs < 0) then
              info = -3
           else if (lda < max(1, n)) then
              info = -5
           else if (ldb < max(1, n)) then
              info = -8
           end if
           if (info /= 0) then
              call stdlib_xerbla('stdlib_csytrs', -info)
              return
           end if
           ! quick return if possible
           if (n == 0 .or. nrhs == 0) return
           if (upper) then
              ! solve a*x = b, where a = u*d*u**t.
              ! first solve u*d*x = b, overwriting b with x.
              ! k is the main loop index, decreasing from n to 1 in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = n
10      continue
              ! if k < 1, exit from loop.
              if (k < 1) go to 30
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_cgeru(k - 1, nrhs, -cone, a(1, k), 1, b(k, 1), ldb, b(1, 1), ldb &
                           )
                 ! multiply by the inverse of the diagonal block.
                 call stdlib_cscal(nrhs, cone/a(k, k), b(k, 1), ldb)
                 k = k - 1
              else
                 ! 2 x 2 diagonal block
                 ! interchange rows k-1 and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k - 1) call stdlib_cswap(nrhs, b(k - 1, 1), ldb, b(kp, 1), ldb)
                 ! multiply by inv(u(k)), where u(k) is the transformation
                 ! stored in columns k-1 and k of a.
                 call stdlib_cgeru(k - 2, nrhs, -cone, a(1, k), 1, b(k, 1), ldb, b(1, 1), ldb &
                           )
                 call stdlib_cgeru(k - 2, nrhs, -cone, a(1, k - 1), 1, b(k - 1, 1), ldb, b(1, 1), &
                           ldb)
                 ! multiply by the inverse of the diagonal block.
                 akm1k = a(k - 1, k)
                 akm1 = a(k - 1, k - 1)/akm1k
                 ak = a(k, k)/akm1k
                 denom = akm1*ak - cone
                 do j = 1, nrhs
                    bkm1 = b(k - 1, j)/akm1k
                    bk = b(k, j)/akm1k
                    b(k - 1, j) = (ak*bkm1 - bk)/denom
                    b(k, j) = (akm1*bk - bkm1)/denom
                 end do
                 k = k - 2
              end if
              go to 10
30      continue
              ! next solve u**t *x = b, overwriting b with x.
              ! k is the main loop index, increasing from 1 to n in steps of
              ! 1 or 2, depending on the size of the diagonal blocks.
              k = 1
40      continue
              ! if k > n, exit from loop.
              if (k > n) go to 50
              if (ipiv(k) > 0) then
                 ! 1 x 1 diagonal block
                 ! multiply by inv(u**t(k)), where u(k) is the transformation
                 ! stored in column k of a.
                 call stdlib_cgemv('transpose', k - 1, nrhs, -cone, b, ldb, a(1, k), 1, cone, b( &
                           k, 1), ldb)
                 ! interchange rows k and ipiv(k).
                 kp = ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k + 1
              else
                 ! 2 x 2 diagonal block
                 ! multiply by inv(u**t(k+1)), where u(k+1) is the transformation
                 ! stored in columns k and k+1 of a.
                 call stdlib_cgemv('transpose', k - 1, nrhs, -cone, b, ldb, a(1, k), 1, cone, b( &
                           k, 1), ldb)
                 call stdlib_cgemv('transpose', k - 1, nrhs, -cone, b, ldb, a(1, k + 1), 1, cone, b( &
                            k + 1, 1), ldb)
                 ! interchange rows k and -ipiv(k).
                 kp = -ipiv(k)
                 if (kp /= k) call stdlib_cswap(nrhs, b(k, 1), ldb, b(kp, 1), ldb)
                 k = k + 2
              end if
              g