!================================================================================
!
! Module:
!
! (1) mtxel_sxch()          Originally By ?         Last Modified 10/5/2009 (gsm)
!
!     Computes the sum (for information on current processor) of the
!     screened exchange and Coulomb hole (excluding bare exchange) for
!     the self-energy operator between bands n (sig%diag(in)) and m
!     (sig%diag(im)) at the frequency of the band l (sig%diag(il)).
!
!     Screened exchange is put into asxt(iw), Coulomb hole into acht(iw),
!     iw goes from 1 to 3 (see below for what it is).
!
!     On entry, asxt(:) and acht(:) are zeroed.
!
!     Physically, this routine will compute:
!
!     < psi_n ( k ) | Sigma_SX ( E ) + Sigma_CH ( E ) | psi_m ( k ) >
!
!     where E = E_l ( k ) + ( iw - 2 ) * sig%dw and iw = 1, 2, 3
!     so we evaluate the matrix elements of Sigma at three different
!     energies, E_l ( k ) - sig%dw, E_l ( k ), and E_l ( k ) + sig%dw.
!
!     Sigma(E) is computed within different approximations:
!     - static COHSEX approximation, both exact and partial sum CH (gsm)
!     - Generalized Plasmon Pole model (?)
!     - full frequency dependent inverse dielectric matrix (CDS/CHP/gsm)
!
!     If sig%ggpsum=1 we compute a half of the sum over G and G` vectors
!     (igp=1,ncouls and ig=1,igp) and take the complex conjugate for the
!     other half. This is only valid if we use the same truncation scheme
!     in both Epsilon and Sigma. If sig%ggpsum=2 we compute the full sum.
!
!     ch2 variables are used to calculate the CH part of the imaginary part
!     of the self energy assuming a zero energy broadening in the energy
!     denominator for the evaluation of the self energy (CHP).
!
!================================================================================

#include "f_defs.h"

module mtxel_sxch_m

  use global_m
  use misc_m
  implicit none

  public :: mtxel_sxch

contains

subroutine mtxel_sxch(in,il,ispin,ncouls,neps,gvec,eps,ph,ind, &
  isrtrqi,isrtrq,vcoul,crys,sig,wpg,wfnk,wfnkq,ncoulch, &
  aqsn,aqsm,aqsch,asigt_imag,acht_n1,asxt,acht,achtcor,nspin,qk, &
  coulfact,igown,iggown,epsR,epsA, &
  achtD_n1,asxtDyn,achtDyn,ach2tDyn,icalc)

  integer, intent(in) :: in,il,ispin,ncouls,neps
  type (gspace), intent(in) :: gvec
  SCALAR, intent(in) :: eps(:,:) !< (neps,ngown)
  SCALAR, intent(in) :: ph(:) !< (gvec%ng)
  integer, intent(in) :: ind(:),isrtrqi(:),isrtrq(:) !< (gvec%ng)
  real(DP), intent(in) :: vcoul(:) !< (ncoul)
  type (crystal), intent(in) :: crys
  type (siginfo), intent(in) :: sig
  type (wpgen), intent(in) :: wpg
  type (wfnkstates), intent(in) :: wfnk
  type (wfnkqstates), intent(in) :: wfnkq
  integer, intent(in) :: ncoulch
  SCALAR, intent(in) :: aqsn(:,:), aqsm(:,:) !< (peinf%ntband_max,ncoul)
  SCALAR, intent(in) :: aqsch(:) !< (ncoulch)
  SCALAR, intent(out) :: acht_n1(:) !< (sig%ntband)
  SCALAR, intent(out) :: asxt(:), acht(:) !< (3)
  SCALAR, intent(out) :: achtcor
  SCALAR, intent(out) :: asigt_imag
  integer, intent(in) :: nspin
  real(DP), intent(in) :: qk(:) !< (3)
  real(DP), intent(in) :: coulfact
  integer, intent(in) :: iggown(:), igown(:) !< (neps)
  complex(DPC), intent(in) :: epsR(:,:,:),epsA(:,:,:) !< (sig%nFreq,neps,ngown)
  complex(DPC), intent(out) :: achtD_n1(:) !< (sig%ntband)
  complex(DPC), intent(out) :: asxtDyn(:), achtDyn(:), ach2tDyn(:) !< (sig%nfreqeval)
  integer, intent(in) :: icalc

  SCALAR, allocatable :: epstemp(:)
  SCALAR :: asigtemp_imag
  complex(DPC), allocatable :: epsRtemp(:,:),epsAtemp(:,:)

  integer :: ijk, cycle_count, ncouls2
  real(DP) :: qkk(3),diff,diffmin
  logical :: flag_occ
  integer :: ig,igp,igpp,igpp2,iw,iwlda,n1,iband,n1true,nstart,nend,igmax,gpp(3),iinter
  real(DP) :: wx,ssxcutoff,ssxcutoff2,delw2,occ,tempval,dinterfrac, dinterfracinv
! chs - partial sum static CH, chx - exact static CH
  SCALAR :: asxtemp(3),achtemp(3),achstemp,achxtemp,ssx,sch, &
    schs,schx,matngmatmgp,matngpmatmg,epsggp,I_epsggp, &
    Omega2,wtilde2,invwtilde2
  SCALAR, allocatable :: wpmtx(:)
  real(DP) :: e_lk, e_n1kq, lambda, phi
  complex(DPC) :: wtilde,invwtilde,halfinvwtilde,wtilde2_temp,delw
  integer :: iout, nc_on_node
  logical :: imaginary_flag

! full-frequency

  integer :: ifreq
  real(DP) :: E_max
  real(DP), allocatable :: pref(:)
  complex(DPC) :: schD,schDgpg,matngmatmgpD,matngpmatmgD,achsDtemp,schsD
  complex(DPC), allocatable :: asxDtemp(:),achDtemp(:),ach2Dtemp(:)
  complex(DPC), allocatable :: schDi(:), schDigpg(:)
  complex(DPC), allocatable :: sch2Di(:)
  complex(DPC), allocatable :: ssxDi(:), ssxDigpg(:)
  real(DP), allocatable :: dinterfraci(:),dinterfracii(:),wxi(:)
  complex(DPC) :: cedifft
  complex(DPC), allocatable :: epsRggp(:),epsAggp(:),I_epsRggp(:),I_epsAggp(:)
  complex(DPC) :: I_epsRggp_int, I_epsAggp_int
  
  PUSH_SUB(mtxel_sxch)

  if(sig%freq_dep .eq. -1) then
    call die("BUG: cannot call mtxel_sxch for Hartree-Fock!")
  endif

!-----------------------
! Initialization for full-frequency CH integral

  if (sig%freq_dep.eq.2) then
    SAFE_ALLOCATE(epsRtemp, (sig%nFreq,neps))
    SAFE_ALLOCATE(epsAtemp, (sig%nFreq,neps))
    SAFE_ALLOCATE(pref, (sig%nFreq))
    do ifreq=1,sig%nFreq
      if (ifreq .lt. sig%nFreq) then
        pref(ifreq)=(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))/PI_D/dble(sig%ninter)
      else
        pref(ifreq)=pref(ifreq-1)
      endif
    enddo
    pref(1)=pref(1)*0.5d0
    pref(sig%nFreq)=pref(sig%nFreq)*0.5d0
#ifdef CPLX
    do ifreq=1,sig%nFreq
      pref(ifreq)=pref(ifreq)*0.5d0
    enddo
#endif
    E_max=sig%dFreqGrid(sig%nFreq)
    SAFE_ALLOCATE(asxDtemp, (sig%nfreqeval))
    asxDtemp = 0D0
    SAFE_ALLOCATE(achDtemp, (sig%nfreqeval))
    achDtemp = 0D0
    SAFE_ALLOCATE(ach2Dtemp, (sig%nfreqeval))
    ach2Dtemp = 0D0
    SAFE_ALLOCATE(schDi, (sig%nfreqeval))
    schDi=0D0
    SAFE_ALLOCATE(sch2Di, (sig%nfreqeval))
    sch2Di=0D0
    SAFE_ALLOCATE(ssxDi, (sig%nfreqeval))
    ssxDi=0D0
    SAFE_ALLOCATE(schDigpg, (sig%nfreqeval))
    schDigpg=0D0
    SAFE_ALLOCATE(ssxDigpg, (sig%nfreqeval))
    ssxDigpg=0D0
    SAFE_ALLOCATE(wxi, (sig%nfreqeval))
    wxi=0D0
    SAFE_ALLOCATE(dinterfraci, (sig%ninter))
    SAFE_ALLOCATE(dinterfracii, (sig%ninter))
    dinterfraci=0D0
    dinterfracii=0D0

  else
    SAFE_ALLOCATE(epstemp, (neps))
  endif

! Initialize Output Arrays
! SIB: Zero contribution to asx(n) and ach(n) for this irq

  if (sig%freq_dep.eq.0.or.sig%freq_dep.eq.1) then
    asxt(:) = ZERO
    acht(:) = ZERO
    acht_n1(:) = ZERO
  elseif (sig%freq_dep.eq.2) then
    asxtDyn(:) = (0.0d0,0.0d0)
    achtDyn(:) = (0.0d0,0.0d0)
    ach2tDyn(:) = (0.0d0,0.0d0)
    achtD_n1(:) = (0.0d0,0.0d0)
  endif
  achtcor = ZERO
  asigt_imag = ZERO

! SIB: the array wfnk%ek has bands indexed by sig%diag
! take the right band index

  iband = sig%diag(il)
  e_lk = wfnk%ek(iband,ispin)

! Allocate Temporary Arrays

  if (sig%freq_dep.eq.1) then
    SAFE_ALLOCATE(wpmtx, (neps))
  endif
  if (sig%freq_dep.eq.2) then
    SAFE_ALLOCATE(epsRggp, (sig%nFreq))
    SAFE_ALLOCATE(I_epsRggp, (sig%nFreq))
#ifdef CPLX
    SAFE_ALLOCATE(epsAggp, (sig%nFreq))
    SAFE_ALLOCATE(I_epsAggp, (sig%nFreq))
#endif
  endif

!------------------------
! JRD: This changes qkk to q0vec for q=0 instead of using exactly zero

#ifdef VERBOSE
  qkk=qk
  diff = abs(qkk(1)) + abs(qkk(2)) + abs(qkk(3))
  if (diff.lt.TOL_Zero) then
    qkk(:) = sig%q0vec(:)
    if (in .eq. 1 .and. il .eq. 1) then ! no need to write again for off-diagonals
      if (peinf%inode.eq.0) then
        write(7,998) qkk
      endif
    endif
  endif
998 format(/,6x,"q0vec =",3f7.4)
#endif

!-------------- Main loop over G` (igp) ---------------------------------------

  cycle_count = 0

  do igp=1,ncouls

    if (sig%ggpsum.eq.1) then
      igmax=igp
    else
      igmax=ncouls
    endif
    
!!------------- Initialize eps^-1 for this G` ---------------------------------

    if (peinf%inode.eq.igown(ind(igp))) then
      if (sig%freq_dep.eq.0.or.sig%freq_dep.eq.1) then
        epstemp(:)=eps(:,iggown(ind(igp)))
      endif
      if (sig%freq_dep.eq.2) then
        epsRtemp(:,:)=epsR(:,:,iggown(ind(igp)))
#ifdef CPLX
        epsAtemp(:,:)=epsA(:,:,iggown(ind(igp)))
#endif
      endif
    endif

! JRD: Possible Time Hazard.  Can`t really speed up easily.

#ifdef MPI
    if (sig%freq_dep.eq.0.or.sig%freq_dep.eq.1) then
      call MPI_Bcast(epstemp,neps,MPI_SCALAR,igown(ind(igp)),MPI_COMM_WORLD,mpierr)
    endif
    if (sig%freq_dep.eq.2) then
      call MPI_Bcast(epsRtemp,neps*sig%nFreq,MPI_COMPLEX_DPC, &
        igown(ind(igp)),MPI_COMM_WORLD,mpierr)
#ifdef CPLX
      call MPI_Bcast(epsAtemp,neps*sig%nFreq,MPI_COMPLEX_DPC, &
        igown(ind(igp)),MPI_COMM_WORLD,mpierr)
#endif
    endif
#endif

!------------------------------------------------------------------------------
! (gsm) Below you`ll find the code to compute SX & CH in the static COHSEX
!       approximation (both the exact and partial sum expressions for CH),
!       within the GPP model, and using the full frequency dependent RPA
!       inverse dielectric matrix. The GPP section of the code is well
!       commented (thanks to Sohrab), while the COHSEX and RPA sections
!       are not... But most of the variables are the same anyways,
!       so just look at the comments in the GPP section.
!------------------------------------------------------------------------------

! (gsm) <<<<<< static COHSEX approximation - exact static CH >>>>>>

! < n k | \Sigma_{CH} (r, r`; 0) | m k > =
! \frac{1}{2} \sum_{q G G`}
! < n k | e^{i (G - G`) \cdot r} | m k >
! [\eps_{G G`}^{-1} (q; 0) - \delta_{G G`}] v (q + G`)

    if (sig%freq_dep.eq.0.or.sig%exact_ch.eq.1) then
      
      achxtemp = ZERO

      if (mod(peinf%inode,peinf%npes/peinf%npools).eq.0) then
        do ig=1,igmax
          if (sig%freq_dep.eq.0.or.sig%freq_dep.eq.1) then
            epsggp = ph(ig)*MYCONJG(ph(igp))*epstemp(ind(ig))
          else
            epsggp = ph(ig)*MYCONJG(ph(igp))*SCALARIFY(epsRtemp(1,ind(ig)))
          endif
          if (ig.eq.igp) then
            I_epsggp = epsggp - 1.0d0
          else
            I_epsggp = epsggp
          endif
          if (abs(I_epsggp).lt.TOL_Small) cycle
          if (ig.ne.igp) then
            gpp(:)=gvec%k(:,isrtrq(ig))-gvec%k(:,isrtrq(igp))
            call findvector(iout,gpp(1),gpp(2),gpp(3),gvec)
            if (iout.eq.0) cycle
            igpp=isrtrqi(iout)
            if (igpp.lt.1.or.igpp.gt.ncoulch) cycle
            gpp(:)=gvec%k(:,isrtrq(igp))-gvec%k(:,isrtrq(ig))
            call findvector(iout,gpp(1),gpp(2),gpp(3),gvec)
            if (iout.eq.0) cycle
            igpp2=isrtrqi(iout)
            if (igpp2.lt.1.or.igpp2.gt.ncoulch) cycle
          else
            gpp(:)=0
            call findvector(iout,gpp(1),gpp(2),gpp(3),gvec)
            if (iout.eq.0) cycle
            igpp=isrtrqi(iout)
            if (igpp.lt.1.or.igpp.gt.ncoulch) cycle
          endif
          schx = aqsch(igpp) * I_epsggp
          if (sig%ggpsum.eq.1.and.ig.lt.igp) then
            schx = schx + aqsch(igpp2) * MYCONJG(I_epsggp)
          endif
          achxtemp = achxtemp + schx * 0.5d0
        enddo ! over G (ig)
      endif

    endif ! sig%freq_dep.eq.0.or.sig%exact_ch.eq.1

! (gsm) <<<<<< static COHSEX approximation - CH as a partial sum over empty bands >>>>>>

! < n k | \Sigma_{SX} (r, r`; 0) | m k > =
! - \sum_{n_1}^{occ} \sum_{q G G`}
! < n k | e^{i (q + G) \cdot r} | n_1 k - q >
! < n_1 k - q | e^{- i (q + G`) \cdot r`} | m k >
! [\eps_{G G`}^{-1} (q; 0) - \delta_{G G`}] v (q + G`)

! < n k | \Sigma_{CH} (r, r`; 0) | m k > =
! \frac{1}{2} \sum_{n_1} \sum_{q G G`}
! < n k | e^{i (q + G) \cdot r} | n_1 k - q >
! < n_1 k - q | e^{- i (q + G`) \cdot r`} | m k >
! [\eps_{G G`}^{-1} (q; 0) - \delta_{G G`}] v (q + G`)

    if (sig%freq_dep.eq.0) then
      
      asxtemp(:) = ZERO
      achtemp(:) = ZERO

      do ig=1,igmax
        epsggp = ph(ig)*MYCONJG(ph(igp))*epstemp(ind(ig))
        if (ig.eq.igp) then
          I_epsggp = epsggp - 1.0d0
        else
          I_epsggp = epsggp
        endif
        if (abs(I_epsggp).lt.TOL_Small) cycle
        nc_on_node = peinf%ntband_node
        do n1=1,nc_on_node
          n1true = peinf%indext(n1)
          e_n1kq = wfnkq%ekq(n1true,ispin)
          matngmatmgp = aqsn(n1,ig) * MYCONJG(aqsm(n1,igp))
          if (sig%ggpsum.eq.1) &
            matngpmatmg = aqsn(n1,igp) * MYCONJG(aqsm(n1,ig))
          flag_occ = (n1true.le.(sig%nvband+sig%ncrit)) &
            .and.((sig%ncrit.eq.0).or.(e_n1kq.le.(sig%efermi+TOL_Degeneracy)))
          
          tempval=abs(e_n1kq-sig%efermi)
          if (tempval .lt. TOL_Degeneracy) then
            occ = 0.5D0 ! Fermi-Dirac distribution = 1/2 at Fermi level
          else
            occ = 1.0D0
          endif
          
          ssx = I_epsggp
          if(sig%exact_ch == 0) sch = I_epsggp * 0.5d0
          if (sig%ggpsum.eq.1.and.ig.lt.igp) then
            ssx = matngmatmgp*ssx + matngpmatmg*MYCONJG(ssx)
           if(sig%exact_ch == 0) sch = matngmatmgp*sch + matngpmatmg*MYCONJG(sch)
          else
            ssx = matngmatmgp*ssx
            if(sig%exact_ch == 0) sch = matngmatmgp*sch
          endif
          if (flag_occ) then
            asxtemp(2) = asxtemp(2) - ssx*occ
          endif
          if(sig%exact_ch == 0) achtemp(2) = achtemp(2) + sch
          if(sig%exact_ch == 0) acht_n1(n1true) = acht_n1(n1true) + sch * vcoul(igp)
        enddo ! over bands (n1)
      enddo ! over G (ig)

! (gsm) <<<<<< Generalized Plasmon Pole model >>>>>>

    elseif (sig%freq_dep.eq.1) then

! Zero out temporary accumulation variables

      asxtemp(:) = ZERO
      achtemp(:) = ZERO
      asigtemp_imag = ZERO

      if (sig%exact_ch.eq.1) achstemp = ZERO

!----------------------
! Calculate Plasma Frequencies
!
! SIB: Here we get the plasmon-pole effective plasma frequecies
! Omega(G,G`)^2 (equation (31) of Hybersten & Louie, PRB 34, 1986, p 5396)
! which come in the vector wp(G) for current G`.

! SIB: We calculate wp(G) for a given G` (trade-off for memory)
! Note that wp(G) G=1,ncouls requires O(N) operations
! Even if we redo it for each band n, not so bad

! JRD: I changed this to use qk instead of qkk because we use vcoul at q=0
! (instead of small q) throughout

! given how many times this routine is called, timing it appears to take a non-negligible amount of time
!      if (peinf%inode.eq.0) call timacc(12,1,tsec)
      call wpeff(crys,gvec,wpg,sig,neps,isrtrq,igp,ncouls,wpmtx,nspin,qk,vcoul,coulfact)
!      if (peinf%inode.eq.0) call timacc(12,2,tsec)

!!------ For given G` (igp), loop over all G vectors (ig) in lower triangle ---

      do ig=1,igmax

        imaginary_flag = .false.

! Put epsilon(G,G`) into epsggp

!       epsggp = ph(ig)*MYCONJG(ph(igp))*eps(ind(ig),ind(igp))
        epsggp = ph(ig)*MYCONJG(ph(igp))*epstemp(ind(ig))

! I_epsggp = Kronecker(G,G`) - eps(G,G`)

        if (ig.eq.igp) then
          I_epsggp = ONE - epsggp
        else
          I_epsggp = ZERO - epsggp
        endif

! If I_epsggp is too small, then we skip this (G,G`) entry
! This only happens when eps is 1 on diagonal or 0 off diagonal
! but, this means no screening correct and is already treated properly in bare
! exchange term

        if (abs(I_epsggp).lt.TOL_Small) cycle

! (gsm) compute the static CH for the static remainder

        if (sig%exact_ch.eq.1) then
          nc_on_node = peinf%ntband_node
          do n1=1,nc_on_node
            matngmatmgp = aqsn(n1,ig) * MYCONJG(aqsm(n1,igp))
            if (sig%ggpsum.eq.1) &
              matngpmatmg = aqsn(n1,igp) * MYCONJG(aqsm(n1,ig))
            schs=-I_epsggp*0.5d0
            if (sig%ggpsum.eq.1.and.ig.lt.igp) then
              schs = matngmatmgp*schs + matngpmatmg*MYCONJG(schs)
            else
              schs = matngmatmgp*schs
            endif
            achstemp = achstemp + schs
          enddo ! over bands (n1)
        endif ! sig%exact_ch.eq.1

! Omega2 = Omega(G,G`)^2 = effective plasma freq^2

        Omega2 = wpmtx(ig)

! If Omega2 is too small, then we skip this (G,G`) entry
! JRD: I am not sure why we have to cycle here... :/ Probably not needed

        if (abs(Omega2).lt.TOL_Small) cycle

#ifdef CPLX

! <<<<<<<<<<<< COMPLEX GPP >>>>>>>>>>>>

! (gsm) equations (17), (20), (21) from [PRB 40, 3162 (1989)]

        wtilde2_temp = Omega2 / I_epsggp
 
        if (dble(wtilde2_temp)/abs(wtilde2_temp).lt.TOL_Small) then
          cycle_count = cycle_count + 1
          imaginary_flag = .true.
          !cycle
        else 
          imaginary_flag = .false.
        endif

        lambda = abs(wtilde2_temp)
        if (lambda .lt. TOL_Small) cycle
        phi = atan2(IMAG(wtilde2_temp), dble(wtilde2_temp))
        if (abs(cos(phi)) .lt. TOL_Small) cycle
        wtilde2 = lambda / cos(phi)
        Omega2 = Omega2 * CMPLX(1.0d0, -tan(phi))

#else

! <<<<<<<<<<<< REAL GPP >>>>>>>>>>>>

! (gsm) equation (30) from [PRB 34, 5390 (1986)]

        wtilde2 = Omega2 / I_epsggp
        if (abs(wtilde2) .lt. TOL_Small) cycle

#endif

! gsm & jrd: find "complex" mode frequency wtilde

        wtilde = sqrt(COMPLEXIFY(wtilde2))

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
! <<<<<<<<<<<< OLD GPP CODE FOR BOTH REAL AND COMPLEX >>>>>>>>>>>>
!
!! Compute the mode frequency squared, wtilde2 (for "omega_tilde^2")
!! (Equation (30) on p. 5396 in above reference).  The GPP model has
!! its pole at wtilde.
!
!        wtilde2 = Omega2 / I_epsggp
!
!! Check for wtilde2 being too small
!
!        if (ig.eq.igp) then
!
!! SIB: For G.eq.G`, wtilde2 is real, so we just check if it`s positive
!! JRD corrects SIB: if it is negative we should multiply it by -1 and continue...
!! See equation 20 in PRB Volume 40 3162 1989
!
!          if (dble(wtilde2).lt.TOL_Small) cycle
!        else
!
!! SIB: For G.neq.G`, wtilde2 can be complex, so we check
!! that its magnitude is large enough, and that its
!! real part is large enough.
!
!          if (abs(wtilde2).lt.TOL_Small) cycle
!          if (dble(wtilde2)/abs(wtilde2).lt.TOL_Small) cycle
!        endif
!
!! Find "positive" mode frequency wtilde
!
!        wtilde = sqrt(wtilde2)
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

! Some constants used in the loop below, computed here to save
! floating point operations

        invwtilde = 1.0d0/wtilde
        halfinvwtilde = 0.5d0*invwtilde
        invwtilde2 = 1.0d0/wtilde2
        ssxcutoff = sig%sexcut*abs(I_epsggp)
        ssxcutoff2 = ssxcutoff**2

!!!----- Loop over all bands n1 on curr. processor -----

!-----------------
! Set up Variables for Band Loop

        nc_on_node = peinf%ntband_node

!---------------------
! Begin Actual Band Loop

        do n1=1,nc_on_node

! n1true = "True" band index of the band n1 w.r.t. all bands

          n1true = peinf%indext(n1)

! energy of the |n1,k-q> state

          e_n1kq = wfnkq%ekq(n1true,ispin)

! Product of matrix elements M(n,G)*conj(M(m,G`))
! where M(j,G) = <j,k|exp(iGr)|n1,k-q> and also for G,G` flipped
! (since we are only summing on G.le.G`)

          matngmatmgp = aqsn(n1,ig) * MYCONJG(aqsm(n1,igp))
          if (sig%ggpsum.eq.1) &
            matngpmatmg = aqsn(n1,igp) * MYCONJG(aqsm(n1,ig))

! occupation of the |n1,k-q> state

          flag_occ = (n1true.le.(sig%nvband+sig%ncrit)) &
            .and.((sig%ncrit.eq.0).or.(e_n1kq.le.(sig%efermi+TOL_Degeneracy)))
          
          tempval=abs(e_n1kq-sig%efermi)
          if (tempval .lt. TOL_Degeneracy) then
            occ = 0.5d0 ! Fermi-Dirac distribution = 1/2 at Fermi level
          else
            occ = 1.0d0
          endif
          
!!!!--- Loop over three energy values which we compute Sigma -----------
!
! SIB: In terms of formulae, the two terms we will compute are given in
! formulae (34a,34b) on p. 5397 of above reference.
!
!                                         Omega^2(G,G`)
!    SX(E) = M(n,G)*conj(M(m,G`)) * ------------------------ * Vcoul(G`)
!                                   (E-E_n1(k-q))^2-wtilde^2
!
!                                           Omega^2(G,G`)
!    CH(E) = M(n,G)*conj(M(m,G`)) * ----------------------------- * Vcoul(G`)
!                                   2*wtilde*[E-E_n1(k-q)-wtilde]
!
! and we are evaluating both at E = E_l(k) and E = E_l(k) +/- dE.
! SX only gets contributions for the band n1 being an occupied state.
!
! For diagonal elements, we need matrix elements at various
! energies (to get quasi-particle energies), i.e. for iw=1,2,3;
! but for off-diagonal elements, we only need them at iw=2
! (the actual energy) so we won`t even bother calculating
! them at other energies
!
! Note: below assumes that we will use iw=1,2,3 in
! subroutine shiftenergy depending on value of sig%fdf

          if (sig%fdf.eq.-1) then
            nstart = 1
            nend = 2
          elseif (sig%fdf.eq.0) then
            nstart = 1
            nend = 3
          elseif (sig%fdf.eq.1.or.(sig%fdf.eq.2.and.icalc.eq.1)) then
            nstart = 2
            nend = 3
          else
            nstart = 2
            nend = 2
          endif
          
          do iw=nstart,nend

! wx = E_l(k) - E_n1(k-q) + dE = difference in energies
! of the two states appearing as E - E_n1(k-q) above.

            wx = e_lk - e_n1kq + sig%dw*(iw-2)

! delw measures how close to zero the difference
! wx - wtilde = E - E_n1(k-q) - wtilde is relative to wtilde:
! delw = (E - E_n1(k-q) - wtilde) / (2 * wtilde)
! delw2 is the squared absolute value of delw

            delw = (wx - wtilde) * halfinvwtilde
            delw2 = abs(delw)**2

! If delw is small, both SX and CH blow up, but their sum (for
! an occupied state n1) is finite.  In such a case, their sum
! is Omega^2 / (4 * wtilde2) / (1 + delw). Then the sum is
! assigned to ssx and sch is set to zero.

            if (abs(wx-wtilde).lt.sig%gamma .or. delw2.lt.TOL_Small) then
              ssx = -Omega2 / (4.0d0 * wtilde2 * (1.0d0 + delw))
              sch = 0.0d0
            else
              ssx = Omega2 / (wx**2 - wtilde2)
! gsm & jrd: this is a part of the old GPP code
!              sch = ssx * (wx + wtilde) * halfinvwtilde
              sch = halfinvwtilde * Omega2 / (wx - wtilde)
            endif

! If ssx is too large (which can happen due to the pole at
! wx + wtilde = 0 of the SX term), then we should drop this term.
! See the discussion at the bottom of p. 5411-5412 of Hybertsen & Louie.

            if (abs(ssx)**2 .gt. ssxcutoff2 .and. wx .lt. 0.0d0) ssx=0.0d0

! If G.neq.G`, then since we sum over only lower triangle,
! we include the contribution we would have had from (G`,G).

            if (sig%ggpsum.eq.1.and.ig.lt.igp) then
              ssx = matngmatmgp*ssx + matngpmatmg*MYCONJG(ssx)
              sch = matngmatmgp*sch + matngpmatmg*MYCONJG(sch)
            else
              ssx = matngmatmgp*ssx
              sch = matngmatmgp*sch
            endif

! If a valence band, then accumulate SX contribution.

            if (flag_occ) then
              asxtemp(iw) = asxtemp(iw) - ssx*occ
            endif

! Accumulate CH contribution.

            achtemp(iw) = achtemp(iw) + sch

! Logging CH convergence.

            if (iw.eq.2) acht_n1(n1true) = &
              acht_n1(n1true) + sch * vcoul(igp)

!-----------------------
! JRD: Compute GPP Error...
! GPP Model Error Estimate

            if (imaginary_flag .and. iw .eq. 2) then
              asigtemp_imag = asigtemp_imag + sch
              if (flag_occ) then
                asigtemp_imag = asigtemp_imag - ssx*occ
              endif
            endif

          enddo ! over energies (iw)

! End Loop on Energy Values
!!!!------------------------------------------------------------------

        enddo ! over bands (n1)

! End Loop on Bands
!!!-------------------------------------------------------------------

      enddo ! over G (ig)

! End Loop on G
!!--------------------------------------------------------------------

! (gsm) <<<<<< full frequency dependent inverse dielectric matrix >>>>>>

! The code below makes use of the following relations:
!
! {eps_{G G`}^r}^{-1}(q, -E) = {eps_{G G`}^a}^{-1}(q, E)
! for general systems (both real and complex versions of the code)
!
! {eps_{G G`}^a}^{-1}(q, E) = {{eps_{G G`}^r}^{-1}}^{*}(q, E)
! for systems with inversion symmetry (the real version of the code)
! since plane-wave matrix elements are real
!
! {eps_{G G}^a}^{-1}(q, E) = {{eps_{G G}^r}^{-1}}^{*}(q, E)
! for general systems, the diagonal of the matrix (G` = G)
! since plane-wave matrix elements are complex conjugates of each other
!
! The complex version of the code uses
! {eps_{G G`}^r}^{-1}(q, E) and {eps_{G G`}^a}^{-1}(q, E) for E >= 0
!
! The real version of the code uses
! {eps_{G G`}^r}^{-1}(q, E) for E >= 0

! CHP: full frequency routine - the case for sig%ggpsum == 1
!
! On top of the above relations, we need the following:
! Assuming that W_{G,G`}={eps_{G,G`}}^{-1} v(q+G`),
!
! W_{G`,G}^r(E) = {W_{G,G`}^a}^{*}(E)
!               = {W_{G,G`}^r}^{*}(-E) (this form is used if E<0)
! for general systems (both real and complex version of the code)
!
! W_{G`,G}^a(E) = {W_{G,G`}^r}^{*}(E)
!               = {W_{G,G`}^a}^{*}(-E) (this form is used if E<0)
! for general systems (both real and complex version of the code)
!
! W_{G`,G}^r(E) = W_{G,G`}^r(E)
! for systems with inversion symmetry
!
! W_{G`,G}^a(E) = W_{G,G`}^a(E)
! for systems with inversion symmetry
!
! Note that eps^{-1} does not have these symmetries.

    elseif (sig%freq_dep.eq.2) then

      asxDtemp(:) = (0.0d0,0.0d0)
      achDtemp(:) = (0.0d0,0.0d0)
      ach2Dtemp(:) = (0.0d0,0.0d0)
      if (sig%exact_ch.eq.1) achsDtemp = (0.0d0, 0.0d0)

      do ig=1,igmax
        
        epsRggp(:) = ph(ig)*MYCONJG(ph(igp))*epsRtemp(:,ind(ig))
#ifdef CPLX
        epsAggp(:) = ph(ig)*MYCONJG(ph(igp))*epsAtemp(:,ind(ig))
#endif
        
        if (ig.eq.igp) then
          I_epsRggp = 1.0d0 - epsRggp
#ifdef CPLX
          I_epsAggp = 1.0d0 - epsAggp
#endif
        else
          I_epsRggp = - epsRggp
#ifdef CPLX
          I_epsAggp = - epsAggp
#endif
        endif

        nc_on_node = peinf%ntband_node

! (gsm) compute the static CH for the static remainder

        if (sig%exact_ch.eq.1) then
          do n1=1,nc_on_node
            matngmatmgpD = aqsn(n1,ig) * MYCONJG(aqsm(n1,igp))
            if (sig%ggpsum.eq.1) &
              matngpmatmgD = aqsn(n1,igp) * MYCONJG(aqsm(n1,ig))
            schsD=-I_epsRggp(1)*0.5d0 ! this is retarded
            if (sig%ggpsum.eq.1.and.ig.lt.igp) then
              schsD = matngmatmgpD*schsD + matngpmatmgD*CONJG(schsD)
            else
              schsD = matngmatmgpD*schsD
            endif
            achsDtemp = achsDtemp + schsD
          enddo ! over bands (n1)
        endif ! sig%exact_ch.eq.1

        do n1=1,nc_on_node
          
          n1true = peinf%indext(n1)
          e_n1kq = wfnkq%ekq(n1true,ispin)
          matngmatmgpD = aqsn(n1,ig) * MYCONJG(aqsm(n1,igp))
          if (sig%ggpsum.eq.1) &
            matngpmatmgD = aqsn(n1,igp) * MYCONJG(aqsm(n1,ig))
          
          flag_occ = (n1true.le.(sig%nvband+sig%ncrit)) &
            .and.((sig%ncrit.eq.0).or.(e_n1kq.le.(sig%efermi+TOL_Degeneracy)))
          
          tempval=abs(e_n1kq-sig%efermi)
          if (tempval .lt. TOL_Degeneracy) then
            occ=0.5  ! Fermi-Dirac distribution = 1/2 at Fermi level
          else
            occ = 1D0
          endif
          
! JRD: Find iw closest to e_lk

          diffmin = INF
          do iw=1,sig%nfreqeval
            diff = abs(sig%freqevalmin + (iw-1)*sig%freqevalstep - e_lk)
            if (diff .lt. diffmin) then
              diffmin=diff
              iwlda=iw
            endif
          enddo

          do iw=1,sig%nfreqeval
            wx = sig%freqevalmin - e_n1kq + (iw-1)*sig%freqevalstep
            wxi(iw) = wx
          enddo

          ssxDi = (0D0,0D0)
          schDi = (0D0,0D0)
          sch2Di = (0D0,0D0)
          
          ssxDigpg = (0D0,0D0)
          schDigpg = (0D0,0D0)
          
          do iw=1,sig%nfreqeval
            
            wx = wxi(iw)

! SX and CH terms: equation (1.42) of Catalin`s thesis
! Note the negative sign in I_epsRggp and I_epsAggp

            if (flag_occ) then

              if(wx.ge.0.0d0) then
                !ifreq=int(wx/sig%dDeltaFreq)+1
                ifreq=0
                do ijk = 1, sig%nFreq-1
                  if (wx .ge. sig%dFreqGrid(ijk) .and. wx .lt. sig%dFreqGrid(ijk+1)) then
                    ifreq=ijk
                  endif
                enddo
                if (ifreq .eq. 0) then
                  ifreq = sig%nfreq+3 ! three is for luck
                endif
              else
                !ifreq=int(-wx/sig%dDeltaFreq)+1
                ifreq=0
                do ijk = 1, sig%nFreq-1
                  if (-wx .ge. sig%dFreqGrid(ijk) .and. -wx .lt. sig%dFreqGrid(ijk+1)) then
                    ifreq=ijk
                  endif
                enddo
                if (ifreq .eq. 0) then
                  ifreq = sig%nfreq+3 ! three is for luck
                endif
              endif
              
              if(ifreq.ge.sig%nFreq) then
                if(igp.eq.1.and.ig.eq.1) then
                  write(0,777) iband,n1true,e_lk,e_n1kq,wx,E_max
                endif
                ifreq=sig%nFreq-1
              endif
777           format(1x,"WARNING: The real frequency range is too small." &
                ,/,3x,"l =",i3,1x,"n1 =",i5,1x,"E_l =",f8.3,1x,"E_n1" &
                ,1x,"=",f8.3,1x,"wx =",f8.3,1x,"E_max =",f8.3)
              
#ifdef CPLX
              if(wx.ge.0.d0) then
                
                ssxDi(iw)=(I_epsRggp(ifreq)* &
                  (sig%dFreqGrid(ifreq+1)-wx)+ &
                  I_epsRggp(ifreq+1)* &
                  (wx-sig%dFreqGrid(ifreq)))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))
                
                if (sig%ggpsum.eq.1) then
! (gsm) macro CONJG must be written in a single line
                  ssxDigpg(iw)=(I_epsAggp(ifreq)*(sig%dFreqGrid(ifreq+1)-wx)+ &
                    I_epsAggp(ifreq+1)*(wx-sig%dFreqGrid(ifreq)))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))
                  ssxDigpg(iw)=CONJG(ssxDigpg(iw))
                endif

              else

                ssxDi(iw)=(I_epsAggp(ifreq)*(sig%dFreqGrid(ifreq+1)+wx)+ &
                    I_epsAggp(ifreq+1)*(-sig%dFreqGrid(ifreq)-wx))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))

                if (sig%ggpsum.eq.1) then
! (gsm) macro CONJG must be written in a single line
                  ssxDigpg(iw)=(I_epsRggp(ifreq)*(sig%dFreqGrid(ifreq+1)+wx) + &
                    I_epsRggp(ifreq+1)*(-sig%dFreqGrid(ifreq)-wx))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))
                  ssxDigpg(iw)=CONJG(ssxDigpg(iw))
                endif
                
              endif
#else
              if(wx.ge.0.d0) then
                ssxDi(iw)=(I_epsRggp(ifreq)*(sig%dFreqGrid(ifreq+1)-wx)+ &
                  I_epsRggp(ifreq+1)*(wx-sig%dFreqGrid(ifreq)))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))
              else
                ssxDi(iw)=(CONJG(I_epsRggp(ifreq))*(sig%dFreqGrid(ifreq+1)+wx) + &
                  CONJG(I_epsRggp(ifreq+1))*(-sig%dFreqGrid(ifreq)-wx))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))
              endif
#endif
            endif
          enddo
          
          schD = (0.d0,0.d0)
          
          do iinter = 1, sig%ninter
            dinterfraci(iinter) = dble(iinter-1)/dble(sig%ninter)
            dinterfracii(iinter) = 1D0 - dinterfraci(iinter)
          enddo
          
          do ifreq=1,sig%Nfreq-1
            do iinter=1,sig%ninter
              dinterfrac=dinterfraci(iinter)
              dinterfracinv=dinterfracii(iinter)

              I_epsRggp_int = dinterfracinv * I_epsRggp(ifreq) + dinterfrac * I_epsRggp(ifreq+1)
              cedifft= CMPLX((dinterfrac*sig%dFreqGrid(ifreq+1))+(dinterfracinv*sig%dFreqGrid(ifreq)),0D0) - &
                sig%dFreqBrd(ifreq)
#ifdef CPLX

              I_epsAggp_int = dinterfracinv * I_epsAggp(ifreq) + dinterfrac  * I_epsAggp(ifreq+1)

              ! for G,G` components
              schD=-CMPLX(0.d0,pref(ifreq))*(I_epsRggp_int-I_epsAggp_int)

              ! for G`,G components
              if (sig%ggpsum.eq.1) then
                schDgpg=-CMPLX(0.d0,pref(ifreq))*CONJG(I_epsAggp_int-I_epsRggp_int)
                schDigpg(:) = schDigpg(:) + schDgpg / ( wxi(:)-cedifft)
              endif
#else
              schD= CMPLX(pref(ifreq)*IMAG(I_epsRggp_int),0.0d0)
#endif
              schDi(:) = schDi(:) + schD / ( wxi(:)-cedifft)
            enddo
          enddo

! JRD: Can We Do Imaginary part by Delta Function?  Zero out schD.  We can if have inversion.

! CHP: We calculate sch2Di for both complex and real version, considering the case that one
!      uses complex version for a system with inversion symmetry, or for tests, etc.
!      The following routine will give the same answer as using the real version (if there is
!      an inversion symmetry) even if one uses complex wavefunctions (again, by mistake or for a test)
!      because the product of two matrix elements are real due to the cancellation of an arbitrary
!      phase factor, which, however, is not the case if one is calculating the off-diagonal
!      self-energy matrix elements.

!#ifndef CPLX
          do iw = 1, sig%nfreqeval
            wx = wxi(iw)
            sch2Di(iw) = CMPLX(0.D0,0D0)
            if(wx .ge. 0.0d0) then
              !ifreq=int(wx/sig%dDeltaFreq)+1
              ifreq=0
              do ijk = 1, sig%nFreq-1
                if (wx .ge. sig%dFreqGrid(ijk) .and. wx .lt. sig%dFreqGrid(ijk+1)) then
                  ifreq=ijk
                endif
              enddo
              if (ifreq .eq. 0) then
                ifreq=sig%nFreq-1
              endif

              sch2Di(iw) = sch2Di(iw) + CMPLX(0D0,-1D0)* &
                !(IMAG(I_epsRggp(ifreq))*(dble(ifreq)-wx/sig%dDeltaFreq)+ &
                !IMAG(I_epsRggp(ifreq+1))*(wx/sig%dDeltaFreq-dble(ifreq-1))) 
                (IMAG(I_epsRggp(ifreq))*(sig%dFreqGrid(ifreq+1)-wx)+ &
                IMAG(I_epsRggp(ifreq+1))*(wx-sig%dFreqGrid(ifreq)))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 
            endif
          enddo
!#endif

          if (sig%ggpsum.eq.1.and.ig.lt.igp) then
#ifdef CPLX
            ssxDi(:) = matngmatmgpD*ssxDi(:) + matngpmatmgD*ssxDigpg(:)
            schDi(:) = matngmatmgpD*schDi(:) + matngpmatmgD*schDigpg(:)
#else
            ssxDi(:) = matngmatmgpD*ssxDi(:) + matngpmatmgD*ssxDi(:)
            schDi(:) = matngmatmgpD*schDi(:) + matngpmatmgD*schDi(:)
#endif

!
! CHP: note that sch2Di is not meaningful for systems without inversion symmetry
!      therefore, one should neglect it unless one is using complex version for
!      a system with inversion (for a test or by mistake).
!
            sch2Di(:) = matngmatmgpD*sch2Di(:) + matngpmatmgD*sch2Di(:)
            
          else
            ssxDi(:) = matngmatmgpD*ssxDi(:)
            schDi(:) = matngmatmgpD*schDi(:)
            sch2Di(:) = matngmatmgpD*sch2Di(:)
          endif
          
          do iw = 1, sig%nfreqeval
            
            if (flag_occ) then
              asxDtemp(iw) = asxDtemp(iw) + ssxDi(iw)*occ
            endif
            
            achDtemp(iw) = achDtemp(iw) + schDi(iw)
            ach2Dtemp(iw) = ach2Dtemp(iw) + sch2Di(iw)
            
! JRD: This is now close to LDA

            if (iw.eq.iwlda) achtD_n1(n1true) = &
              achtD_n1(n1true) + schDi(iw) * vcoul(igp)

          enddo ! over iw
          
        enddo ! over bands (n1)
        
      enddo ! over G (ig)
      
    endif ! sig%freq_dep

!------------------------
! (gsm) exchange partial sum static CH and exact static CH

    if (sig%freq_dep.eq.0.and.sig%exact_ch.eq.1) then
      achtemp(2)=achxtemp
      achxtemp=ZERO
    endif

!------------------------
! (gsm) since static SX & CH don`t depend on energy
!       we don`t compute them for iw=1,3

    if (sig%freq_dep.eq.0) then
      if (sig%fdf.eq.-1.or.sig%fdf.eq.0) then
        asxtemp(1) = asxtemp(2)
        achtemp(1) = achtemp(2)
      endif
      if (sig%fdf.eq.0.or.sig%fdf.eq.1.or. &
        (sig%fdf.eq.2.and.icalc.eq.1)) then
        asxtemp(3) = asxtemp(2)
        achtemp(3) = achtemp(2)
      endif
    endif

!------------------------
! We have performed the sum over G. We now scale the resulting
! summand by the coulomb interaction, and accumulate into the
! result that is returned to the calling routine.

    if (sig%freq_dep.eq.0.or.sig%freq_dep.eq.1) then
      do iw=1,3
        asxt(iw) = asxt(iw) + asxtemp(iw) * vcoul(igp)
        acht(iw) = acht(iw) + achtemp(iw) * vcoul(igp)
        if (sig%freq_dep.eq.1 .and. iw .eq. 2) then
          asigt_imag = asigt_imag + asigtemp_imag * vcoul(igp)
        endif
      enddo
    elseif (sig%freq_dep.eq.2) then
      do iw=1,sig%nfreqeval
        asxtDyn(iw) = asxtDyn(iw) + asxDtemp(iw) * vcoul(igp)
        achtDyn(iw) = achtDyn(iw) + achDtemp(iw) * vcoul(igp)
        ach2tDyn(iw) = ach2tDyn(iw) + ach2Dtemp(iw) * vcoul(igp)
      enddo
    endif

!------------------------
! (gsm) compute the static remainder
!       a factor of 0.5 accounts for the fact that GPP/FF CH converges
!       with the number of empty bands two times faster than COHSEX CH

    if (sig%freq_dep .eq. 0 .and. sig%exact_ch .eq. 0) then
      achtcor = achtcor + (achxtemp - achtemp(2)) * vcoul(igp)
    elseif (sig%freq_dep .eq. 1 .and. sig%exact_ch .eq. 1) then
      achtcor = achtcor + 0.5d0 * (achxtemp - achstemp) * vcoul(igp)
    elseif (sig%freq_dep .eq. 2 .and. sig%exact_ch .eq. 1) then
      achtcor = achtcor + 0.5d0 * (achxtemp - SCALARIFY(achsDtemp)) * vcoul(igp)
    endif

  enddo ! over G` (igp)

  if (sig%ggpsum.eq.1) then
    ncouls2 = ncouls**2/2
  else
    ncouls2 = ncouls**2
  endif

! End Loop on G`
!----------------------------------------------------------------------

!-----------------------
! Deallocate and Finish

  if (sig%freq_dep.eq.1) then
    SAFE_DEALLOCATE(wpmtx)
    SAFE_DEALLOCATE(epstemp)
  endif
  if (sig%freq_dep.eq.2) then
    SAFE_DEALLOCATE(pref)
    SAFE_DEALLOCATE(epsRggp)
    SAFE_DEALLOCATE(I_epsRggp)
    SAFE_DEALLOCATE(asxDtemp)
    SAFE_DEALLOCATE(achDtemp)
    SAFE_DEALLOCATE(ach2Dtemp)
    SAFE_DEALLOCATE(schDi)
    SAFE_DEALLOCATE(sch2Di)
    SAFE_DEALLOCATE(ssxDi)
    SAFE_DEALLOCATE(schDigpg)
    SAFE_DEALLOCATE(ssxDigpg)
    SAFE_DEALLOCATE(wxi)
    SAFE_DEALLOCATE(dinterfraci)
    SAFE_DEALLOCATE(dinterfracii)
    SAFE_DEALLOCATE(epsRtemp)
    SAFE_DEALLOCATE(epsAtemp)
#ifdef CPLX
    SAFE_DEALLOCATE(epsAggp)
    SAFE_DEALLOCATE(I_epsAggp)
#endif
  endif

  POP_SUB(mtxel_sxch)
  
  return
end subroutine mtxel_sxch

end module mtxel_sxch_m
