!==========================================================================
!
! Routines:
!
! (1) genwf_mpi()  Originally By (JRD)             Last Modified 11/2009 (JRD)
!
!     On entry:
!     q0  = current q-vector
!     rq  = current k point in irr. zone
!     irq = its index
!     iflagq0 = whether q0 is the "zero" vector
!
!     On exit:
!     vwfn%ev and vwfn%zv hold eigenvalues and wavefunctions (valence)
!     cwfn%ec and cwfn%zc hold eigenvalues and wavefunctions (conduction)
!
!     with proper phases and ordering for the k-point rq (given the
!     data on disk for irr. zone)
!
!     subroutine generates valence band wavefunctions for rq(irq)
!     and conduction band wavefunctions for rq(irq) from the
!     wavefunctions available for rk in the irreducible wedge of the
!     bz
!
!     i   rq                 rq(irq)
!     o c nkptv,...,ev       valence-band wavefunctions for rq+q
!     and associated data
!     o c nkptc,...,ec       conduction-band wavefunctions for rq
!     and associated data
!
!==========================================================================

#include "f_defs.h"

subroutine genwf_mpi(syms,gvec,crys,kp,kpq,irq,rq,q0,vwfn,pol,cwfn,iflagq0,intwfnv,intwfnvq,intwfnc,ivin)

  use global_m
  use blas_m
  use gmap_m
  use sort_m
  implicit none

  type (symmetry), intent(in) :: syms
  type (gspace), intent(in) :: gvec
  type (crystal), intent(in) :: crys
  type (kpoints), target, intent(in) :: kp
  type (kpoints), target, intent(in) :: kpq
  integer, intent(in) :: irq
  real(DP), intent(in) :: rq(3)
  real(DP), intent(in) :: q0(3)
  type (valence_wfns), intent(inout) :: vwfn
  type (polarizability), intent(in) :: pol
  type (conduction_wfns), intent(inout) :: cwfn
  integer, intent(in) :: iflagq0
  type (int_wavefunction), intent(in) :: intwfnv
  type (int_wavefunction), intent(in) :: intwfnvq
  type (int_wavefunction), intent(in) :: intwfnc
  integer, intent(in) :: ivin

  integer :: itval,jband
  real(DP) :: tsec(2)

  integer :: nkpt, ngdist
  integer, allocatable :: isortc(:)
  real(DP), allocatable :: eig(:,:)
  SCALAR, allocatable :: zin(:,:),zinc(:,:)
  SCALAR, allocatable :: zintemp(:,:)
  type(kpoints), pointer :: kp_point

  integer :: irk,itq,itqq,i,k
  integer :: n,ig,iband
  integer :: naddc
  integer :: ik,ikrqq,kgq(3),kgqq(3)
  integer, allocatable :: ind(:),isorti(:)
  real(DP), allocatable :: xnorm(:,:)
  real(DP) :: xnormc(2),qk(3),del(3),rqq(3),xnormcc(2)
  real(DP), allocatable :: ekin(:),vmid(:,:),qkv(:,:)
  real(DP) :: rkmatch(3)

  SCALAR, allocatable :: ph(:)

  SCALAR, allocatable, save :: ph_old(:)

  integer, allocatable, save :: ind_old(:)
  integer, save :: irq_old=0

  PUSH_SUB(genwf_mpi)

  SAFE_ALLOCATE(isortc, (gvec%ng))
  SAFE_ALLOCATE(eig, (cwfn%nband,kp%nspin))
  SAFE_ALLOCATE(xnorm, (1,kp%nspin))

  if(peinf%inode.eq.0) call timacc(16,1,tsec)

! rqq = rq + q0   (i.e. rqq = current kpoint in irr. zone + q0)

  rqq(1:3) = rq(1:3) + q0(1:3)

! If this is the "zero" q vector, look in WFNq, otherwise in WFN.
  if(iflagq0 .eq. 1 .and. pol%iqexactlyzero .eq. 0) then
    kp_point => kpq
  else
    kp_point => kp
  endif

! This is what we used to do for q->0, but it is better to unfold the shifted grid.
!    itqq = 0
!    do ik=1,kpq%nrk
!      if(all(abs(rqq(1:3) - kpq%rk(1:3, ik)) .lt. TOL_Small)) then
!        ikrqq=ik
!        itqq=1
!        kgqq(:)=0
!        rkmatch(:)=kpq%rk(:,ik)
!        exit
!      endif
!    enddo
!    if(itqq == 0) then
!      if(peinf%inode .eq. 0 ) write(0,'("Could not find point matching:",3f12.6)') rqq
!      call die('Could not find rq+q0 in file WFNq.')
!    endif
!  else

! SIB:  look for rqq in the list
! of kpoints rotated by the symmetries.  If found, it puts
! its index in ikrqq, rkmatch has the vector, itqq is the index
! of the symmetry that worked, and kgqq is an "umklapp" vector (i.e.
! integer 3-vector) so that rqq = symm*kvec + kgqq.

  ikrqq = 0
  ik_loop: do ik=1,kp_point%nrk
    do itq=1,syms%ntran
      do i=1,3
        qk(i)=DOT_PRODUCT(dble(syms%mtrx(i,:,itq)),kp_point%rk(:,ik))
        del(i)=rqq(i)-qk(i)
        if(del(i).ge.0.0d0) kgqq(i)=del(i)+TOL_Small
        if(del(i).lt.0.0d0) kgqq(i)=del(i)-TOL_Small
      enddo
      if(all(abs(del(1:3)-kgqq(1:3)) .lt. TOL_Small)) then
        ikrqq=ik
        rkmatch(:)=kp_point%rk(:,ikrqq)
        itqq=itq
        exit ik_loop
      endif
    enddo
  enddo ik_loop
  
  if(ikrqq .eq. 0) then
    if(peinf%inode == 0) then
      write(0,*) rq(:)
      write(0,*) q0(:)
      write(0,'(a,3f8.3,a)') 'No match for rqq point:',rqq,' in file WFN or WFNq'
    endif
    call die('genwf: rqq match', only_root_writes = .true.)
  endif

!      if(peinf%inode.eq.0) then
  itval=vwfn%nband+pol%ncrit
  if (iflagq0 .ne. 1 .or. pol%iqexactlyzero .ne. 0) then
    nkpt = intwfnv%ng(ikrqq)
    if (irq .ne. irq_old) then
      eig(1:itval,1:kp%nspin)=intwfnv%el(1:itval,1:kp%nspin,ikrqq)
      isortc(1:nkpt)=intwfnv%isort(1:nkpt,ikrqq)
    endif
    qk(:)=intwfnv%qk(:,ikrqq)
  else
    nkpt = intwfnvq%ng(ikrqq)
    if (irq .ne. irq_old) then
      eig(1:itval,1:kp%nspin)=intwfnvq%el(1:itval,1:kp%nspin,ikrqq)
      isortc(1:nkpt)=intwfnvq%isort(1:nkpt,ikrqq)
    endif
    qk(:)=intwfnvq%qk(:,ikrqq)
  endif
!      endif

  SAFE_ALLOCATE(zintemp, (nkpt,kp%nspin))
  SAFE_ALLOCATE(zin, (nkpt,kp%nspin))
  
  itval = vwfn%nband+pol%ncrit
  jband = (ikrqq-1)*peinf%nvownt + ivin
  zintemp=0D0
  if (iflagq0 .ne. 1 .or. pol%iqexactlyzero .ne. 0) then
    zintemp(1:nkpt,1:kp%nspin)=intwfnv%cg(1:nkpt,jband,1:kp%nspin)
  else
    zintemp(1:nkpt,1:kp%nspin)=intwfnvq%cg(1:nkpt,jband,1:kp%nspin)
  endif

  zin(:,:)=zintemp(:,:) 
  SAFE_DEALLOCATE(zintemp)

! JRD: check kpoint

  if(any(abs(rkmatch(1:3) - qk(1:3)) .gt. TOL_Small)) call die('genwf: rkmatch')

! JRD: Compute kinetic energies for rqq+g

!      if (peinf%inode .eq. 0) write(6,*) 'Inside genwf 4 '

  if (irq .ne. irq_old) then
    SAFE_ALLOCATE(ekin, (gvec%ng))
    ngdist = (gvec%ng / peinf%npes) + 1
    SAFE_ALLOCATE(vmid, (3,gvec%ng))
    SAFE_ALLOCATE(qkv, (3,gvec%ng))
    do i=1,gvec%ng
      qkv(:,i)=rqq(:)+gvec%k(:,i)
    enddo
    call DGEMM('n', 'n', 3, gvec%ng, 3, 1D0, crys%bdot, 3, qkv, 3, 0D0, vmid, 3)
    do i=1,gvec%ng
      ekin(i)=qkv(1,i)*vmid(1,i)+qkv(2,i)*vmid(2,i)+qkv(3,i)*vmid(3,i)
    enddo

! JRD: sort array ekin to ascending order, store indices in array vwfn%isort
! WARNING: one should not assume that in the case of
! q-->0 the orders as provided below and as read in from
! WFNq file is the same (sorting may change....)
! ===> need to call gmap also for q-->0 (see below)
! We initialize vwfn%isort to the appropriate array
! before reading in. this way we do not get zeroes in the array
! these are the valence wave-functions that do not need
! to be changed

    do i = 1, gvec%ng
      vwfn%isort(i)=i
    enddo

    call sortrx_D(gvec%ng,ekin,vwfn%isort,gvec=gvec%k)

    SAFE_ALLOCATE(isorti, (gvec%ng))
    do i=1,gvec%ng
      isorti(vwfn%isort(i))=i
    enddo
    do i=1,nkpt
      isorti(isortc(i))=i
    enddo

    vwfn%nkptv=nkpt

! SIB: put read eigenvalues into vwfn%ev(band,spin).
! Set xnorm(1:(vwfn%nband+pol%ncrit),1:nspin)=0

    SAFE_ALLOCATE(vwfn%ev, ((vwfn%nband+pol%ncrit),kp%nspin))
    vwfn%ev(1:(vwfn%nband+pol%ncrit),:) = eig(1:(vwfn%nband+pol%ncrit),:)

! SIB: get phases (ph) and indices (ind) for g-vectors
! gvec%k(:,vwfn%isort(1:nkpt2))+kgqq

    SAFE_ALLOCATE(ind, (vwfn%nkptv))
    SAFE_ALLOCATE(ph, (vwfn%nkptv))
    call gmap(gvec,syms,vwfn%nkptv,itqq,kgqq,vwfn%isort,isorti,ind,ph,.true.)
    if (irq_old .ne. 0) then
      SAFE_DEALLOCATE(ph_old)
      SAFE_DEALLOCATE(ind_old)
    endif
    SAFE_ALLOCATE(ph_old, (vwfn%nkptv))
    SAFE_ALLOCATE(ind_old, (vwfn%nkptv))
    ph_old(:) = ph(:)
    ind_old(:) = ind(:)
  else

!        if (peinf%inode .eq. 0) write(6,*) 'Genwf else',vwfn%nkptv

    SAFE_ALLOCATE(ph, (vwfn%nkptv))
    SAFE_ALLOCATE(ind, (vwfn%nkptv))
    ph(:)=ph_old(:)
    ind(:)=ind_old(:)

!        if (peinf%inode .eq. 0) write(6,*) 'Genwf else 2'

  endif  ! irq = irq_old
  SAFE_DEALLOCATE(eig)
  xnorm(:,:) = 0.0d0

!      if (peinf%inode .eq. 0) write(6,*) 'Inside genwf 5 '

! Loop over valence band wavefunctions

  SAFE_ALLOCATE(vwfn%zv, (vwfn%nkptv,kp%nspin))

  n=1

! XAV: vwfn%zv(ig)  corresponds really to the
! vwfn%isort(ig) G-vector (between 1 and ng)
! The subroutine gmap assumes that, as read from WFNq or WFN,
! zin(ig) corresponds really to isortc(ig) G-vector !!!

  do ig=1,vwfn%nkptv
    vwfn%zv(ig,:)=zin(ind(ig),:)*ph(ig)
    xnorm(n,:)=xnorm(n,:)+abs(vwfn%zv(ig,:))**2
  enddo !ig

!      if (peinf%inode .eq. 0) write(6,*) 'Inside genwf 6 '

! Renormalize wavefunctions
! SIB:  checks that zin vectors have norm greater than 1e-6, and then
! normalizes them to have unit square modulus.

  do k = 1, kp%nspin
    n = 1 
    if(xnorm(n,k).lt.TOL_Small)  then
      write(0,*) 'Bad norm:',n,k,xnorm(n,k)
      xnormc(k)=0.0d0
      xnormcc(k)=0.0d0
      do ig=1,vwfn%nkptv
        xnormc(k)=xnormc(k)+abs(zin(ind(ig),k))**2
        xnormcc(k)=xnormcc(k)+abs(zin(ind(ig),k)*ph(ig))**2
      enddo
      xnormc(k)=sqrt(xnormc(k))
      write(6,8000) ik,n,xnormc(k),xnormcc(k),k
8000  format(/,'In genwf, read file 15 state nrk,band=',2i5,/,'xnormc(k)=',f10.5,'xnormcc(k)=',f10.5,'spin=',i1/)
#ifndef CPLX
      write(0,*) '**Are you doing a supercell? If so you should use complex version with complex wavefunctions!**'
#endif
      call die('genwf: zin norm')
    endif
    xnorm(n,k)=sqrt(xnorm(n,k))
  end do !k (over spin)

  n=1
  do ig=1,vwfn%nkptv
    vwfn%zv(ig,:)=vwfn%zv(ig,:)/xnorm(n,:)
  enddo

!      if (peinf%inode .eq. 0) write(6,*) 'Inside genwf 7 '

! End calculation of valence band wavefunctions

  SAFE_DEALLOCATE(zin)
  SAFE_DEALLOCATE(ind)
  SAFE_DEALLOCATE(ph)
  SAFE_DEALLOCATE(xnorm)
  if(peinf%inode.eq.0) call timacc(16,2,tsec)
  
  if (irq .ne. irq_old) then
    
!------------------------------------------------------------------------
! Generate conduction-band wavefunctions for rq
! find rk, r, g0 such that rq=r(rk)+g0
!
! SIB: This seems redundant, but find a k-point and symmetry so that
!  rq = sym%mtrx(:,:,itq)*kp%rk(irk,:) + kgq   where kgq is integer 3-vec

    if(peinf%inode.eq.0) call timacc(17,1,tsec)
    ikrqq = 0
    irk_loop: do irk=1,kp%nrk
      do itq=1,syms%ntran
        do i=1,3
          qk(i)=DOT_PRODUCT(dble(syms%mtrx(i,:,itq)),kp%rk(:,irk))
          del(i)=rq(i)-qk(i)
          if(del(i).ge.0.0d0) kgq(i)=del(i)+TOL_Small
          if(del(i).lt.0.0d0) kgq(i)=del(i)-TOL_Small
        enddo
        if(all(abs(del(1:3)-kgq(1:3)) .lt. TOL_Small)) then
          ikrqq = irk
          itqq = itq
          exit irk_loop
        endif
      enddo
    enddo irk_loop

    if(ikrqq == 0) call die('genwf: kgq mismatch')

! Write out rq, it and kgq

    if(peinf%inode.eq.0) then
      write(6,7000) (rq(i),i=1,3),ikrqq,(kp%rk(i,ikrqq),i=1,3),itqq,(kgq(i),i=1,3)
7000  format(1x,'rq=',3f7.3,1x,'irk=',i5,1x,' rk=',3f7.3,1x,'it=',i5,1x,'kg0=',3i3)
    endif

! SIB:  if we already found this k-point last time, get its qk, nkpt,
! and isortc(:).  Otherwise, skip ikrqq-1 records, and read in information (qk,cwfn%ec,nkpt,isortc),

    SAFE_ALLOCATE(cwfn%ec, (cwfn%nband,kp%nspin))
    
    nkpt = intwfnc%ng(ikrqq)
    cwfn%ec(1:cwfn%nband,1:kp%nspin)=intwfnc%el(1:cwfn%nband,1:kp%nspin,ikrqq)
    qk(:)=intwfnc%qk(:,ikrqq)
    isortc(1:nkpt)=intwfnc%isort(1:nkpt,ikrqq)
    
! Check kpoint (again ...  boring...)
! Check that kp%rk(:,ikrqq) = qk  (why it wouldn`t is a mystery!)

    if(any(abs(kp%rk(1:3, ikrqq) - qk(1:3)) .gt. TOL_Small)) call die('genwf: qk mismatch')

    cwfn%nkptc=nkpt

! Compute inverse to isort
! NOTE: isortc orders   |kp%rk+G|^2
! It is not necessarily the same order than |rq+G|^2
! (particularly if umklapp, ie kgq non zero)

    if(peinf%inode.eq.0) call timacc(28,1,tsec)
    
    do i=1,gvec%ng
      qkv(:,i)=rq(:)+gvec%k(:,i)
    enddo
    call DGEMM('n', 'n', 3, gvec%ng, 3, 1D0, crys%bdot, 3, qkv, 3, 0D0, vmid, 3)
    
    do i=1,gvec%ng
      ekin(i)=qkv(1,i)*vmid(1,i)+qkv(2,i)*vmid(2,i)+qkv(3,i)*vmid(3,i)
    enddo
    SAFE_DEALLOCATE(vmid)
    SAFE_DEALLOCATE(qkv)

    if(peinf%inode.eq.0) call timacc(28,2,tsec)

! Sort array ekin to ascending order
! store indices in array isort

    if(peinf%inode.eq.0) call timacc(29,1,tsec)

    do i = 1, gvec%ng
      cwfn%isort(i)=i
    enddo

    call sortrx_D(gvec%ng,ekin,cwfn%isort,gvec=gvec%k)

    if(peinf%inode.eq.0) call timacc(29,2,tsec)

    do i=1,gvec%ng
      cwfn%ekin(i) = ekin(cwfn%isort(i))
    enddo
    do i=1,nkpt
      isorti(isortc(i))=i
    enddo
    SAFE_DEALLOCATE(ekin)

! map planewave components for rq to those of rk
! compute phases
! We do not the isorti related to kp%rk BUT the cwfn%isort related to rq

    SAFE_ALLOCATE(ind, (cwfn%nkptc))
    SAFE_ALLOCATE(ph, (cwfn%nkptc))
    call gmap(gvec,syms,cwfn%nkptc,itqq,kgq,cwfn%isort,isorti,ind,ph,.true.)
    SAFE_DEALLOCATE(isorti)

! generate conduction band wavefunctions
! loop over wavefunctions
! read conduction band from tape one by one

    SAFE_ALLOCATE(cwfn%zc, (peinf%ncownt*cwfn%nkptc,kp%nspin))
    SAFE_ALLOCATE(zinc, (cwfn%nkptc,kp%nspin))

    do n=1,peinf%ncownt
      
      jband= (ikrqq-1)*peinf%ncownt + n
      
      iband = intwfnc%cbi(jband)
      zinc(1:nkpt,1:kp%nspin)=intwfnc%cg(1:nkpt,jband,1:kp%nspin)
      
      if(iband .ne. peinf%invindexc(n)) call die('genwf: indexc mismatch')
      
      naddc=(n-1)*cwfn%nkptc

! Loop over components of wfns
! note that only conduction band wfns are stored-
! they start in the 1st position with the state nvband+1

      do ig=1,cwfn%nkptc
        cwfn%zc(naddc+ig,:)=ph(ig)*zinc(ind(ig),:)
      enddo
    enddo  ! n (cond-bands per node [ncownt] loop)
    SAFE_DEALLOCATE(zinc)
    SAFE_DEALLOCATE(ind)
    SAFE_DEALLOCATE(ph)
    
!     end generation of conduction-band wavefunctions for rq

    if(peinf%inode.eq.0) call timacc(17,2,tsec)
    
  endif ! (irq .ne. irq_old)

  irq_old = irq
  SAFE_DEALLOCATE(isortc)

  POP_SUB(genwf_mpi)

  return
end subroutine genwf_mpi
