!==========================================================================
!
! Routines:
!
! (1) plotxct(main)     Originally By MLT       Last Modified 6/2008 FJR
!
!     Reads files WFN_fi/WFNq_fi (output from paratec) and eigenvectors (output
!     from BSE/diag code) and plots selected exciton state in real
!     space according to:
!
!     Psi(r,r`) = Sum_scvk A_svck * phi_sck(r) * exp(ik.r) *
!                           conjg( phi_svk(r`) * exp(i[k+q].r`) )
!
!     where r`, hole coordinate, is fixed at some point and r,
!     electron coordinate, runs over a supercell of given size.
!
!     Output is written in format suitable for data explorer (IBM).
!     Mesh is defined by the supercell size and the FFT parameters
!     used to generate WFN_fi/WFNq_fi.
!
!     input option 'restrict_kpoints' reduces the sum over k-points
!     above to a sum over the ones that give most of the contribution
!     to the norm of eigenvectors. This is handy if there are many k-points
!     but only a few of them give sizable contribution.
!
!     input: WFN_fi WFNq_fi            wavefunction files, paratec format
!            eigenvectors               output from BSE/diag code
!            plotxct.inp          input parameters
!
!     output: xct.<ie>.r.asc      real space data file
!
!     compilation: make plotxct
!
!     Dependencies: uses FFTW library
!
!     Developers:
!
!        Murilo Tiago, Berkeley CA, mtiago@civet.berkeley.edu
!           original creator
!
!        Sohrab Ismail-Beigi, Berkeley CA, sohrab@civet.berkeley.edu
!           FFTW interface routines
!
!        Filipe Ribeiro, Berkeley CA, fribeiro@alum.berkeley.edu
!           single output xct.<ie>.r.asc
!
!=================================================================================

#include "f_defs.h"

program plotxct

  use global_m
  use genwf_m
  use fftw_m
  use sort_m
  implicit none

  type (crystal) :: crys
  type (symmetry) :: syms
  type (gspace) :: gvec
  type (xctinfo) :: xct
  type (grid) :: kg,kgq
  type (wavefunction) :: wfnc,wfnv
  type (work_genwf) :: work, workq
  type (int_wavefunction) :: intwfn

  integer :: ii,ikq,ik,ikt,ic,iv,is,ikcvs,i1,i2,i3,ir1,ir2,ir3
  integer :: ncount,ntim,pstate,nmat,itpc,itpv
  integer :: nk(3),nw(3),nfft(3),step
  real(DP) :: sum,sumf,scale
  real(DP) :: phhole,phel,es,kk(3),rhole(3),rel(3)
  real(DP) :: tsec(2),avec(3,3)
  complex(DPC) :: wfnvint
  character :: filename*20
  character*16, allocatable :: routnam(:)
  integer, allocatable :: indexq(:),index_k(:)
  real(DP), allocatable :: wwk(:),kgr(:,:)

! (ncb, nfft1, nfft2, nfft3)

  complex(DPC), allocatable :: fel(:,:,:,:)

! (nvb)

  complex(DPC), allocatable :: fhole(:)

! (nfft1, nfft2, nfft3)

  complex(DPC), allocatable :: ucfftc(:,:,:)
  complex(DPC), allocatable :: ucfftv(:,:,:)
  
  complex(DPC), allocatable :: ucfft1(:,:,:)
  complex(DPC), allocatable :: ucfft2(:,:,:)

! (nw1, nw2, nw3)

  complex(DPC), allocatable :: scfft(:,:,:)
  
  integer :: j1,j2,j3

  SCALAR, allocatable :: A(:)
      
  call peinfo_init()

  call timacc(1,0,tsec)
  call timacc(1,1,tsec)

  call write_program_header('PlotXct', .false.)

!---------------------------------
! Read plotxct.inp

  call inread(xct,nk,pstate,rhole,avec)
  if (peinf%inode.eq.0) then
    write(6,*) ' Lattice vectors: '
    write(6,'(3f12.5)') ((avec(ik,ii),ii=1,3),ik=1,3)
  endif

!---------------------------------
! Read eigenvectors

  if(peinf%inode.eq.0) then
    call open_file(unit=10,file='eigenvectors',form='unformatted',status='old')
    read(10) xct%nspin
    read(10) xct%nvband
    read(10) xct%ncband
    read(10) xct%nkpt
    SAFE_ALLOCATE(kgr, (3,xct%nkpt))
    read(10) ((kgr(ii,ik),ii=1,3),ik=1,xct%nkpt)
  endif

#ifdef MPI
  call MPI_BCAST(xct%nspin, 1, MPI_INTEGER,0, MPI_COMM_WORLD,mpierr)
  call MPI_BCAST(xct%nvband, 1, MPI_INTEGER,0, MPI_COMM_WORLD,mpierr)
  call MPI_BCAST(xct%ncband, 1, MPI_INTEGER,0, MPI_COMM_WORLD,mpierr)
  call MPI_BCAST(xct%nkpt, 1, MPI_INTEGER,0, MPI_COMM_WORLD,mpierr)
  if (peinf%inode.ne.0) then
    SAFE_ALLOCATE(kgr, (3,xct%nkpt))
  endif
  call MPI_BCAST(kgr, 3*xct%nkpt,MPI_REAL_DP,0,MPI_COMM_WORLD,mpierr)
#endif

!-------------------------------
! Read selected state

  nmat= xct%nkpt*xct%ncband*xct%nvband*xct%nspin
  SAFE_ALLOCATE(A, (nmat))
  if(peinf%inode.eq.0) then
    do ii=1,pstate-1
      read(10)
      read(10)
    enddo
    read(10) es
    read(10) (A(ii),ii=1,nmat)
    write(6,*) ' Reading state # ',pstate
    write(6,*) ' Excitation energy = ',es,' eV'
  endif
#ifdef MPI
  call MPI_BCAST(A,nmat,MPI_SCALAR,0,MPI_COMM_WORLD,mpierr)
#endif

!-------------------------------
! Restrict the # of k-points to nk

  if (xct%nn.eq.0) xct%nn=xct%nkpt
  SAFE_ALLOCATE(wwk, (xct%nkpt))
  wwk=0.d0
  do ik=1,xct%nkpt
    do iv=1,xct%nvband
      do ic=1,xct%ncband
        do is=1,xct%nspin
          ikcvs= is + (iv - 1 + (ic - 1 + (ik - 1)*xct%ncband)* &
            xct%nvband)*xct%nspin
          wwk(ik) = wwk(ik) + abs(A(ikcvs))**2
        enddo
      enddo
    enddo
  enddo

  SAFE_ALLOCATE(index_k, (xct%nkpt))
  index_k=0
  call sortrx_D(xct%nkpt, wwk, index_k)
  ! cannot use gvec here, not read in yet!
  do ii=1,xct%nkpt/2
    ik=index_k(ii)
    index_k(ii)=index_k(xct%nkpt+1-ii)
    index_k(xct%nkpt+1-ii)=ik
  enddo

! Index_k reversed (check if it works!)

  if(peinf%inode.eq.0) then
    write(6,*) ' Reducing k-points to ',xct%nn
    sumf=0.d0
    do ik=1,xct%nkpt
      sumf=sumf + wwk(ik)
    enddo
    sum=0.d0
    do ii=1,xct%nn
      sum=sum + wwk(index_k(ii))
    enddo
    write(6,*) ' Wfn norm (restricted sum): ',sum
    write(6,*) ' Wfn norm (full): ',sumf
  endif

! Project onto cv space

!      do ic=1,xct%ncband
!        do iv=1,xct%nvband
!          is=1
!          sum = 0.d0
!          do ik=1,xct%nkpt
!            ikcvs= is + (iv - 1 + (ic - 1 + (ik - 1)*xct%ncband)*
!     >       xct%nvband)*xct%nspin
!            sum = sum + abs(A(ikcvs))**2
!          enddo
!          write(6,'(2i5,f14.5)') ic,iv,sum
!        enddo
!      enddo

  SAFE_DEALLOCATE(wwk)

!------------------------------
! Read WFN_fi

  call timacc(2,1,tsec)
  call input(crys,gvec,kg,syms,xct,kgr,index_k)
  if(peinf%inode.eq.0) then
    write(6,*) ' # valence bands = ',xct%nvband
    write(6,*) ' # cond. bands   = ',xct%ncband
    write(6,*) ' Coordinates of hole = ',rhole(:)
    write(6,*) ' lattice box = ',nk(:)
    call timacc(2,2,tsec)
  endif
  SAFE_DEALLOCATE(kgr)

  sum = avec(1,1)*( avec(2,2)*avec(3,3) - avec(2,3)*avec(3,2) ) &
    + avec(1,2)*( avec(2,3)*avec(3,1) - avec(2,1)*avec(3,3) ) &
    + avec(1,3)*( avec(2,1)*avec(3,2) - avec(2,2)*avec(3,1) )
  if (abs(sum - crys%celvol).gt.1.d-6) then
    write(0,*) sum,crys%celvol
    call die('volume inconsistent with lattice vectors')
  endif

!----------------------------------
! Read WFNq_fi

  SAFE_ALLOCATE(indexq, (kg%nf))
  call timacc(3,1,tsec)
  call input_q(crys,gvec,kg,kgq,syms,xct,indexq)
  call timacc(3,2,tsec)

!----------------------------------
! Compute size of FFT box we need
  call setup_FFT_sizes(gvec%kmax,nfft,scale)

! nw defines the mesh in supercell. Should be defined so that
! nfft(ii)*nk(ii)/nw(ii) = integer for ii=1,3

  nw(:) = nfft(:)*nk(:)

!  Allocate FFT boxes

  SAFE_ALLOCATE(ucfftv, (nfft(1),nfft(2),nfft(3)))
  SAFE_ALLOCATE(ucfftc, (nfft(1),nfft(2),nfft(3)))
  SAFE_ALLOCATE(fel, (xct%ncband,nfft(1),nfft(2),nfft(3)))
  SAFE_ALLOCATE(fhole, (xct%nvband))
  SAFE_ALLOCATE(scfft, (nw(1),nw(2),nw(3)))
  scfft=0.d0
  if (peinf%inode.eq.0) then
    write(6,'(a,3i6)') ' FFT box size = ',nfft
    write(6,'(a,3i6)') ' Supercell grid = ',nw
  endif

  step = max(1,(peinf%nkpe/20))
  
  xct%iwriteint = 0 ! do not use intwfn
  do ikt=1,peinf%ikt(peinf%inode+1)
    ik=peinf%ik(peinf%inode+1,ikt)
    ikq=indexq(ik)
    call timacc(5,1,tsec)
    call genwf(crys,gvec,kg,syms,wfnc,xct,ik,ik,work,intwfn, is_cond = .true.)
    call timacc(5,2,tsec)
    
    call timacc(6,1,tsec)
    call genwf(crys,gvec,kgq,syms,wfnv,xct,ik,ikq,workq,intwfn, is_cond = .false.)
    call timacc(6,2,tsec)
    
    phhole = 2.0d0*PI_D*DOT_PRODUCT( kgq%f(:,ikq), rhole )
    kk(:) = kg%f(:,ik)
    
    do is=1,xct%nspin

!-------------------------------
! Calculate all needed valence wavefunctions in real space and
! store their value at hole position in fhole

      do iv=1,xct%nvband
        call put_into_fftbox(wfnv%ng,wfnv%cg(1:,iv,is),gvec%ng,gvec%k,wfnv%isort,ucfftv,nfft)
        call do_FFT(ucfftv,nfft,1)
        
        call interpol(rhole,nfft,wfnvint,ucfftv)
        fhole(iv) = CONJG(wfnvint)*CMPLX(cos(phhole),-sin(phhole))
      enddo ! iv

      fel=0.d0

!---------------------------------
! Calculate all needed conduction wavefunctions in the unit cell

      do ic=1,xct%ncband
        call put_into_fftbox(wfnc%ng,wfnc%cg(1:,ic,is),gvec%ng,gvec%k,wfnc%isort,ucfftc,nfft)
        call do_FFT(ucfftc,nfft,1)
        fel(ic,:,:,:)=ucfftc(:,:,:)
      enddo     !ic

!---------------------------------
! Calculate contribution from each kcvs quadruplet in the excited
! state

      do ic=1,xct%ncband
        do iv=1,xct%nvband
          ikcvs= is + (iv - 1 + (ic - 1 + (index_k(ik) - &
            1)*xct%ncband)*xct%nvband)*xct%nspin
          do i1=1,nw(1)
            rel(1) = dble(i1*nk(1))/dble(nw(1))
            ir1 = mod(i1*nk(1)*nfft(1)/nw(1),nfft(1))
            if (ir1.eq.0) ir1=nfft(1)
            do i2=1,nw(2)
              rel(2) = dble(i2*nk(2))/dble(nw(2))
              ir2 = mod(i2*nk(2)*nfft(2)/nw(2),nfft(2))
              if (ir2.eq.0) ir2=nfft(2)
              do i3=1,nw(3)
                rel(3) = dble(i3*nk(3))/dble(nw(3))
                ir3 = mod(i3*nk(3)*nfft(3)/nw(3),nfft(3))
                if (ir3.eq.0) ir3=nfft(3)
                
                phel = 2.0d0*PI_D*DOT_PRODUCT( kk, rel )
                scfft(i1,i2,i3) = scfft(i1,i2,i3) + &
                  A(ikcvs) * fhole(iv) * fel(ic,ir1,ir2,ir3) * &
                  CMPLX(cos(phel),sin(phel))
              enddo ! i3
            enddo ! i2
          enddo ! i1
          
        enddo ! iv
      enddo ! ic
    enddo ! is
    
    SAFE_DEALLOCATE_P(wfnc%cg)
    SAFE_DEALLOCATE_P(wfnc%isort)
    SAFE_DEALLOCATE_P(wfnv%cg)
    SAFE_DEALLOCATE_P(wfnv%isort)
    
    if (peinf%inode.eq.0.and.mod(ikt,step).eq.0) &
      write(6,*)'PE #0 working at k-point # ',ikt,' out of ',peinf%ikt(1)
    
  enddo ! ik

  call destroy_fftw_plans()
  
  ! typedefs initializes all of these ikolds to 0
  if(work%ikold.ne.0) then
    SAFE_DEALLOCATE_P(work%cg)
    SAFE_DEALLOCATE_P(work%ph)
    SAFE_DEALLOCATE_P(work%ind)
    SAFE_DEALLOCATE_P(work%isort)
  endif
  if(workq%ikold.ne.0) then
    SAFE_DEALLOCATE_P(workq%cg)
    SAFE_DEALLOCATE_P(workq%ph)
    SAFE_DEALLOCATE_P(workq%ind)
    SAFE_DEALLOCATE_P(workq%isort)
  endif
  
  SAFE_DEALLOCATE(ucfftc)
  SAFE_DEALLOCATE(ucfftv)
  SAFE_DEALLOCATE(fel)
  SAFE_DEALLOCATE(indexq)
  SAFE_DEALLOCATE(index_k)
  
!-----------------------------
! Synchronization

#ifdef MPI
  call MPI_BARRIER(MPI_COMM_WORLD,mpierr)
#endif

!-----------------------------
! Add up information on supercell 'scfft' one unitcell at a time
      
  SAFE_ALLOCATE(ucfft1, (nfft(1),nfft(2),nfft(3)))
  SAFE_ALLOCATE(ucfft2, (nfft(1),nfft(2),nfft(3)))
  
  do j1=1,nk(1)
    do j2=1,nk(2)
      do j3=1,nk(3)
        
        do i1=1, nfft(1)
          do i2=1, nfft(2)
            do i3=1, nfft(3)
              ucfft1(i1,i2,i3) = &
                scfft((j1-1)*nfft(1)+i1, &
                (j2-1)*nfft(2)+i2,(j3-1)*nfft(3)+i3)
            enddo
          enddo
        enddo
        
        ucfft2 = 0
        
#ifdef MPI
        call MPI_Allreduce(ucfft1(1,1,1),ucfft2(1,1,1),nfft(1)*nfft(2)*nfft(3), &
          MPI_COMPLEX_DPC,MPI_SUM,MPI_COMM_WORLD,mpierr)
#else
        ucfft2=ucfft1
#endif
        
        do i1=1,nfft(1)
          do i2=1,nfft(2)
            do i3=1,nfft(3)
              scfft((j1-1)*nfft(1)+i1, &
                (j2-1)*nfft(2)+i2,(j3-1)*nfft(3)+i3) = ucfft2(i1,i2,i3)
            enddo
          enddo
        enddo
        
      enddo
    enddo
  enddo
  
  SAFE_DEALLOCATE(ucfft2)
  SAFE_DEALLOCATE(ucfft1)
  
!-----------------------------------
! Write output (only PE 0)

  if (peinf%inode.eq.0) then
    call write_xct(es,rhole,avec,pstate,nk,nfft,scfft)
  endif

!------------------------------------
! Time accounting

  ntim=6
  SAFE_ALLOCATE(routnam, (ntim))
  routnam(1)='TOTAL:'
  routnam(2)='INPUT:'
  routnam(3)='INPUT_Q:'
  routnam(4)='FULLBZ:'
  routnam(5)='GENWF:'
  routnam(6)='GENWF_Q:'
  
  call timacc(1,2,tsec)
  if(peinf%inode.eq.0) then
    write(6,*)
    write(6,9000) 'CPU [s]','WALL [s]','#'
    write(6,*)
    do ii=2,ntim
      call timacc(ii,3,tsec,ncount)
      write(6,9001) routnam(ii),tsec(1),tsec(2),ncount
    enddo
    call timacc(1,3,tsec,ncount)
    write(6,9004) routnam(1),tsec(1),tsec(2)
    write(6,*)
    
9000 format(22x,a13,  3x,a13,  3x,a8)
9001 format(1x,a16,'      ',f13.3,3x,f13.3,3x,i8)
9004 format(1x,a16,'      ',f13.3,3x,f13.3)
    
  endif

  call write_memory_usage()

  itpv = 128+2*(peinf%inode-1)+2
  write(filename,'(a,i4.4)') 'INT_VWFNQ_', peinf%inode
  call open_file(itpv, filename, status = 'old')
  call close_file(itpv, delete = .true.) ! files INT_VWFNQ*

  itpc = 128+2*(peinf%inode-1)+1
  write(filename,'(a,i4.4)') 'INT_CWFN_', peinf%inode
  call open_file(itpc, filename, status = 'old')
  call close_file(itpc, delete = .true.) ! files INT_CWFN_*

#ifdef MPI
  call MPI_FINALIZE(mpierr)
#endif
  
end program plotxct
