!=============================================================================
!
! Module:
!
! (1) intwfn()  Originally By MLT               Last Modified 7/8/2008 (JRD)
!
!     Evaluate the transformation matrices and store them
!     in the arrays dcc,dvv.
!     The matrices are defined according to eq. (39) of Rohlfing & Louie:
!
!     dcc(ik,ic,jc,is) == d_{ic,ik}^{\tilde jc,\tilde jk)
!        ik: index of k-point in fine grid
!        ic: index of cond. band in fine grid
!        jk: index of k-point in coarse grid, imap(ik)=jk
!        jc: index of cond. band in coarse grid
!        is: spin index (the same in both grids)
!
!     dvv(ik,iv,jv,is) == d_{iv,ik}^{\tilde jv,\tilde jk)
!        ik: index of k-point in fine grid
!        iv: index of valence band in fine grid
!        jk: index of k-point in coarse grid, imap(ik)=jk
!        jv: index of valence band in coarse grid
!        is: spin index (the same in both grids)
!
!     Mixing between valence/conduction states is taken into account
!     by allowing jc to be also an occupied band, and similarly
!     for jv.
!
!     input: crys,syms,xct,gvec,kg_fi,kgq_fi types
!            indexq_fi (mapping between shifted and unshifted k-points)
!
!     output: dcc,dvv arrays
!             kco array
!             imap array
!
!===============================================================================

#include "f_defs.h"

module intwfn_m

  use global_m
  use genwf_m
  use intpts_m
  implicit none

  public :: intwfn

contains

subroutine intwfn(kp,crys,syms,xct,flag,gvec,kg_fi,kgq_fi, &
  dcc,dvv,kco,imap,indexq_fi,eqp,intwfnv,intwfnc)
  type (kpoints), intent(inout) :: kp
  type (crystal), intent(in) :: crys
  type (symmetry), intent(in) :: syms
  type (xctinfo), intent(inout) :: xct
  type (flags), intent(in) :: flag
  type (gspace), intent(in) :: gvec
  type (grid), intent(in) :: kg_fi
  type (grid), intent(in) :: kgq_fi
  SCALAR, intent(out) :: dcc(:,:,:,:) !< xct%nkpt,xct%ncband,xct%ncb_co,xct%nspin
  SCALAR, intent(out) :: dvv(:,:,:,:) !< xct%nkpt,xct%nvband,xct%nvb_co,xct%nspin
  real(DP), intent(out) :: kco(:,:) !< 3,xct%nkpt_co
  integer, intent(out) :: imap(:) !< xct%nkpt)
  integer, intent(in) :: indexq_fi(:) !< xct%nkpt
  type (eqpinfo), intent(inout) :: eqp
  type (int_wavefunction), intent(inout) :: intwfnc
  type (int_wavefunction), intent(inout) :: intwfnv

  integer :: closepts(4)
  real(DP) :: closeweights(4)
  type(work_genwf) :: work, workq, workco, workcoq
  type (grid) :: kg_co, kg_co_q
  type (wavefunction) :: wfnc_co,wfnc_fi
  type (wavefunction) :: wfnv_co,wfnvq_fi
  type (tdistgwf) :: distgwfco,distgwfcoq

  integer :: ik,ikt,ik_co,ik_fi,ikq_fi,vunit,eqp_dat_size
  integer :: ik_co_q, igumkq, ijk
  integer :: ic,ic_co,ic_fi,iv,iv_co,iv_fi,is
  integer :: ii,jj,kk,ll,gumk(3),igumk,iprint
  real(DP) :: dist,qg(3),dnorm,tempshift,tempsum,dweight
  real(DP), allocatable :: normc(:,:,:),normv(:,:,:)
  integer, allocatable :: idum(:)
  SCALAR, allocatable :: dummy(:,:,:,:)
  real(DP), allocatable :: edummy(:,:,:)
  integer, allocatable :: indexq_co(:)

  PUSH_SUB(intwfn)

  SAFE_ALLOCATE(normc, (xct%nkpt,xct%ncband,xct%nspin))
  SAFE_ALLOCATE(normv, (xct%nkpt,xct%nvband,xct%nspin))
  SAFE_ALLOCATE(indexq_co, (xct%nkpt_co))

  imap = 0
  dcc = 0.d0
  dvv = 0.d0
  if (flag%dtm.eq.1) then
    if (peinf%inode.eq.0) then

!-------------------------------
! Read transformation matrices, if they exist...

      write(6,*) 'Reading dcc,dvv matrices from file'
      
! Check if dtmat has the right parameters

      call open_file(unit=13,file='dtmat',form='unformatted',status='old')
      read(13) ik_co,ic_co,iv_co,ik,ic_fi,iv_fi,is

      if(ik_co /= xct%nkpt_co) then
        write(0,*) 'File has ', ik_co, '; we need ', xct%nkpt_co
        call die("dtmat does not have the correct number of k-points in coarse grid")
      endif
      if(ic_co /= xct%ncb_co) then
        write(0,*) 'File has ', ic_co, '; we need ', xct%ncb_co
        call die("dtmat does not have the correct number of conduction bands in coarse grid")
      endif
      if(iv_co /= xct%nvb_co) then
        write(0,*) 'File has ', iv_co, '; we need ', xct%nvb_co
        call die("dtmat does not have the correct number of valence bands in coarse grid")
      endif
      if(ik /= xct%nkpt) then
        write(0,*) 'File has ', ik, '; we need ', xct%nkpt
        call die("dtmat does not have the correct number of k-points in fine grid")
      endif
      if(ic_fi /= xct%ncband) then
        write(0,*) 'File has ', ic_fi, '; we need ', xct%ncband
        call die("dtmat does not have the correct number of conduction bands in fine grid")
      endif
      if(iv_fi /= xct%nvband) then
        write(0,*) 'File has ', iv_fi, '; we need ', xct%nvband
        call die("dtmat does not have the correct number of valence bands in fine grid")
      endif
      if(is /= xct%nspin) then
        write(0,*) 'File has ', is, '; we need ', xct%nspin
        call die("dtmat does not have the correct number of spins")
      endif
          
! kco(1:3,1:xct%nkpt_co) has the coordinates of k-points in
! the coarse grid used to generate "dtmat" (in general, this
! is equal to kg_co%fk(1:3,1:xct%nkpt_co) of a previous run)
!
! imap(1:xct%nkpt_fi) is a mapping array between a point in
! the fine grid and the closest one in the coarse grid

      do jj = 1, xct%nkpt_co
        read(13) kco(1,jj),kco(2,jj),kco(3,jj)
      enddo
      do jj = 1, xct%nkpt * xct%ncband * xct%ncb_co * xct%nspin
        read(13) ik_fi,ic_fi,ik_co,ic_co,is,dcc(ik_fi,ic_fi,ic_co,is)
      enddo
      do jj = 1, xct%nkpt * xct%nvband * xct%nvb_co * xct%nspin
        read(13) ik_fi,iv_fi,ik_co,iv_co,is,dvv(ik_fi,iv_fi,iv_co,is)
        imap(ik_fi) = ik_co
      enddo
      call close_file(13)
    endif

! PE # 0 distribute data

#ifdef MPI
    call MPI_BCAST(imap, xct%nkpt, MPI_INTEGER, 0, MPI_COMM_WORLD,mpierr)
    call MPI_BCAST(kco, 3*xct%nkpt_co, MPI_REAL_DP, 0, MPI_COMM_WORLD,mpierr)
    call MPI_BCAST(dcc(1,1,1,1), xct%nkpt*xct%ncband*xct%ncb_co*xct%nspin, MPI_SCALAR, 0, MPI_COMM_WORLD, mpierr)
    call MPI_BCAST(dvv(1,1,1,1), xct%nkpt*xct%nvband*xct%nvb_co*xct%nspin, MPI_SCALAR, 0, MPI_COMM_WORLD, mpierr)
#endif

  else

!-------------------------------
! ...Otherwise, start the computation of them

! Read the wavefunctions on the coarse grid and initialize dtmat

    if (xct%qflag .eq. 0) then
      if (peinf%inode .eq. 0) then
        write(6,999)
      endif
    endif
999 format(/,1x,'We are doing a Finite Q Calculation',/)

    call input_co(kp,crys,gvec,kg_co,syms,xct,flag%bzc,distgwfco,eqp)

!---------------------------------------------------- 
! If skip_interpolation keyword is present don`t interpolate

    if (xct%iskipinterp .eq. 1) then

      if (peinf%inode.eq.0) then
        write(6,'(a)') ' Skipping interpolation'
        write(6,'(a)')
      endif

      do ii = 1, xct%nkpt
        do jj = 1, xct%nkpt_co
          if (all(abs(kg_fi%f(1:3,ii)-kg_co%f(1:3,jj)) .lt. TOL_Small)) then
            imap(ii)=jj
            kco(1:3,jj)=kg_co%f(1:3,jj)
          endif
        enddo
      enddo
      
      do kk = 1, xct%nspin
        do jj = 1, xct%ncb_co
          do ii = 1, xct%ncband
            do ll = 1, xct%nkpt
              if (ii .eq. jj) then
                dcc(ll,ii,jj,kk)=1D0
              else
                dcc(ll,ii,jj,kk)=0D0
              endif
            enddo
          enddo
        enddo
      enddo
      
      do kk = 1, xct%nspin
        do jj = 1, xct%nvb_co
          do ii = 1, xct%nvband
            do ll = 1, xct%nkpt
              if (ii .eq. jj) then
                dvv(ll,ii,jj,kk)=1D0
              else
                dvv(ll,ii,jj,kk)=0D0
              endif
            enddo
          enddo
        enddo
      enddo

    else ! xct%iskipinterp .eq. 1

      call alloc_intpts(kg_co%nf,kg_co%f(:,:),umklapp = .false.)

!---------------------------------------------------- 
! JRD: If we do finite Q we need the coarse Q shifted grid wavefunctions

      if (xct%qflag .eq. 0) then
        call input_co_q(kp,crys,gvec,kg_co_q,syms,xct,flag%bzc, &
          indexq_co,kg_co,distgwfcoq)
      else
        do ijk = 1, xct%nkpt_co
          indexq_co(ijk) = ijk
        enddo
      endif
      
      do kk=1,xct%nkpt_co
        kco(:,kk) = kg_co%f(:,kk)
      enddo
      
      if (peinf%inode.eq.0) then
        call open_file(unit=13,file='dtmat',form='unformatted',status='replace')
        write(13) xct%nkpt_co,xct%ncb_co,xct%nvb_co, &
          xct%nkpt,xct%ncband,xct%nvband,xct%nspin
        do kk=1,xct%nkpt_co
          write(13) (kco(ii,kk),ii=1,3)
        enddo
        call close_file(13)
      endif

!-------------------------
! Loop over k-points in fine grid
  
      if (xct%inteqp .eq. 1) then
        SAFE_ALLOCATE(eqp%evshift, (xct%nvband,kg_fi%nf,xct%nspin))
        SAFE_ALLOCATE(eqp%ecshift, (xct%ncband,kg_fi%nf,xct%nspin))
        eqp%evshift=0D0
        eqp%ecshift=0D0
      endif

      if (peinf%inode.eq.0) then
        write(6,'(a)') ' Beginning wavefunction interpolation'
        write(6,'(a)')
      endif
      
      do ii=1,peinf%nkpe
        
        ik_fi=peinf%ik(peinf%inode+1,ii)

! JRD: Find the points in coarse grid closest to each point in the fine grid
! We need 4 points here for 3D

        if (ik_fi .ne. 0) then
          ikq_fi = indexq_fi(ik_fi)
        else
          ! this process has no work to do
          ikq_fi = 0
        endif
          
! JRD: In principle, we should be calling the below function twice. Once for ik_fi and once for ikq_fi. If using 
! momentum operator, it makes no difference. And for velocity operator, it makes insignificant difference for typical small q`s.

        if (kg_co%nf .gt. 1 .and. ik_fi > 0) then
          call intpts_local(crys,kg_fi%f(:,ik_fi),kg_co%nf,kg_co%f(:,:),xct,closepts(:),closeweights(:),umklapp = .false.)
        else
          closepts(:)=1
          closeweights(:)=0D0
          closeweights(1)=1D0
        endif

        do ijk = xct%idimensions + 1, 1, -1
          
          ik_co=closepts(ijk)
          dweight=closeweights(ijk)
          
          if (xct%qflag .eq. 0) then
            ik_co_q=indexq_co(ik_co)
          else
            ik_co_q=ik_co
          endif

! Store the umklapp G-vector, if not zero

          if (ik_fi .ne. 0) then
            qg(:)=kg_fi%f(:,ik_fi)-kg_co%f(:,ik_co)
            gumk(:)=nint(qg(:))
            do kk=1,gvec%ng
              if(all(gumk(1:3) == gvec%k(1:3,kk))) igumk=kk
            enddo

! Store the umklapp G-vector, if not zero - Finite Q

            if (xct%qflag .eq. 0) then
              qg(:)=kgq_fi%f(:,ik_fi)-kg_co_q%f(:,ik_co_q)
              gumk(:)=nint(qg(:))
              do kk=1,gvec%ng
                if(all(gumk(1:3) == gvec%k(1:3,kk))) igumkq=kk
              enddo
            else
              igumkq=igumk
            endif
          endif
      
!------------------------------
! Read needed wavefunctions: conduction bands from unshifted grid
! and valence bands from shifted grid

          if (ik_fi .ne. 0) then
            call genwf(crys,gvec,kg_fi,syms,wfnc_fi,xct,ik_fi,ik_fi,work,intwfnc, is_cond = .true.)
            call genwf(crys,gvec,kgq_fi,syms,wfnvq_fi,xct,ik_fi,ikq_fi,workq,intwfnv, is_cond = .false.)
          endif
          
          if (xct%qflag .eq. 0) then
            call genwf_co(crys,gvec,kg_co,kg_co_q,syms,wfnc_co, &
              wfnv_co,xct,ik_co,ik_co_q,distgwfco,distgwfcoq,workco,workcoq)
          else
            call genwf_co(crys,gvec,kg_co,kg_co_q,syms,wfnc_co, &
              wfnv_co,xct,ik_co,ik_co_q,distgwfco,distgwfco,workco,workcoq)
          endif
          
          if (ik_fi .ne. 0) then
            call mtxel_t(xct%nkpt,gvec,wfnc_co,wfnc_fi,wfnv_co,wfnvq_fi,dcc,dvv,igumk,ik_fi,igumkq)
          endif
          
          SAFE_DEALLOCATE_P(wfnc_co%cg)
          SAFE_DEALLOCATE_P(wfnc_co%isort)
          SAFE_DEALLOCATE_P(wfnv_co%cg)
          SAFE_DEALLOCATE_P(wfnv_co%isort)
          
          if (ik_fi .ne. 0) then
            SAFE_DEALLOCATE_P(wfnc_fi%cg)
            SAFE_DEALLOCATE_P(wfnc_fi%isort)
            SAFE_DEALLOCATE_P(wfnvq_fi%cg)
            SAFE_DEALLOCATE_P(wfnvq_fi%isort)
            imap(ik_fi) = ik_co
          endif

! Construct Interpolated eqp shifts
          
          if (xct%inteqp .eq. 1 .and. ik_fi .ne. 0) then
            
            do is=1,xct%nspin
              do iv_fi=1,xct%nvband
                tempshift=0d0
                tempsum=0d0
                do iv_co=1,xct%nvb_co
                  tempshift=tempshift+(abs(dvv(ik_fi,iv_fi,iv_co,is))**2)*eqp%evshift_co(iv_co,kg_co%indr(ik_co),is)
                  tempsum=tempsum+(abs(dvv(ik_fi,iv_fi,iv_co,is))**2)
                enddo
                eqp%evshift(iv_fi,ik_fi,is)= eqp%evshift(iv_fi,ik_fi,is)+tempshift*dweight/(tempsum)
              enddo
            enddo
            
            do is=1,xct%nspin
              do ic_fi=1,xct%ncband
                tempshift=0d0
                tempsum=0d0
                do ic_co=1,xct%ncb_co
                  tempshift=tempshift+(abs(dcc(ik_fi,ic_fi,ic_co,is))**2)*eqp%ecshift_co(ic_co,kg_co%indr(ik_co),is)
                  tempsum=tempsum+(abs(dcc(ik_fi,ic_fi,ic_co,is))**2)
                enddo
                eqp%ecshift(ic_fi,ik_fi,is)= eqp%ecshift(ic_fi,ik_fi,is)+ tempshift*dweight/(tempsum)
              enddo
            enddo
          endif
      
        enddo ! ijk
    
      enddo ! ii
  
      if (xct%inteqp .eq. 1) then
        
        if (xct%nkpt .ne. kg_fi%nf) then
          write(0,*) "nkpt != nf", xct%nkpt, kg_fi%nf
        endif
        
#ifdef MPI
        SAFE_ALLOCATE(edummy, (xct%nvband,xct%nkpt,xct%nspin))
        edummy = eqp%evshift
        call MPI_Allreduce(edummy(1,1,1),eqp%evshift(1,1,1),xct%nkpt*xct%nvband* &
          xct%nspin,MPI_DOUBLE_PRECISION,MPI_SUM,MPI_COMM_WORLD,mpierr)
        SAFE_DEALLOCATE(edummy)
        
        SAFE_ALLOCATE(edummy, (xct%ncband,xct%nkpt,xct%nspin))
        edummy = eqp%ecshift
        call MPI_Allreduce(edummy(1,1,1),eqp%ecshift(1,1,1),xct%nkpt*xct%ncband* &
          xct%nspin,MPI_DOUBLE_PRECISION,MPI_SUM,MPI_COMM_WORLD,mpierr)
        SAFE_DEALLOCATE(edummy)
#endif
        
        if (peinf%inode .eq. 0) then
          
          ! Only write eqp_q.dat for velocity. For momentum, it is identical to valence bands of eqp.dat.
          if(flag%opr == 0) then
            call open_file(unit=21,file='eqp_q.dat',form='formatted',status='replace')
            vunit = 21
            eqp_dat_size = xct%ncband*xct%nspin
          else
            vunit = 22
            eqp_dat_size = (xct%nvband+xct%ncband)*xct%nspin
          endif
          call open_file(unit=22,file='eqp.dat',  form='formatted',status='replace')
          do ik_fi = 1, kg_fi%nf
            if(flag%opr == 0) &
              write(21,'(3f13.9,i8)') kgq_fi%f(1,ik_fi),kgq_fi%f(2,ik_fi),kgq_fi%f(3,ik_fi),xct%nvband*xct%nspin
            write(22,'(3f13.9,i8)') kg_fi%f(1,ik_fi),kg_fi%f(2,ik_fi),kg_fi%f(3,ik_fi),eqp_dat_size
            do is = 1, xct%nspin
              do iv_fi = xct%nvband, 1, -1
                write(vunit,'(2i8,2f15.9)') is, xct%ifmaxq(ik_fi ,is) + 1 - iv_fi, eqp%evlda(iv_fi,ik_fi,is)*RYD, &
                  (eqp%evlda(iv_fi,ik_fi,is)+eqp%evshift(iv_fi,ik_fi,is))*RYD
              enddo
              do ic_fi = 1, xct%ncband
                write(22,'(2i8,2f15.9)') is, xct%ifmax(ik_fi ,is) + ic_fi, eqp%eclda(ic_fi,ik_fi,is)*RYD, &
                  (eqp%eclda(ic_fi,ik_fi,is)+eqp%ecshift(ic_fi,ik_fi,is))*RYD
              enddo
            enddo
          enddo
          if(flag%opr == 0) call close_file(21)
          call close_file(22)
          
          call open_file(unit=23,file='bandstructure.dat',form='formatted',status='replace')
          ! Write k-points in cart. coord. instead of cryst. coord. for the 
          ! band structure plot (as it is done in espresso and wannier90) --GSM
          write(23,'(a)') '# spin    band          kx          ky          kz          E(MF)          E(QP)        Delta E'
          write(23,'(a)') '#                        (Cartesian coordinates)             (eV)           (eV)           (eV)'

          do is = 1, xct%nspin
            do iv_fi = xct%nvband, 1, -1
              ! For the valence bands, the "q" structures are the ones that contained the calculated info. NOTE: kgq%fi = kg%fi
              do ik_fi = 1,kgq_fi%nf
                write(23,'(i6,i8,3f12.5,3f15.9)') is,xct%ifmaxq(ik_fi,is) + 1 - iv_fi, &
                  matmul(crys%bvec(1:3,1:3),kgq_fi%f(1:3,ik_fi))*crys%blat, &
                  eqp%evlda(iv_fi,ik_fi,is)*RYD,(eqp%evlda(iv_fi,ik_fi,is)+eqp%evshift(iv_fi,ik_fi,is))*RYD, &
                  eqp%evshift(iv_fi,ik_fi,is)*RYD
              enddo
            enddo
            do ic_fi = 1,xct%ncband
              do ik_fi = 1,kg_fi%nf
                write(23,'(i6,i8,3f12.5,3f15.9)') is,xct%ifmax(ik_fi,is) + ic_fi, &
                  matmul(crys%bvec(1:3,1:3),kg_fi%f(1:3,ik_fi))*crys%blat, &
                  eqp%eclda(ic_fi,ik_fi,is)*RYD,(eqp%eclda(ic_fi,ik_fi,is)+eqp%ecshift(ic_fi,ik_fi,is))*RYD, &
                  eqp%ecshift(ic_fi,ik_fi,is)*RYD
              enddo
            enddo
          enddo
          call close_file(23)

! JRD: We now update our fine-grid eigenvalues with the interpolated ones.
  
        endif ! peinf%inode .eq. 0

        eqp%ecqp(:,:,:)=eqp%eclda(:,:,:)+eqp%ecshift(:,:,:)
        eqp%evqp(:,:,:)=eqp%evlda(:,:,:)+eqp%evshift(:,:,:)    
        
      endif ! xct%inteqp .eq. 1
      
      SAFE_DEALLOCATE_P(kg_co%r)
      SAFE_DEALLOCATE_P(kg_co%f)
      SAFE_DEALLOCATE_P(kg_co%kg0)
      SAFE_DEALLOCATE_P(kg_co%indr)
      SAFE_DEALLOCATE_P(kg_co%itran)
      
      if (xct%qflag .eq. 0) then
        SAFE_DEALLOCATE_P(kg_co_q%r)
        SAFE_DEALLOCATE_P(kg_co_q%f)
        SAFE_DEALLOCATE_P(kg_co_q%kg0)
        SAFE_DEALLOCATE_P(kg_co_q%indr)
        SAFE_DEALLOCATE_P(kg_co_q%itran)
      endif
      
      ! typedefs initializes all of these ikolds to 0
      if (work%ikold .ne. 0) then
        SAFE_DEALLOCATE_P(work%cg)
        SAFE_DEALLOCATE_P(work%ph)
        SAFE_DEALLOCATE_P(work%ind)
        SAFE_DEALLOCATE_P(work%isort)
      endif
      if (workq%ikold .ne. 0) then
        SAFE_DEALLOCATE_P(workq%cg)
        SAFE_DEALLOCATE_P(workq%ph)
        SAFE_DEALLOCATE_P(workq%ind)
        SAFE_DEALLOCATE_P(workq%isort)
      endif
      if (workco%ikold .ne. 0) then
        SAFE_DEALLOCATE_P(workco%cg)
        SAFE_DEALLOCATE_P(workco%ph)
        SAFE_DEALLOCATE_P(workco%ind)
        SAFE_DEALLOCATE_P(workco%isort)
      endif
      if (workcoq%ikold .ne. 0) then
        SAFE_DEALLOCATE_P(workcoq%cg)
        SAFE_DEALLOCATE_P(workcoq%ph)
        SAFE_DEALLOCATE_P(workcoq%ind)
        SAFE_DEALLOCATE_P(workcoq%isort)
      endif

!--------------------------------
! Write out dcc,dvv. All PEs access dtmat (it may be faster if only one
! PE does the job...)
      
      do ii=1,peinf%npes
        if(peinf%inode+1.eq.ii) then
          call open_file(unit=13,file='dtmat',position='append',form='unformatted',status='old')
          do ikt=1,peinf%ikt(peinf%inode+1)
            ik_fi=peinf%ik(peinf%inode+1,ikt)
            do ic_fi=1,xct%ncband
              do ic_co=1,xct%ncb_co
                do is=1,xct%nspin
                  write(13) ik_fi,ic_fi,imap(ik_fi),ic_co,is, &
                    dcc(ik_fi,ic_fi,ic_co,is)
                enddo
              enddo
            enddo
          enddo
          call close_file(13)
        endif
#ifdef MPI
        call MPI_BARRIER(MPI_COMM_WORLD,mpierr)
#endif
      enddo
  
      do ii=1,peinf%npes
        if(peinf%inode+1.eq.ii) then
          call open_file(unit=13,file='dtmat',position='append',form='unformatted',status='old')
          do ikt=1,peinf%ikt(peinf%inode+1)
            ik_fi=peinf%ik(peinf%inode+1,ikt)
            do iv_fi=1,xct%nvband
              do iv_co=1,xct%nvb_co
                do is=1,xct%nspin
                  write(13) ik_fi,iv_fi,imap(ik_fi),iv_co,is, &
                    dvv(ik_fi,iv_fi,iv_co,is)
                enddo
              enddo
            enddo
          enddo
          call close_file(13)
        endif
#ifdef MPI
        call MPI_BARRIER(MPI_COMM_WORLD,mpierr)
#endif
      enddo
      
      if (xct%iwriteint.eq.1) then
        SAFE_DEALLOCATE_P(distgwfco%isort)
        SAFE_DEALLOCATE_P(distgwfco%zv)
        SAFE_DEALLOCATE_P(distgwfco%zc)
        if (xct%qflag .eq. 0) then
          SAFE_DEALLOCATE_P(distgwfcoq%isort)
          SAFE_DEALLOCATE_P(distgwfcoq%zv)
          SAFE_DEALLOCATE_P(distgwfcoq%zc)
        endif
      endif ! xct%iwriteint.eq.1
      
      if (xct%iwriteint.eq.0) then
        if(peinf%inode.eq.0) then
          call open_file(126, 'INT_CWFN_CO', status='old')
          call close_file(126, delete = .true.) ! delete INT_CWFN_CO
          call open_file(127, 'INT_VWFN_CO', status='old')
          call close_file(127, delete = .true.) ! delete INT_VWFN_CO
          if (xct%qflag .eq. 0) then
            call open_file(123, 'INT_CWFN_CO_Q', status='old')
            call close_file(123, delete = .true.) ! delete INT_CWFN_CO_Q
            call open_file(124, 'INT_VWFN_CO_Q', status='old')
            call close_file(124, delete = .true.) ! delete INT_VWFN_CO_Q
          endif
        endif
      endif ! xct%iwriteint.eq.0

!--------------------------------
! Finished the computation of transformation matrices, dcc/dvv
! Now, PEs share the information

#ifdef MPI
      SAFE_ALLOCATE(dummy, (xct%nkpt,xct%ncband,xct%ncb_co,xct%nspin))
      dummy = dcc
      call MPI_Allreduce(dummy(1,1,1,1),dcc(1,1,1,1),xct%nkpt*xct%ncband* &
        xct%ncb_co*xct%nspin,MPI_SCALAR,MPI_SUM,MPI_COMM_WORLD,mpierr)
      SAFE_DEALLOCATE(dummy)
      SAFE_ALLOCATE(dummy, (xct%nkpt,xct%nvband,xct%nvb_co,xct%nspin))
      dummy = dvv
      call MPI_Allreduce(dummy(1,1,1,1),dvv(1,1,1,1),xct%nkpt*xct%nvband* &
        xct%nvb_co*xct%nspin,MPI_SCALAR,MPI_SUM,MPI_COMM_WORLD,mpierr)
      SAFE_DEALLOCATE(dummy)
      SAFE_ALLOCATE(idum, (xct%nkpt))
      idum = imap
      call MPI_Allreduce(idum,imap,xct%nkpt,MPI_INTEGER, &
        MPI_SUM,MPI_COMM_WORLD,mpierr)
      SAFE_DEALLOCATE(idum)
#endif

      ! I am not sure why but this barrier is necessary to avoid hanging. --DAS
#ifdef MPI
      call MPI_BARRIER(MPI_COMM_WORLD,mpierr)
#endif
      call dealloc_intpts()
    
    endif ! xct%iskipinterp .eq. 1

!--------------------------------
! Check if transformation matrices are unitary and print out their
! norm (i.e., the norm of wavefunctions in the fine grid)
! The norm should be as close as possible to 1.d0. If it is not,
! important overlaps are being neglected.

    iprint=1
    if (iprint.eq.1 .and. peinf%inode.eq.0) then
      
      do is = 1, xct%nspin
        do ik_fi=1,xct%nkpt
          ik_co= imap(ik_fi)
          do iv=1,xct%nvband
            dnorm = 0.d0
            do iv_co=1,xct%nvb_co
              dnorm = dnorm + abs(dvv(ik_fi,iv,iv_co,is))**2
            enddo
            normv(ik_fi,iv,is) = dnorm
          enddo
          
          do ic=1,xct%ncband
            dnorm = 0.d0
            do ic_co=1,xct%ncb_co
              dnorm = dnorm + abs(dcc(ik_fi,ic,ic_co,is))**2
            enddo
            normc(ik_fi,ic,is) = dnorm
          enddo
        enddo
      enddo
      
      call open_file(20,file='dcmat_norm.dat',form='formatted',status='replace')
      write(20,'(a,i12,a)') ' -------  Norm of dcc matrices : Spins = ', xct%nspin,'  -------'
      if (xct%nspin.eq.1) then
        write(20,'(a)') '           k-point           ik_co    c   dist    |dcc|^2      '
      else
        write(20,'(a)') '           k-point           ik_co    c   dist    |dcc|^2 (spin up)  |dcc|^2 (spin down) '
      endif
      write(20,'(21a3)') ('---',kk=1,21)
      do ik_fi=1,xct%nkpt
        ik_co= imap(ik_fi)
        do kk=1,3
          qg(kk)=kg_fi%f(kk,ik_fi)-kco(kk,ik_co)
          qg(kk) = qg(kk) - anint( qg(kk) )
        enddo
        dist= DOT_PRODUCT(qg,MATMUL(crys%bdot,qg))
        do jj=1,xct%ncband
          write(20,250) kg_fi%f(:,ik_fi),ik_co,jj,sqrt(dist),(normc(ik_fi,jj,is), is = 1, xct%nspin)
        enddo
      enddo
      call close_file(20)
      
      call open_file(20,file='dvmat_norm.dat',form='formatted',status='replace')
      write(20,'(a,i12,a)') ' -------  Norm of dvv matrices : Spins = ', xct%nspin,'  -------'
      if (xct%nspin.eq.1) then
        write(20,'(a)') '           k-point           ik_co    v   dist    |dvv|^2      '
      else
        write(20,'(a)') '           k-point           ik_co    v   dist    |dvv|^2 (spin up)  |dvv|^2 (spin down) '
      endif
      write(20,'(21a3)') ('---',kk=1,21)
      do ik_fi=1,xct%nkpt
        ik_co= imap(ik_fi)
        do kk=1,3
          qg(kk)=kg_fi%f(kk,ik_fi)-kco(kk,ik_co)
          qg(kk) = qg(kk) - anint( qg(kk) )
        enddo
        dist= DOT_PRODUCT(qg,MATMUL(crys%bdot,qg))
        do jj=1,xct%nvband
          write(20,250) kg_fi%f(:,ik_fi),ik_co,jj,sqrt(dist),(normv(ik_fi,jj,is), is = 1, xct%nspin)
        enddo
      enddo
      call close_file(20)
250   format(' ( ',f6.3,' , ',f6.3,' , ',f6.3, ' ) ',i4,1x,i4,1x,f6.3,1x,f10.6,9x,f10.6)
      
    endif    ! Print out done
    
  endif

!---------------------------------
! Renormalize transformation matrices: matrices dcc,dvv are
! renormalized to 1.d0 (ideally, this step should not be
! performed but the interpolation can be really bad if dcc,dvv
! do not have unitary norm).

! FHJ: in some cases, the renormalization is a bad idea, in particular if the
!      vv or cc transitions are important (ex: near absp edge of doped systems)
  if (xct%renorm_transf) then
    if (peinf%inode.eq.0) &
      write(6,*) ' Renormalizing transformation matrices'
    
    do is=1, xct%nspin
      do ik=1,xct%nkpt
        do ic_fi=1,xct%ncband
          dnorm = 0.d0
          do ic=1,xct%ncb_co
            dnorm = dnorm + (abs(dcc(ik,ic_fi,ic,is)))**2
          enddo
          dnorm = sqrt(dnorm)
          do ic=1,xct%ncb_co
            dcc(ik,ic_fi,ic,is) = dcc(ik,ic_fi,ic,is)/dnorm
          enddo
        enddo
        do iv_fi=1,xct%nvband
          dnorm = 0.d0
          do iv=1,xct%nvb_co
            dnorm = dnorm + (abs(dvv(ik,iv_fi,iv,is)))**2
          enddo
          dnorm = sqrt(dnorm)
          do iv=1,xct%nvb_co
            dvv(ik,iv_fi,iv,is) = dvv(ik,iv_fi,iv,is)/dnorm
          enddo
        enddo
      enddo
    enddo
  endif

!-------------------------------------
! Print out information about the number of k-points in the fine grid
! interpolated from each point in the coarse grid
! ii : number of fine-grid points close to point ik_co

  iprint=1
  if (iprint.eq.1 .and. peinf%inode.eq.0) then
    write(6,*)
    write(6,*) ' K-points in coarse matrix : '
    write(6,*)
    do ik_co=1,xct%nkpt_co
      ii = 0
      do ik=1,xct%nkpt
        if (imap(ik).eq.ik_co) ii = ii + 1
      enddo
      write(6,'(i4,2x,3f10.4,2x,i4)') ik_co,kco(:,ik_co),ii
    enddo
  endif

  SAFE_DEALLOCATE(normc)
  SAFE_DEALLOCATE(normv)
  SAFE_DEALLOCATE(indexq_co)
  
  POP_SUB(intwfn)
  
  return
end subroutine intwfn

end module intwfn_m
