!============================================================================
!
! Routines:
!
! (1) mtxel()   Originally By ?         Last Modified 7/8/2008 (JRD)
!
!     Subroutine computes required matrix elements
!     of the form <nk|exp{i(q+g).r}|n1k-q>
!
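!     input   n                  band index of the state |nk> taken from wfnk
!     input   gvec               g-space data (kmax, ng and k are used here)
!     input   wfnkq              wavefunction coefficients for the |n1k-q> states
!     input   wfnk               wavefunction coefficients for the |nk> states
!     input   ncoul              number of matrix elements required
!     input   isrtrq             index array for g-vectors in
!                                <nk|exp(i(q+g).r)|n1k-q>
!     input   ispin              second index of the coefficient arrays
!                                (presumably the spin component)
!     output  aqs                matrix elements required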
!
!============================================================================

#include "f_defs.h"

subroutine mtxel(n,gvec,wfnkq,wfnk,ncoul,isrtrq,aqs,ispin)

  use global_m
  use fftw_m
  implicit none

! Arguments
!   n       band index selecting the |nk> coefficients inside wfnk%zk
!   gvec    g-space data; kmax, ng and k are read here
!   wfnkq   coefficients (zkq), count (nkpt) and index map (isrtkq)
!           for the |n1k-q> states
!   wfnk    coefficients (zk), count (nkpt) and index map (isrtk)
!           for the |nk> states
!   ncoul   number of matrix elements requested per n1
!   isrtrq  index array handed to get_from_fftbox to pick the g-vectors
!   aqs     on return, aqs(n1,1:ncoul) holds the matrix elements for
!           n1 = 1..peinf%ntband_node; any remaining rows up to
!           peinf%ntband_max are set to zero
!   ispin   second index of zk and zkq (presumably the spin component)

  integer, intent(in) :: n
  type (gspace), intent(in) :: gvec
  type (wfnkqstates), intent(in) :: wfnkq
  type (wfnkstates), intent(in) :: wfnk
  integer, intent(in) :: ncoul
  integer, intent(in) :: isrtrq(gvec%ng)
  SCALAR, intent(out) :: aqs(peinf%ntband_max,ncoul)
  integer, intent(in) :: ispin

! Local variables

  integer :: n1
  complex(DPC), dimension(:,:,:), allocatable :: fftbox1,fftbox2
  integer, dimension(3) :: Nfft
  real(DP) :: scale
  SCALAR, dimension(:), allocatable :: tmparray

!-------------------------
! We use FFT to compute <u_nk|e^(iG.r)|u_n1,k-q> elements where
! u_nk is the periodic part of the wave function.
! The calculation is done in real space, and integration over
! the grid is replaced by the sum over the grid points p:
!
! <u_nk|e^(iG.r)|u_n1,k-q>  =
!     Volume/Np * sum_p { conj(u_nk(p))*e^(iG.p)*u_n1k-q(p) }
!
! Since u_nk(p) = Volume^-0.5 * sum_G { cnk(G)*e^(iG.p) },
! and FFT is defined as FFT(cnk,+,p) = sum_G { cnk(G)*e^{+iG.p} },
! we must compute
!
! <u_nk|e^(iG.r)|u_n1,k-q>
!   = 1/Np * sum_p { conj(FFT(cnk,+,p))*e^(iG.p)*FFT(cn1k-q,+,p) }
!   = 1/Np * FFT(conj(FFT(cnk,+,:)).*FFT(cn1k-q,+,:),+,G)
!
! where .* is a point by point multiplication on the grid

  PUSH_SUB(mtxel)

! Compute size of FFT box we need and scale factor
! (scale is forwarded to get_from_fftbox; presumably it carries the 1/Np
! normalization from the formula above - confirm in setup_FFT_sizes)

  call setup_FFT_sizes(gvec%kmax,Nfft,scale)

! Allocate FFT boxes

  SAFE_ALLOCATE(fftbox1, (Nfft(1),Nfft(2),Nfft(3)))
  SAFE_ALLOCATE(fftbox2, (Nfft(1),Nfft(2),Nfft(3)))

! Put the coefficients of band n into FFT box 1 and transform them.
! Band n starts at offset (n-1)*nkpt+1 inside zk(:,ispin).

  call put_into_fftbox(wfnk%nkpt,wfnk%zk((n-1)*wfnk%nkpt+1:,ispin), &
    gvec%ng,gvec%k,wfnk%isrtk,fftbox1,Nfft)
  call do_FFT(fftbox1,Nfft,1)

! We need the complex conjugate of the |nk> band actually.
! Box 1 is prepared once and reused for every n1 below.

  call conjg_fftbox(fftbox1,Nfft)

! Now we loop over the n1 states and get the matrix elements:
!  Get n1 wave function and put it into box 2,
!  do FFT,
!  multiply by box1 contents,
!  do FFT again,
!  and extract the resulting matrix elements

  SAFE_ALLOCATE(tmparray, (ncoul))
  do n1=1,peinf%ntband_node
    call put_into_fftbox(wfnkq%nkpt,wfnkq%zkq((n1-1)*wfnkq%nkpt+1:,ispin), &
      gvec%ng,gvec%k,wfnkq%isrtkq,fftbox2,Nfft)
    call do_FFT(fftbox2,Nfft,1)
    call multiply_fftboxes(fftbox1,fftbox2,Nfft)
    call do_FFT(fftbox2,Nfft,1)
    call get_from_fftbox(ncoul,tmparray,gvec%ng,gvec%k,isrtrq,fftbox2,Nfft,scale)
! Row n1 of aqs is strided in memory, so the elements are gathered into
! the contiguous work array first and copied over afterwards.
    aqs(n1,:) = tmparray(:)
  enddo
  SAFE_DEALLOCATE(tmparray)

! aqs is intent(out), so rows that were not written above would be returned
! undefined. Zero them so that a caller touching the full array never sees
! garbage. The section is empty when ntband_node equals ntband_max.

  aqs(peinf%ntband_node+1:peinf%ntband_max,:) = 0.0d0

! We are done, so deallocate FFT boxes

  SAFE_DEALLOCATE(fftbox1)
  SAFE_DEALLOCATE(fftbox2)

  POP_SUB(mtxel)

  return
end subroutine mtxel
