!================================================================================
!
! Routines:
!
! (1) input()   Originally By ?                 Last Modified 7/7/2008
!
! Read in and setup up various data structures,
! read in and distributes wave functions, do all sorts of setup, etc.
!
!===============================================================================

#include "f_defs.h"

subroutine input(aheadinput,ajname,adate,kp,crys,syms,gvec, &
  ecuts,ecutsExtra,pol,indexq0,valueq0,q0norm,cwfn,vwfn, &
  intwfnv,intwfnc,omega_plasma)

  use global_m
  use eqpcor_m
  use input_utils_m
  use irrbz_m
  use fftw_m
  use fullbz_m
  use misc_m
  use sort_m
  use wfn_rho_vxc_io_m
  implicit none

  character, intent(in) :: aheadinput*60
  character, intent(in) :: ajname*6
  character, intent(in) :: adate*11
  type (kpoints), intent(out) :: kp
  type (crystal), intent(out) :: crys
  type (symmetry), intent(out) :: syms
  type (gspace), intent(out) :: gvec
  real(DP), intent(out) :: ecuts
  real(DP), intent(out) :: ecutsExtra, omega_plasma
  type (polarizability), intent(out) :: pol
  integer, intent(out) :: indexq0,valueq0
  real(DP), intent(out) :: q0norm
  type (conduction_wfns), intent(out) :: cwfn
  type (valence_wfns), intent(out) :: vwfn
  type (int_wavefunction), intent(out) :: intwfnv
  type (int_wavefunction), intent(out) :: intwfnc

  integer, allocatable :: isort(:),global_nvown_temp(:),global_ncown_temp(:)
  integer, allocatable :: global_doiownv(:,:),global_doiownc(:,:),global_pairowner_temp(:,:)
  integer, allocatable :: global_doiownv_temp(:,:),global_doiownc_temp(:,:)
  SCALAR, allocatable :: zc(:,:)

  integer :: itval
  character :: filename*20
  character :: filenamev*20
  character :: tmpfn*16
  character :: fncor*32
  character :: tmpstr1*16,tmpstr2*16,tmpstr3*16
  integer :: ii,ig,jj,itran,ik,ipe,iiii,ib,is
  integer :: dest
  integer :: iwritecb
  integer :: nrq,nrqmax,iq,ic,npools,mypool,mypoolrank
  integer :: itpv,itpc,iv,nqnonzero,nrkq,myipe,ipool
  integer :: nmtx,nmtx_l,npairs
  integer :: Nrod,Nplane,Nfft(3),dNfft(3),dkmax(3),nmpinode
  real(DP) :: qk(3),vcell,qtot
  real(DP) :: mem,fmem,rmem,rmem2,smem,scale,dscale
  real(DP), allocatable :: energies(:,:,:)
  logical :: dont_read
  
  character(len=3) :: sheader
  integer :: iflavor
  type(gspace) :: gvecq, gvec_kpt
  type(kpoints) :: kpq
  type(crystal) :: crysq
  type(symmetry) :: symsq
  type(grid) :: gr

  logical :: skip_checkbz

  PUSH_SUB(input)

!-------------------------------
! SIB: Read the input file

  call inread(pol,vwfn,cwfn,indexq0,valueq0,ecuts,ecutsExtra)

!-------------------------------
! Distribute Val/Cond Bands over Processors

! Create pools if not read from epsilon.inp

  if (peinf%npools .le. 0 .or. peinf%npools .gt. peinf%npes) then
    call createpools(vwfn%nband+pol%ncrit,cwfn%nband-vwfn%nband,peinf%npes,npools,peinf%nvown,peinf%ncown)
    peinf%npools = npools
  else
    npools = peinf%npools
    
    if (mod((vwfn%nband+pol%ncrit),npools) .eq. 0) then
      peinf%nvown = (vwfn%nband+pol%ncrit) / npools
    else
      peinf%nvown = ((vwfn%nband+pol%ncrit) / npools) + 1
    endif

    if (mod((cwfn%nband-vwfn%nband),(peinf%npes/npools)) .eq. 0) then
      peinf%ncown = (cwfn%nband-vwfn%nband) / (peinf%npes / npools)
    else
      peinf%ncown = (cwfn%nband-vwfn%nband) / (peinf%npes / npools) + 1
    endif
  endif

  if (peinf%inode .eq. 0) then
    write(tmpstr1,440) npools
    write(tmpstr2,440) peinf%ncown
    write(tmpstr3,440) peinf%nvown
    write(6,444) TRUNC(tmpstr1),TRUNC(tmpstr2),TRUNC(tmpstr3)
    write(7,444) TRUNC(tmpstr1),TRUNC(tmpstr2),TRUNC(tmpstr3)
  endif
440 format(i16)
444 format(1x,"Running with",1x,a,1x,"valence pools",/, &
      1x,"Number of conduction bands per processor:",1x,a,/, &
      1x,"Number of valence bands per processor:",1x,a,/)

  SAFE_ALLOCATE(peinf%global_pairowner,((vwfn%nband+pol%ncrit),(cwfn%nband-vwfn%nband)))
  peinf%global_pairowner=0
  SAFE_ALLOCATE(peinf%doiownv,(vwfn%nband+pol%ncrit))
  peinf%doiownv=0
  SAFE_ALLOCATE(peinf%doiownc,(cwfn%nband-vwfn%nband))
  peinf%doiownc=0
  SAFE_ALLOCATE(global_doiownv,((vwfn%nband+pol%ncrit),peinf%npes))
  global_doiownv=0
  SAFE_ALLOCATE(global_doiownc,((cwfn%nband-vwfn%nband),peinf%npes))
  global_doiownc=0
  SAFE_ALLOCATE(peinf%global_nvown,(peinf%npes))
  peinf%global_nvown=0
  SAFE_ALLOCATE(peinf%global_ncown,(peinf%npes))
  peinf%global_ncown=0
  SAFE_ALLOCATE(peinf%indexv,(vwfn%nband+pol%ncrit))
  peinf%indexv=0
  SAFE_ALLOCATE(peinf%indexc,(cwfn%nband-vwfn%nband))
  peinf%indexc=0
  SAFE_ALLOCATE(peinf%invindexv,(peinf%nvown))
  peinf%invindexv=0
  SAFE_ALLOCATE(peinf%invindexc,(peinf%ncown))
  peinf%invindexc=0
  SAFE_ALLOCATE(peinf%ivprincipalowner,(vwfn%nband+pol%ncrit))
  peinf%ivprincipalowner=0
  SAFE_ALLOCATE(peinf%nvprincipalown,(peinf%npes))
  peinf%nvprincipalown=0

  peinf%nvownt=0
  peinf%ncownt=0

  mypool = (peinf%inode/(peinf%npes/npools))
  mypoolrank = mod(peinf%inode,(peinf%npes/npools))
  myipe = peinf%inode + 1

  do iv = 1,vwfn%nband+pol%ncrit
    ipool = mod(iv,npools)
        
    ipe = ipool*(peinf%npes/npools)+mod(iv,(peinf%npes/npools))
    peinf%ivprincipalowner(iv) = ipe 
    peinf%nvprincipalown(ipe+1) = peinf%nvprincipalown(ipe+1) + 1
    
    if (mypool .eq. ipool) then

      peinf%nvownt=peinf%nvownt+1
      peinf%global_nvown(myipe)=peinf%nvownt
      peinf%indexv(iv)=peinf%nvownt
      peinf%invindexv(peinf%nvownt)=iv
      peinf%doiownv(iv)=1
      global_doiownv(iv,myipe)=1

      do ic = 1, cwfn%nband-vwfn%nband
        if ( mod(ic,(peinf%npes/npools)) .eq. mypoolrank) then
          if (peinf%nvownt .eq. 1) then
            peinf%ncownt=peinf%ncownt+1
            peinf%global_ncown(myipe) = peinf%ncownt
            peinf%invindexc(peinf%global_ncown(myipe))=ic
            peinf%indexc(ic)=peinf%ncownt
            peinf%doiownc(ic)=1
            global_doiownc(ic,myipe)=1
          endif
          peinf%global_pairowner(iv,ic)=myipe
        endif
      enddo

    endif
  enddo

#ifdef MPI
  SAFE_ALLOCATE(global_pairowner_temp,((vwfn%nband+pol%ncrit),(cwfn%nband-vwfn%nband)))
  call MPI_ALLREDUCE(peinf%global_pairowner(1,1),global_pairowner_temp(1,1), &
    (vwfn%nband+pol%ncrit)*(cwfn%nband-vwfn%nband),MPI_INTEGER,MPI_SUM, &
    MPI_COMM_WORLD,mpierr)
  peinf%global_pairowner=global_pairowner_temp
  SAFE_DEALLOCATE(global_pairowner_temp)
  
  SAFE_ALLOCATE(global_nvown_temp,(peinf%npes))
  call MPI_ALLREDUCE(peinf%global_nvown,global_nvown_temp,peinf%npes,MPI_INTEGER,MPI_SUM, &
    MPI_COMM_WORLD,mpierr)
  peinf%global_nvown=global_nvown_temp
  SAFE_DEALLOCATE(global_nvown_temp)
  
  SAFE_ALLOCATE(global_ncown_temp,(peinf%npes))
  call MPI_ALLREDUCE(peinf%global_ncown,global_ncown_temp,peinf%npes,MPI_INTEGER,MPI_SUM, &
    MPI_COMM_WORLD,mpierr)
  peinf%global_ncown=global_ncown_temp
  SAFE_DEALLOCATE(global_ncown_temp)

  SAFE_ALLOCATE(global_doiownv_temp,((vwfn%nband+pol%ncrit),peinf%npes))
  call MPI_ALLREDUCE(global_doiownv(1,1),global_doiownv_temp(1,1),(vwfn%nband+pol%ncrit)*peinf%npes,MPI_INTEGER,MPI_SUM, &
    MPI_COMM_WORLD,mpierr)
  global_doiownv=global_doiownv_temp
  SAFE_DEALLOCATE(global_doiownv)
  
  SAFE_ALLOCATE(global_doiownc_temp,((cwfn%nband-vwfn%nband),peinf%npes))
  call MPI_ALLREDUCE(global_doiownc(1,1),global_doiownc_temp(1,1),(cwfn%nband-vwfn%nband)*peinf%npes,MPI_INTEGER,MPI_SUM, &
    MPI_COMM_WORLD,mpierr)
  global_doiownc=global_doiownc_temp
  SAFE_DEALLOCATE(global_doiownc)
#endif

!---------------------------------
! Determine the available memory

  call procmem(mem,nmpinode)

  if(peinf%inode.eq.0) then
    write(6,998) mem / 1024**2
    write(7,998) mem / 1024**2
  endif
998 format(1x,'Memory available:',f10.1,1x,'MB per PE')

  fmem = mem / 8


  if (peinf%inode == 0) call open_file(26,file='WFNq',form='unformatted',status='old')

  sheader = 'WFN'
  iflavor = 0
  call read_binary_header_type(26, sheader, iflavor, kpq, gvecq, symsq, crysq, warn = .false.)
  nrkq = kpq%nrk
  
  if(peinf%inode == 0) then
    call close_file(26)
    call open_file(25,file='WFN',form='unformatted',status='old')
  endif

  call read_binary_header_type(25, sheader, iflavor, kp, gvec, syms, crys)

  call check_header('WFN', kp, gvec, syms, crys, 'WFNq', kpq, gvecq, symsq, crysq, is_wfn = .true.)

  if(any(kp%kgrid(1:3) /= kpq%kgrid(1:3))) then
    if(peinf%inode == 0) then
      write(0,*) 'WFN  kgrid = ', kp%kgrid(1:3)
      write(0,*) 'WFNq kgrid = ', kpq%kgrid(1:3)
    endif
    call die('kgrids for WFN and WFNq must be the same', only_root_writes = .true.)
  endif

  call dealloc_header_type(sheader, crysq, kpq)

  SAFE_ALLOCATE(gvec%k, (3, gvec%ng))
  call read_binary_gvectors(25, gvec%ng, gvec%ng, gvec%k)

!-------------------------------
! (gsm) Estimate the required memory

  gvec%nktot = product(gvec%kmax(1:3))

! estimate for nmtx
  if(peinf%inode == 0) then
    SAFE_ALLOCATE(gvec%ekin, (gvec%ng))
    do ig=1,gvec%ng
      gvec%ekin(ig)=DOT_PRODUCT(gvec%k(:,ig),MATMUL(crys%bdot,gvec%k(:,ig)))
    enddo
    SAFE_ALLOCATE(pol%isrtx, (gvec%ng))
    call sortrx_D(gvec%ng, gvec%ekin, pol%isrtx, gvec = gvec%k)
    nmtx = gcutoff(gvec, pol%isrtx, ecuts)
    SAFE_DEALLOCATE_P(pol%isrtx)
    SAFE_DEALLOCATE_P(gvec%ekin)

    nmtx_l = int(sqrt(dble(nmtx**2) / peinf%npes))
    
    gr%nr = kp%nrk
    SAFE_ALLOCATE(gr%r, (3, gr%nr))
    gr%r = kp%rk
    call fullbz(crys,syms,gr,syms%ntran,skip_checkbz,wigner_seitz=.false.,paranoid=.true.)
    tmpfn='WFN'
    if (.not. skip_checkbz) then
      call checkbz(gr%nf,gr%f,kp%kgrid,kp%shift,crys%bdot, &
        tmpfn,'k',.false.,pol%freplacebz,pol%fwritebz)
    endif
    nrqmax=0

    do iq=1,pol%nq
      nrq=0
      call subgrp(pol%qpt(:,iq),syms)
      call irrbz(syms,gr%nf,gr%f,nrq)
      if (nrq .gt. nrqmax) nrqmax=nrq
    enddo
    
! required memory
    rmem=0.0d0
! array pol%xi in program main (and xilocal, xilocal2 (if gcomm=-1))
    if (pol%freq_dep.eq.0) then
      rmem = rmem + 2 * dble(kp%nspin) * dble(nmtx_l)**2
      if (pol%gcomm .eq. -1) then
        rmem = rmem + dble(kp%nspin) * dble(nmtx_l)**2
      endif
    endif
! arrays pol%xiRDyn and pol%xiADyn in program main and xiRDynlocal
! and xilocal2RDyn (if gcomm=-1)
    if (pol%freq_dep.eq.2) then
      rmem = rmem + dble(kp%nspin) * dble(nmtx_l)**2 * dble(pol%nfreq) * 4
      if (pol%gcomm .eq. -1) then
        rmem = rmem + dble(kp%nspin) * dble(nmtx_l)**2 * dble(pol%nfreq) * 2
      endif
    endif
! arrays gmetempr and gmetempc in program main
    if (pol%freq_dep.eq.0) then
      if (kp%nrk .eq. 1) then
        rmem = rmem + 2 * dble(nmtx) * dble(vwfn%nband + pol%ncrit) / sqrt(dble(peinf%npes))
      else
        rmem = rmem + 2 * dble(nmtx) * dble(vwfn%nband + pol%ncrit) * dble(syms%ntran) / sqrt(dble(peinf%npes))
      endif
    endif
! arrays gmeR(A)Dyntempr and gmeR(A)Dyntempc in program main
    if (pol%freq_dep.eq.2) then
      if (pol%gcomm .eq. 0) then
        if (kp%nrk .eq. 1) then
          rmem = rmem + dble(nmtx) * dble(pol%nfreq + 1) * dble(vwfn%nband + pol%ncrit) / sqrt(dble(peinf%npes))
        else
          rmem = rmem + dble(nmtx) * dble(pol%nfreq + 2) * dble(vwfn%nband + pol%ncrit) * dble(syms%ntran) / sqrt(dble(peinf%npes))
        endif
#ifdef CPLX
        if (kp%nrk .eq. 1) then
          rmem = rmem + dble(nmtx) * dble(pol%nfreq + 2) * dble(vwfn%nband + pol%ncrit) / sqrt(dble(peinf%npes))
        else
          rmem = rmem + dble(nmtx) * dble(pol%nfreq + 2) * dble(vwfn%nband + pol%ncrit) * dble(syms%ntran) / sqrt(dble(peinf%npes))
        endif
#endif
      endif
      npairs=(vwfn%nband+pol%ncrit)*(cwfn%nband-vwfn%nband)
      if (pol%gcomm .eq. -1) then
        if (kp%nrk .eq. 1) then
          rmem = rmem + 2 * dble(nmtx) * dble(npairs) * dble(pol%nfreq) / peinf%npes**1.5
        else
          rmem = rmem + 2 * dble(nmtx) * dble(kp%nrk) * dble(npairs) * dble(syms%ntran) * dble(pol%nfreq) / peinf%npes**1.5
        endif
#ifdef CPLX
        if (kp%nrk .eq. 1) then
          rmem = rmem + 2 * dble(nmtx) * dble(pol%nfreq) * dble(npairs) / peinf%npes**1.5
        else
          rmem = rmem + 2 * dble(nmtx) * dble(kp%nrk) * dble(npairs) * dble(syms%ntran) * dble(pol%nfreq) / peinf%npes**1.5
        endif
#endif
      endif
    endif
! array gmetempn in program main
    rmem = rmem + dble(nmtx)
! array tmparray in program mtxel
    rmem = rmem + dble(nmtx)
! array pol%gme in program main
    rmem = rmem + dble(kp%nspin) * dble(nmtx) * dble(peinf%ncown) * dble(peinf%nvown) * dble(nrqmax)
! arrays zin, vwfn%zv, cwfn%zc, zinc and zinc_old in subroutine genwf
    rmem = rmem + dble(kp%nspin) * dble(2 * 1 + 2 * peinf%ncown + 1) * dble(kp%ngkmax)
! intwfn_files
    if (pol%iwriteint .eq. 1) then
      rmem = rmem + dble(peinf%nvown + peinf%ncown) * dble(kp%ngkmax) * dble(kp%nrk) * dble(kp%nspin)
! intvwfnq if have a q=0 point
      if (indexq0 .gt. 0 .and. valueq0 .eq. 1 .and. pol%iqexactlyzero .eq. 0) then
        rmem = rmem + dble(peinf%nvown) * dble(kp%ngkmax) * dble(nrkq) * dble(kp%nspin)
      endif
    endif

    rmem = rmem * sizeof_scalar()

! memory for pol%eden or pol%edenDyn
    if (pol%freq_dep .eq. 0) then
      rmem = rmem + dble(kp%nspin) * dble(vwfn%nband + pol%ncrit) * dble(cwfn%nband) * 8
    endif
    if (pol%freq_dep .eq. 2) then
      rmem = rmem + dble(kp%nspin) * dble(peinf%nvown) * dble(peinf%ncown) * dble(nrqmax) * 8
    endif
! array gvec%indv in input
    rmem = rmem + dble(gvec%nktot) * 4
! arrays fftbox1 and fftbox2 in subroutines mtxel
    call setup_FFT_sizes(gvec%kmax,Nfft,scale)
    rmem = rmem + dble(product(Nfft(1:3))) * 32

    write(6,989) rmem / 1024**2
    write(7,989) rmem / 1024**2

! store required memory in variable smem
    smem=rmem

! random numbers
    rmem=0.0D0
! (gsm) we don`t do random numbers in Epsilon anymore
!        if (pol%icutv.ne.5) then
! arrays ran, qran, and qran2
! (ran is deallocated before qran2 is allocated)
!          rmem=rmem+6.0D0*dble(nmc)*8.0D0
!        endif
! various truncation schemes
    rmem2=0.0d0
! cell wire truncation
    if (pol%icutv.eq.4) then
      dkmax(1) = gvec%kmax(1) * n_in_wire
      dkmax(2) = gvec%kmax(2) * n_in_wire
      dkmax(3) = 1
      call setup_FFT_sizes(dkmax,dNfft,dscale)
! array fftbox_2D
      rmem2 = rmem2 + dble(dNfft(1) * dNfft(2)) * 16
! array inv_indx
      rmem2 = rmem2 + dble(product(Nfft(1:3))) * 4
! array qran
      rmem2 = rmem2 + 3 * dble(nmc) * 8
    endif
! cell box truncation (parallel version only)
    if (pol%icutv.eq.5) then
      dkmax(1:3) = gvec%kmax(1:3) * n_in_box
      call setup_FFT_sizes(dkmax,dNfft,dscale)
      if (mod(dNfft(3),peinf%npes) == 0) then
        Nplane = dNfft(3)/peinf%npes
      else
        Nplane = dNfft(3)/peinf%npes+1
      endif
      if (mod(dNfft(1)*dNfft(2),peinf%npes) == 0) then
        Nrod = (dNfft(1)*dNfft(2))/peinf%npes
      else
        Nrod = (dNfft(1)*dNfft(2))/peinf%npes+1
      endif
! array fftbox_2D
      rmem2 = rmem2 + dble(dNfft(1)) * dble(dNfft(2)) * dble(Nplane) * 16
! array fftbox_1D
      rmem2 = rmem2 + dble(dNfft(3)) * dble(Nrod) * 16
! array dummy
!          rmem2=rmem2+dble(dNfft(1))*dble(dNfft(2))*16.0d0
! arrays dummy1 and dummy2
      rmem2 = rmem2 + dble(Nrod) * (peinf%npes + 1) * 16
! array inv_indx
      rmem2 = rmem2 + dble(product(Nfft(1:3))) * 4
    endif
    if (rmem2 .gt. rmem) rmem = rmem2
    write(6,988) rmem / 1024**2
    write(7,988) rmem / 1024**2
    if (smem .gt. mem) write(0,777)
  endif
989 format(1x,'Memory required for execution:',f7.1,1x,'MB per PE')
988 format(1x,'Memory required for vcoul:',f7.1,1x,'MB per PE',/)
777 format(1x,'WARNING: Required memory greater than memory available.',/, &
      3x,'This calculation will likely die. To reduce memory,',/, &
      3x,'first make sure you are running on an ideal # of PEs.',/, &
      3x,'Then, try increasing the number of PEs or try adding',/, &
      3x,'gcomm_elements and comm_disk to epsilon.inp',/)

!--------------------------------------------
! SIB: Read in crys%bdot and crys%celvol from file #25
! Complain if crys%celvol and (2pi)^3/sqrt[|det(b)|] are different
! bdot is read in row by row.  get_volume() assumes that bdot is symmetric.
! bdot should be dot products of the reciprocal lattice vectors.
! Therefore vol is the real space volume of the cell.

  if(peinf%inode.eq.0) then

    write(7,'(1x,"Cell Volume =",e16.9,/)') crys%celvol

!-------------------------
! Compute Cell Volume

    call get_volume(vcell,crys%bdot)
    if (abs(crys%celvol-vcell).gt.TOL_Small) then
      call die('volume mismatch')
    endif

#ifdef VERBOSE
    write(6,'(1x,"Num Syms =",i4)') syms%ntran
    write(6,'(1x,"Sym Trans Mtxs")')
    do itran=1,syms%ntran
      write(6,'(1x,9i4)') ((syms%mtrx(ii,jj,itran),ii=1,3),jj=1,3)
    enddo
    do itran=1,syms%ntran
      write(6,'(1x,3f10.6)') (syms%tnp(ii,itran),ii=1,3)
    enddo
    write(6,'(1x,"Sym Trans Vecs")')
    write(6,*)
#endif

  endif

  if(cwfn%nband.gt.kp%mnband) then
    if(peinf%inode == 0) then
      write(tmpstr1,660) cwfn%nband
      write(tmpstr2,660) kp%mnband
      write(0,666) TRUNC(tmpstr1), TRUNC(tmpstr2)
    endif
660 format(i16)
666 format(1x,'The total number of bands (',a,') specified in epsilon.inp',/, &
      3x,'is larger than the number of bands (',a,') available in WFN.',/)
    call die('More bands specified in epsilon.inp than available in WFN.')
  endif
  if(cwfn%nband.eq.kp%mnband) then
    call die("You must provide one more band in WFN than used in epsilon.inp in order to assess degeneracy.")
  endif
  
  SAFE_ALLOCATE(energies, (kp%mnband, kp%nrk, kp%nspin))
  energies(1:kp%mnband, 1:kp%nrk, 1:kp%nspin) = kp%el(1:kp%mnband, 1:kp%nrk, 1:kp%nspin)
  SAFE_DEALLOCATE_P(kp%el)
  SAFE_ALLOCATE(kp%el, (cwfn%nband + 1, kp%nrk, kp%nspin))
  do ib = 1, cwfn%nband
    kp%el(ib, 1:kp%nrk, 1:kp%nspin) = energies(cwfn%band_index(ib), 1:kp%nrk, 1:kp%nspin)
  enddo
  kp%el(cwfn%nband + 1, 1:kp%nrk, 1:kp%nspin) = energies(cwfn%band_index(cwfn%nband) + 1, 1:kp%nrk, 1:kp%nspin)
  SAFE_DEALLOCATE(energies)

  call scissor_shift(kp, cwfn%nband, vwfn%evs, vwfn%evdel, vwfn%ev0, cwfn%ecs, cwfn%ecdel, cwfn%ec0)

!----------------------------------------------------------------
! If quasi-particle corrections were requested, read the corrected
! quasiparticle energies from file (in eV)
    
  if(peinf%inode == 0) then
    if(any(abs(kp%el(cwfn%nband, 1:kp%nrk, 1:kp%nspin) - kp%el(cwfn%nband + 1, 1:kp%nrk, 1:kp%nspin)) .lt. TOL_Degeneracy)) then
      if(pol%degeneracy_check_override) then
        write(0,'(a)') &
          "WARNING: Selected number of bands breaks degenerate subspace. " // &
          "Run degeneracy_check.x for allowable numbers."
        write(0,*)
      else
        write(0,'(a)') &
          "Run degeneracy_check.x for allowable numbers, or use keyword " // &
          "degeneracy_check_override to run anyway (at your peril!)."
        call die("Selected number of bands breaks degenerate subspace.")
      endif
    endif

    if(pol%eqp_corrections) then
      fncor='eqp.dat'
      call eqpcor(fncor,0,1,kp,cwfn%nband,1,cwfn%nband, &
        cwfn%nband,0,cwfn%nband,0,kp%el,kp%el,kp%el,1,0)
    endif

    call find_efermi(pol%rfermi, pol%efermi, pol%efermi_input, kp, cwfn%nband, &
      "unshifted grid", should_search = .true., should_update = .true., write7 = .true.)
    ! note: here ifmax may be updated. We will never use it, so it does not matter that it only happens on proc 0. -DAS

    if(any (kp%ifmax(:,:) < vwfn%nband .or. kp%ifmax(:,:) > vwfn%nband + pol%ncrit)) then
      write(0,'(a,i6,a,i6,a)') 'epsilon.inp says there are ', vwfn%nband, ' fully occupied bands and ', &
        pol%ncrit, ' partially occupied.'
      write(0,'(a,2i6)') 'This is inconsistent with highest bands in WFN file; min, max = ', minval(kp%ifmax), maxval(kp%ifmax)
      call die("band_occupation, number_partial_occup, and WFN inconsistent.")
    endif
    
    if(maxval(kp%ifmax) - minval(kp%ifmax) > pol%ncrit) then
      write(0,'(a,i6,a)') 'epsilon.inp says there are ', pol%ncrit, ' partially occupied bands.'
      write(0,'(a,i6)') 'This is less than the number partially occupied in WFN file: ', maxval(kp%ifmax) - minval(kp%ifmax)
      call die("number_partial_occup and WFN inconsistent.")
    endif

    call assess_degeneracies(kp, kp%el(cwfn%nband + 1, :, :), cwfn%nband, pol%efermi, TOL_Degeneracy)

    call calc_qtot(kp, crys%celvol, pol%efermi, qtot, omega_plasma, write7 = .true.)

    if (mod(peinf%npes,npools) .ne.0) then
      write(0,'(a)') 'WARNING: The number of cpus does not divide evenly in the optimal number of pools.'
      write(0,*)  mod(peinf%npes,npools),'cpus are doing no work'
      write(0,'(a)') ''
    endif
    if (peinf%nvownt .ne. peinf%nvown) then
      write(0,'(a)') 'WARNING: Your valence bands are not equally distributed among the pools'
      write(0,*) 'Max valence bands per pool is',peinf%nvown
      write(0,*) 'Min valence bands per pool is',peinf%nvownt
      write(0,'(a)') ''
    endif
  endif

#ifdef MPI
  call MPI_Bcast(pol%efermi, 1, MPI_REAL_DP, 0, MPI_COMM_WORLD, mpierr)
  call MPI_BCAST(omega_plasma, 1, MPI_REAL_DP, 0, MPI_COMM_WORLD, mpierr)
  if(cwfn%nband.gt.kp%mnband) cwfn%nband=kp%mnband
  if(peinf%inode.ne.0) then
    SAFE_ALLOCATE(kp%degeneracy, (cwfn%nband, kp%nrk, kp%nspin))
  endif
  call MPI_Bcast(kp%degeneracy(1,1,1), cwfn%nband * kp%nrk * kp%nspin, MPI_INTEGER, 0, MPI_COMM_WORLD, mpierr)
#endif

! SIB: if indexq0 is not zero,
! then q0norm is set to the length of the q0 vector via
! q0norm = sqrt[q0'*b*q0]  where q0' is transpose of q0
! and q0 is pol%qpt(:,indexq0).
  
  if(indexq0 .ne. 0) then
    q0norm=sqrt(DOT_PRODUCT(MATMUL(crys%bdot, pol%qpt(:,indexq0)),pol%qpt(:,indexq0)))
  endif

  call gvec_index(gvec)
  
!------------------------------------
! Loop over kpoints and read in wavefunctions
!
! nkpt      is the size of the wavefunctions vectors
! nvec      # of bands per kpoint
! ndv       nvec*nkpt

! JRD: have proc 0 open unit (itpv) with name 'INT_VWFN'

  if (pol%iwriteint .eq. 0) then
    if(peinf%inode.eq.0) then
      write(filenamev,'(a)') 'INT_VWFN'
      itpv=200028
      call open_file(itpv,file=filenamev,form='unformatted',status='replace')
    endif
        
    if(peinf%inode.lt.10000) then
      write(filename,'(a,i4.4)') 'INT_CWFN_', peinf%inode
    else
      call die('too many nodes required')
    endif
    itpc=100028+peinf%inode
    call open_file(itpc,file=filename,form='unformatted',status='replace')
  else
    itval=vwfn%nband+pol%ncrit
    SAFE_ALLOCATE(intwfnv%ng, (kp%nrk))
    SAFE_ALLOCATE(intwfnv%isort, (kp%ngkmax,kp%nrk))
    SAFE_ALLOCATE(intwfnv%cg, (kp%ngkmax,kp%nrk*peinf%nvownt,kp%nspin))
    SAFE_ALLOCATE(intwfnv%el, (itval,kp%nspin,kp%nrk))
    SAFE_ALLOCATE(intwfnv%qk, (3,kp%nrk))
    SAFE_ALLOCATE(intwfnc%ng, (kp%nrk))
    SAFE_ALLOCATE(intwfnc%isort, (kp%ngkmax,kp%nrk))
    SAFE_ALLOCATE(intwfnc%cg, (kp%ngkmax,kp%nrk*peinf%ncownt,kp%nspin))
    SAFE_ALLOCATE(intwfnc%cbi, (kp%nrk*peinf%ncownt))
    SAFE_ALLOCATE(intwfnc%el, (cwfn%nband,kp%nspin,kp%nrk))
    SAFE_ALLOCATE(intwfnc%qk, (3,kp%nrk))
  endif

!----------------------------------------------------------------------------
! Beginning k-point loop

  do ik=1,kp%nrk
    qk(:)=kp%rk(:,ik)

! For each G-vector read, tries to find its match in gvec%k(:,:).
! If not found, aborts.  Otherwise stores its index in isort(i)
! where ik is the current k-point we are considering (loop we are in)
! and i is the index of the kx,ky,kz just read.

    SAFE_ALLOCATE(gvec_kpt%k, (3, kp%ngk(ik)))
    call read_binary_gvectors(25, kp%ngk(ik), kp%ngk(ik), gvec_kpt%k)

    SAFE_ALLOCATE(isort, (kp%ngk(ik)))
    do ig = 1, kp%ngk(ik)
      call findvector(isort(ig), gvec_kpt%k(1, ig), gvec_kpt%k(2, ig), gvec_kpt%k(3, ig), gvec)
      if (isort(ig) ==  0)  call die('input: could not find gvec')
    enddo
    SAFE_DEALLOCATE_P(gvec_kpt%k)

! JRD: 0 proc write nkptotal, isort, kp%el, and qk to unit itpv
! for current (ik-th) k-point and valence wave functions.

    if (pol%iwriteint .eq. 0) then
      if (peinf%inode.eq.0) then
        write(itpv) kp%ngk(ik), (isort(ig),ig=1,kp%ngk(ik)), &
          ((kp%el(ib,ik,is),ib=1,(vwfn%nband+pol%ncrit)), &
          is=1,kp%nspin),(qk(ii),ii=1,3)
      endif
    else
      itval=vwfn%nband+pol%ncrit
      intwfnv%ng(ik)=kp%ngk(ik)
      intwfnv%isort(1:kp%ngk(ik),ik)=isort(1:kp%ngk(ik))
      intwfnv%el(1:itval,1:kp%nspin,ik)=kp%el(1:itval,ik,1:kp%nspin)
!            write(6,*) 'Input Vband', ik, qk(:)
      intwfnv%qk(:,ik)=qk(:)
!            write(6,*) 'Input Vband2', ik, intwfnv%qk(:,ik)
    endif

! SIB: each proc writes to unit itpc qk, kp%el, kp%ngk(ik), and isort
! for current kpoint (ik) and all bands.
        
    if (pol%iwriteint .eq. 0) then
      write(itpc) (qk(ii),ii=1,3), &
        ((kp%el(ib,ik,is),ib=1,cwfn%nband),is=1,kp%nspin), &
        kp%ngk(ik),(isort(ig),ig=1,kp%ngk(ik))
    else
      intwfnc%ng(ik)=kp%ngk(ik)
      intwfnc%isort(1:kp%ngk(ik),ik)=isort(1:kp%ngk(ik))
      intwfnc%el(1:cwfn%nband,1:kp%nspin,ik)=kp%el(1:cwfn%nband,ik,1:kp%nspin)
      intwfnc%qk(:,ik)=qk(:)
    endif

! SIB: loop over i for kp%mnbands (number of bands) and do the following:
!   - reads the info from unit=25 and checks norm of wavefunction.
!   - if a valence band, proc 0 writes it to unit itpv
!   - if a conduction band, then it is sent to/received by the correct
!     destination node (which is proc # i-nvalence-1 mod npes)
!   - the destination processor then writes the data to unit itpc

    SAFE_ALLOCATE(zc, (kp%ngk(ik), kp%nspin))
    do ib=1,kp%mnband

! JRD: Dumb debugging
#ifdef VERBOSE
      if (peinf%inode .eq. 0) write(6,'("Reading Wavefunction: ik = ",i6," n = ",i6)') ik, ib
#endif

      dont_read = (ib > cwfn%nband)
      call read_binary_data(25, kp%ngk(ik), kp%ngk(ik), kp%nspin, zc, dont_read = dont_read)
      if(ib > cwfn%nband) cycle

      if(peinf%inode == 0) then
        do is = 1, kp%nspin
          call checknorm('WFN',ib,ik,kp%nspin,kp%ngk(ik),zc(:,is))
        enddo
      endif

      if(cwfn%band_index(ib).le.vwfn%nband) then
        
! Write to valence tape

        itval=vwfn%nband+pol%ncrit
            
        if (pol%iwriteint .eq. 0) then
          if(peinf%inode .eq. 0 ) then
            write(itpv) ((zc(ig,is),ig=1,kp%ngk(ik)),is=1,kp%nspin)
          endif
        else
          if (peinf%doiownv(cwfn%band_index(ib)) .eq. 1) then 
            iiii=peinf%indexv(cwfn%band_index(ib))+(ik-1)*peinf%nvownt
            intwfnv%cg(1:kp%ngk(ik),iiii,1:kp%nspin)=zc(1:kp%ngk(ik),1:kp%nspin)
          endif
        endif
        
      else
            
! JRD/JBN: For Metals

        if(cwfn%band_index(ib).le.vwfn%nband+pol%ncrit) then
          itval=vwfn%nband+pol%ncrit
          if (pol%iwriteint .eq. 0) then
            if(peinf%inode.eq.0) then
              write(itpv) ((zc(ig,is),ig=1,kp%ngk(ik)),is=1,kp%nspin)
            endif
          else
            if (peinf%doiownv(cwfn%band_index(ib)) .eq. 1) then 
              iiii=peinf%indexv(cwfn%band_index(ib))+(ik-1)*peinf%nvownt
              intwfnv%cg(1:kp%ngk(ik),iiii,1:kp%nspin)=zc(1:kp%ngk(ik),1:kp%nspin)
            endif
          endif
        endif

! Write to conduction tape

        iwritecb=0
        dest=cwfn%band_index(ib)-vwfn%nband
        if (peinf%doiownc(dest) .eq. 1) then
          iwritecb=1
        endif
        
        if(iwritecb .eq. 1) then
          if (pol%iwriteint .eq. 0) then
            write(itpc) cwfn%band_index(ib)-vwfn%nband,kp%ngk(ik),((zc(ig,is),ig=1,kp%ngk(ik)),is=1,kp%nspin)
          else
            iiii=peinf%indexc(ib-vwfn%nband)+(ik-1)*peinf%ncownt
!                if (peinf%inode .eq. 0) write(6,*) 'Cond Band: ',iiii,peinf%ciown(i-vwfn%nband) 
            intwfnc%cbi(iiii)=cwfn%band_index(ib)-vwfn%nband
            intwfnc%cg(1:kp%ngk(ik),iiii,1:kp%nspin)=zc(1:kp%ngk(ik),1:kp%nspin)
!                if (peinf%inode .eq. 0) write(6,*) 'Cond Band Finished' 
          endif
        endif
      endif
    enddo  ! of loop i over kp%mnbands
    
    SAFE_DEALLOCATE(isort)
    SAFE_DEALLOCATE(zc)
    
  enddo ! of loop over k-points

  if(peinf%inode.eq.0) then
    call close_file(25)
    if (pol%iwriteint .eq. 0) call close_file(itpv)
  endif
  if (pol%iwriteint .eq. 0) call close_file(itpc)

  if (pol%eqp_corrections) then
    vwfn%evs=0.0D0
    vwfn%evdel=0.0D0
    vwfn%ev0=0.0D0
    cwfn%ecs=0.0D0
    cwfn%ecdel=0.0D0
    cwfn%ec0=0.0D0
  endif

!-------------------------------------------
! Initialize chi0mat and eps0mat:
! SIB: only printed if indexq0>0 (i.e. some q vector is "zero")
! All sorts of stuff is printed, such as the epsilon cutoff, number of bands,
! gvectors, bdot matrix, the addresses (gvec%indv), and pol%qpt (q-points)
! to chi0mat, and eps0mat has some of it.

  if (peinf%inode.eq.0) then
    
    if (indexq0.gt.0) then
      
      if (pol%skip_epsilon) then
        write(6,'(a)') 'File chi0mat will be written.'
        call open_file(10,file='chi0mat',form='unformatted',status='replace')
        write(10) aheadinput,ajname,adate
        write(10) pol%freq_dep,pol%nFreq
        write(10) kp%kgrid(1:3)
        if (pol%freq_dep .eq. 2) then
          write(10) (pol%dFreqGrid(ii),ii=1,pol%nFreq),(pol%dFreqBrd(ii),ii=1,pol%nFreq)
        else
          write(10)
        endif
        write(10)
        write(10)
        write(10) ecuts,cwfn%nband
        write(10) kp%nrk, 1 ! invflag
        write(10) gvec%ng,gvec%nktot,gvec%kmax(1:3), &
          ((gvec%k(ii,ig),ii=1,3),ig=1,gvec%ng), &
          ((crys%bdot(ii,jj),jj=1,3),ii=1,3),(gvec%indv(ig),ig=1,gvec%nktot)
        write(10) (pol%qpt(ii,indexq0),ii=1,3)
      endif
      
      if (.not. pol%skip_epsilon) then
        call open_file(12,file='eps0mat',form='unformatted',status='replace')
        write(12) ajname,adate
        write(12) pol%freq_dep,pol%nFreq
        write(12) kp%kgrid(1:3)
        if (pol%freq_dep .eq. 2) then
          write(12) (pol%dFreqGrid(ii),ii=1,pol%nFreq),(pol%dFreqBrd(ii),ii=1,pol%nFreq)
        else
          write(12)
        endif
        write(12)
        write(12)
        write(12) ecuts
        write(12) 1,(pol%qpt(ii,indexq0),ii=1,3)
        write(12) gvec%ng, ((gvec%k(jj,ig),jj=1,3),ig=1,gvec%ng)
      endif
      
    endif
    
!----------------------------------------
! Initialize chimat and epsmat:
! SIB: chimat and epsmat are printed to for all non-zero q vectors.
! Same sort of information as above.

    nqnonzero=pol%nq
    if (indexq0.gt.0)  nqnonzero=pol%nq-1
    
    if (nqnonzero.ge.1) then
      
      if (pol%skip_epsilon) then
        write(6,'(a)') 'File chimat will be written.'
        call open_file(11,file='chimat',form='unformatted',status='replace')
        write(11) aheadinput,ajname,adate
        write(11) pol%freq_dep,pol%nFreq
        write(11) kp%kgrid(1:3)
        if (pol%freq_dep .eq. 2) then
          write(11) (pol%dFreqGrid(ii),ii=1,pol%nFreq),(pol%dFreqBrd(ii),ii=1,pol%nFreq)
        else
          write(11)
        endif
        write(11)
        write(11)
        write(11) ecuts,cwfn%nband
        write(11) kp%nrk, 1 ! invflag
        write(11) gvec%ng,gvec%nktot,gvec%kmax(1:3), &
          ((gvec%k(jj,ig),jj=1,3),ig=1,gvec%ng), &
          ((crys%bdot(ii,jj),jj=1,3),ii=1,3),(gvec%indv(ig),ig=1,gvec%nktot)
        write(11) pol%nq,indexq0,((pol%qpt(jj,iq),jj=1,3),iq=1,pol%nq)
      endif
      
      if (.not. pol%skip_epsilon) then
        call open_file(13,file='epsmat',form='unformatted',status='replace')
        write(13) ajname,adate
        write(13) pol%freq_dep,pol%nFreq
        write(13) kp%kgrid(1:3)
        if (pol%freq_dep .eq. 2) then
          write(13) (pol%dFreqGrid(ii),ii=1,pol%nFreq),(pol%dFreqBrd(ii),ii=1,pol%nFreq)
        else
          write(13)
        endif
        write(13)
        write(13)
        write(13) ecuts
        write(13) nqnonzero,((pol%qpt(jj,iq),jj=1,3),iq=1+indexq0,pol%nq)
        write(13) gvec%ng, ((gvec%k(jj,ig),jj=1,3),ig=1,gvec%ng)
      endif
      
    endif

!-------------------------------
! SIB: print to stdout and unit=7 (epsilon.log) some information:  cutoffs,
! number of bands, number of q points, and list of q-points.

!        write(6,'(a60,/ /)') aheadinput
    write(6,120) ecuts,cwfn%nband,pol%nq
    write(7,120) ecuts,cwfn%nband,pol%nq
120 format(/,/,1x,'ECUTS  =',f10.6,/,1x,'NBANDS =',i5,/,1x,'NQ     =',i4,/)
        
! SIB: report on scissors operator parameters to units 6 and 7

    write(6,915) vwfn%evs,vwfn%evdel,vwfn%ev0
    write(7,915) vwfn%evs,vwfn%evdel,vwfn%ev0
    write(6,916) cwfn%ecs,cwfn%ecdel,cwfn%ec0
    write(7,916) cwfn%ecs,cwfn%ecdel,cwfn%ec0
915 format(/,1x,'Valence scissors operator:',/,1x,'evs   =',f10.6, &
      1x,'eV',/,1x,'evdel =',f10.6,/,1x,'ev0   =',f10.6,1x,'eV')
916 format(1x,'Conduction scissors operator:',/,1x,'ecs   =',f10.6, &
      1x,'eV',/,1x,'ecdel =',f10.6,/,1x,'ec0   =',f10.6,1x,'eV',/)
    
    do iq=1,pol%nq
      if (indexq0.eq.iq) then
        write(6,925) (pol%qpt(jj,iq),jj=1,3),q0norm
      else
        write(6,926) (pol%qpt(jj,iq),jj=1,3)
      endif
    enddo
    write(6,*)
925 format(1x,'Q0 and |Q0| =',4f10.6)
926 format(1x,'Q =',3f10.6)

  endif

  if(pol%iwriteint == 0) then
#ifdef MPI
    call MPI_Barrier(MPI_COMM_WORLD, mpierr)
#endif
  endif

  POP_SUB(input)
      
  return
      
end subroutine input
