!===================================================================================
!
! Routines:
!
! (1) kernel (main)      Originally By MLT       Last Modified: 5/5/2008 (JRD)
!
!     See README_kernel for more information.
!
!     Calculates the kernel, direct and exchange parts, of the Bethe-Salpeter
!     equation. The direct part is decomposed in head, wing, body, and the
!     exchange part involves only the "proper" part of the Coulomb
!     interaction. Spin-polarized case implemented.
!
!     For more details, see:
!     Rohlfing & Louie, PRB 62:(8), 4927 (2000)
!     G. Strinati, Rivista del Nuovo Cimento 11:(12), 1 (1988)
!
!     Code originally written by Gian-Marco Rignanese, Eric K Chang.
!
!     All cited equations refer to Rohlfing & Louie (PRB (62):4927, 2000)
!     unless specified otherwise.
!
!===================================================================================

#include "f_defs.h"

program kernel

  use global_m
  use fftw_m
  use vcoul_generator_m
  implicit none

  type (crystal) :: crys
  type (symmetry) :: syms
  type (gspace) :: gvec
  type (xctinfo) :: xct
  type (grid) :: kg,qg,kgq
  type (kpoints) :: kp
  type (wavefunction) :: wfnc,wfncp
  type (wavefunction) :: wfnv,wfnvp
  type (int_wavefunction) :: intwfnv,intwfnc
  type (twork_scell) :: work_scell

  character :: filename*20
  integer :: ii,ijk,iparallel,iownsize,itempval
  integer :: ik,ikp
  integer :: ncount,ntim,flagbz
  integer :: imin,imax,ic,iv,icp,ivp,ifreq
  integer :: ifqa_dummy,irqa_dummy,g0a_dummy(3)
  real(DP) :: vq0, avgcut, oneoverq, q0len
  real(DP) :: tsec(2),tmin(2),tmax(2),q0vec(3),vq(3)
  real(DP) :: qqa_dummy,fqa_dummy(3),vcoul0(1)
  SCALAR :: epsheaddummy, wcoul0dummy

  character*16, allocatable :: routnam(:)
  integer, allocatable :: indexq(:),irqa(:),ifqa(:),g0a(:,:)
  integer, allocatable :: isrtq(:)
  real(DP), allocatable :: vcoul(:),vcoularray(:,:),fqa(:,:),qqa(:)
  SCALAR, allocatable :: &
    bsedbody(:,:,:),bsedhead(:,:,:), &
    bsedwing(:,:,:),bsex(:,:,:), &
    bsedbody1(:,:,:),bsedwing1(:,:,:), &
    bsedbody2(:,:,:),bsedwing2(:,:,:)

!----------------- Begin Program ----------------------------------------------

  call peinfo_init()

!----------------------
! Initialize random numbers

  peinf%jobtypeeval = 0

!----------------------
! Initialize timer

  call timacc(1,0,tsec)
  call timacc(1,1,tsec)
  
  call write_program_header('BSE/Kernel', .false.)
  
!------------------------
! Read kernel.inp

  call logit('Calling inread_kernel')
  call open_file(8,file='kernel.inp',form='formatted',status='old')
  call inread_kernel(xct,flagbz,imin,imax)
  call close_file(8)

  if (xct%iwritecoul .eq. 1 .and. peinf%inode .eq. 0) then
    call open_file(19,file='vcoul',form='formatted',status='replace')
  endif

!-------------------------
! Read WFN_fi

  call timacc(2,1,tsec)
  call logit('Calling input_kernel')

! JRD: What routine we call depends on if we have center-of-mass momentum.

  if (xct%qflag .eq. 0) then
    call die("Center-of-mass momentum is broken.")
!    call input_kernel_q(crys,gvec,kg,kgq,kp,kpq,syms,xct,flagbz)
    SAFE_ALLOCATE(indexq, (kg%nf))
    do ijk = 1, kg%nf
      indexq(ijk)=xct%indexq(ijk)
    enddo
  else
    call input_kernel(crys,gvec,kg,kp,syms,xct,flagbz,intwfnv,intwfnc)
    SAFE_ALLOCATE(indexq, (kg%nf))
    do ijk=1,kg%nf
      indexq(ijk)=ijk
    end do
  endif

  call timacc(2,2,tsec)

!      if(peinf%inode.eq.0) write(6,*) 'Exit input_kernel'

! JRD: Write some info about our calculation

  if (peinf%inode.eq.0) then
    write(6,*)
    write(6,*) ' valence bands limits : ', xct%nvband,' (total) '
    write(6,*) ' cond.   bands limits : ', xct%ncband,' (total) '
    write(6,*) ' number of spins   = ',xct%nspin
    write(6,'(a36,f6.2,a4)') ' energy cutoff (dielect. matrix) = ', &
      xct%ecute,' Ryd'
    write(6,'(a36,f6.2,a4)') ' energy cutoff (kernel matrices) = ', &
      xct%ecutg,' Ryd'
    if (xct%dynamic_screening) then
      write(6,'(a,f8.3,a)')'  Dynamic dielectric screening on'
    endif
    write(6,*)
    write(6,'(a8,i8,a8,i8)') ' neps = ',xct%neps,' ng = ', xct%ng
    write(6,*)
  endif

!----------------------------------
! Read eps0mat and epsmat

  call logit('Calling epscopy')
  call timacc(3,1,tsec)
  call epscopy(crys,gvec,syms,qg,xct,q0vec)
  call timacc(3,2,tsec)

!------------------ Initialize BSE Arrays ----------------------------------------------

!      write(6,*) ' Allocating bse arrays', peinf%inode, xct%ivpar, xct%icpar

  if ( xct%ivpar .eq. 1) then
    SAFE_ALLOCATE(bsedbody, (peinf%myown,xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsedhead, (peinf%myown, xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsedwing, (peinf%myown,xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsex, (peinf%myown,xct%nspin,xct%nspin))
    iownsize=1
  else if ( xct%icpar .eq. 1) then
    itempval=(xct%nvband)**2
    iownsize=itempval
    SAFE_ALLOCATE(bsedbody, (peinf%myown*itempval,xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsedhead, (peinf%myown*itempval,xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsedwing, (peinf%myown*itempval, xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsex, (peinf%myown*itempval, xct%nspin,xct%nspin))
  else
    itempval=(xct%nvband*xct%ncband)**2
    iownsize=itempval
!        write(6,*) ' Allocating bse arrays2', peinf%inode, xct%ivpar, xct%icpar, itempval
    SAFE_ALLOCATE(bsedbody, (peinf%myown*itempval,xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsedhead, (peinf%myown*itempval,xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsedwing, (peinf%myown*itempval, xct%nspin,xct%nspin))
    SAFE_ALLOCATE(bsex, (peinf%myown*itempval,xct%nspin,xct%nspin))
  endif

  bsedbody(:,:,:) = ZERO
  bsedhead(:,:,:) = ZERO
  bsedwing(:,:,:) = ZERO
  bsex(:,:,:) = ZERO

  if (xct%dynamic_screening) then
    if ( xct%ivpar .eq. 1) then
      SAFE_ALLOCATE(bsedbody1, (peinf%myown,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedwing1, (peinf%myown,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedbody2, (peinf%myown,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedwing2, (peinf%myown,xct%nspin,xct%nspin))
    else if (xct%icpar .eq. 1) then
      itempval=(xct%nvband)**2
      SAFE_ALLOCATE(bsedbody1, (peinf%myown*itempval,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedwing1, (peinf%myown*itempval,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedbody2, (peinf%myown*itempval,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedwing2, (peinf%myown*itempval,xct%nspin,xct%nspin))
    else
      itempval=(xct%nvband*xct%ncband)**2
      SAFE_ALLOCATE(bsedbody1, (peinf%myown*itempval,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedwing1, (peinf%myown*itempval,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedbody2, (peinf%myown*itempval,xct%nspin,xct%nspin))
      SAFE_ALLOCATE(bsedwing2, (peinf%myown*itempval,xct%nspin,xct%nspin))
    endif

    bsedbody1(:,:,:) = ZERO
    bsedbody2(:,:,:) = ZERO
    bsedwing1(:,:,:) = ZERO
    bsedwing2(:,:,:) = ZERO
  else
    ! this is stupid but it allows us to run with ifort -check all without getting errors
    ! about unallocated arrays being passed to routines. Better would be not to pass them!
    SAFE_ALLOCATE(bsedbody1, (1, 1, 1))
    SAFE_ALLOCATE(bsedwing1, (1, 1, 1))
    SAFE_ALLOCATE(bsedbody2, (1, 1, 1))
    SAFE_ALLOCATE(bsedwing2, (1, 1, 1))
  endif
  if (peinf%inode.eq.0) write(6,*)
  
!--------- Calculate Needed Coulomb Interaction -------------------------------

  SAFE_ALLOCATE(vcoul, (xct%ng))
  SAFE_ALLOCATE(vcoularray, (xct%ng,qg%nf))
  vcoularray=0d0
  SAFE_ALLOCATE(isrtq, (xct%ng))
  do ijk=1,xct%ng
    isrtq(ijk) = ijk
  enddo
  
  if (peinf%inode .eq. 0) then
    write(6,701)
  endif
  701 format(1x,'Calculating Vcoul',/)

  avgcut=TOL_ZERO
  q0len = sqrt(DOT_PRODUCT(q0vec,MATMUL(crys%bdot,q0vec)))
      
  iparallel=1

  do ik=1,qg%nf
    vq(:)=qg%f(:,ik)
    vq0 = DOT_PRODUCT(vq,MATMUL(crys%bdot,vq))
    
#ifdef VERBOSE
    if (peinf%inode .eq. 0) then
      write(6,*) 'Calculating Vcoul', ik, qg%nf
    endif
#endif

    call vcoul_generator(xct%icutv,xct%truncval,gvec, &
      crys%bdot,kg%nf,xct%ng,isrtq,xct%iscreen,vq,q0vec, &
      vcoul,xct%iwritecoul,iparallel,avgcut,oneoverq, &
      kp%kgrid,epsheaddummy,work_scell,.false.,wcoul0dummy)
    vcoularray(:,ik)=vcoul(:)

    if (vq0 .lt. TOL_Zero) then
      if (peinf%inode .eq. 0) then
        write(6,801) q0vec
      endif
      vq(:) = q0vec(:)
801 format(1x,'For G=0: setting q0 =',3f10.6,/)

      ifreq = 1

      call checkconsistency(xct%icutv,xct%iscreen,q0vec,crys%bdot,ifreq)
    
      vcoul0(1)=0d0

      call vcoul_generator(xct%icutv,xct%truncval,gvec, &
      crys%bdot,kg%nf,1,isrtq,xct%iscreen,vq,q0vec, &
        vcoul0(:),xct%iwritecoul,iparallel,avgcut,oneoverq, &
        kp%kgrid,epsheaddummy,work_scell,.false.,wcoul0dummy)

      vcoularray(1,ik)=vcoul0(1)
    endif

  enddo ! ik

  call destroy_qran()

  vcoularray=vcoularray/(8d0*PI_D)
  
#ifdef VERBOSE
  if (peinf%inode .eq. 0) then
    write(6,*) 'Finished Vcoul', ik, qg%nf
  endif
#endif

  SAFE_DEALLOCATE(vcoul)

!--------- Start the computation. ---------------------------------------------

  call logit('Starting main kernel loop')
  if (peinf%inode.eq.0) write(6,771) peinf%myown
771 format(1x,"Starting calculation of kernel with",i6,1x, &
      "blocks of pairs per PE",/)
  
! Write Partial Block Info

!      if (peinf%inode.eq.0) write(6,*)
!     >      ' Doing partial calculation, blocks = ',imin,imax

  SAFE_ALLOCATE(qqa, (peinf%myown))
  SAFE_ALLOCATE(fqa, (3,peinf%myown))
  SAFE_ALLOCATE(g0a, (3,peinf%myown))
  SAFE_ALLOCATE(irqa, (peinf%myown))
  SAFE_ALLOCATE(ifqa, (peinf%myown))
  
  call sortbyq(fqa,qqa,g0a,ifqa,irqa,qg,kg,crys)

! JRD: The below may be useful for debugging
!
!  call mpi_barrier(mpi_comm_world,mpierr)
!  write(6,*) 'myown',peinf%inode,peinf%myown
!  call mpi_barrier(mpi_comm_world,mpierr)
  
  do ii=1,peinf%nckpe

    call logitint('   Main loop:  ii=',ii)
    if (ii .le. peinf%myown) then
      ik=peinf%ik(peinf%inode+1,ii)
      ikp=peinf%ikp(peinf%inode+1,ii)
      
      if (xct%icpar .eq. 1) then
        ic=peinf%ic(peinf%inode+1,ii)
        icp=peinf%icp(peinf%inode+1,ii)
      else
        ic=1
        icp=1
      endif
      
      if (xct%ivpar .eq. 1) then
        iv=peinf%iv(peinf%inode+1,ii)
        ivp=peinf%ivp(peinf%inode+1,ii)
      else
        iv=1 
        ivp=1 
      endif
      
      call timacc(4,1,tsec)
      call logit('   Calling genwf_kernel')
      
!        write(6,*) peinf%inode,'calling genwf_kernel 1'

      call genwf_kernel(crys,gvec,kg,kgq,syms,wfnc, &
        wfnv,xct%nspin,ik,ic,iv,indexq,xct,intwfnv,intwfnc)

!        write(6,*) peinf%inode,'calling genwf_kernel 2'

      call genwf_kernel(crys,gvec,kg,kgq,syms,wfncp, &
        wfnvp,xct%nspin,ikp,icp,ivp,indexq,xct,intwfnv,intwfnc)

      call timacc(4,2,tsec)
      
!        call timacc(5,1,tsec)
!        call logit('      Calling excwf')
!        call excwf(gvec,wfncp,wfnct,wfnvp,
!     >   wfnvt,ipe)
!        call timacc(5,2,tsec)

    else
      ik=-1
      ikp=-1
      ic=-1
      icp=-1
      iv=-1
      ivp=-1
    endif

    if (peinf%inode.eq.0) then
      write(6,772) ii
      write(6,773) ik,ikp,ic,icp,iv,ivp
    endif
772 format(1x,"PE # 0 dealing with block",i6)
773 format(1x,"ik =",i6,1x,"ikp =",i6,1x,"ic =",i6,1x,"icp =",i6,1x,"iv =",i6,1x,"ivp =",i6)

    call logit('      Calling mtxel_kernel')
    call timacc(6,1,tsec)

    if (ii .le. peinf%myown) then
      call mtxel_kernel(crys,gvec,syms,qg,wfnc,wfncp,wfnvp, &
        wfnv,xct,peinf%myown*iownsize,bsedbody,bsedhead,bsedwing,bsex,ii,ik,ikp, &
        ic,icp,iv,ivp,bsedbody1,bsedwing1,bsedbody2,bsedwing2, &
        vcoularray,fqa(:,ii),qqa(ii),g0a(:,ii),ifqa(ii),irqa(ii),q0len)

      SAFE_DEALLOCATE_P(wfncp%cg)
      SAFE_DEALLOCATE_P(wfncp%isort)
      SAFE_DEALLOCATE_P(wfnvp%cg)
      SAFE_DEALLOCATE_P(wfnvp%isort)
      SAFE_DEALLOCATE_P(wfnc%cg)
      SAFE_DEALLOCATE_P(wfnc%isort)
      SAFE_DEALLOCATE_P(wfnv%cg)
      SAFE_DEALLOCATE_P(wfnv%isort)
    else
      if (xct%iwriteint .ne. 0) then
        !            write(6,*) peinf%inode, 'Calling mtxel without task'
        fqa_dummy(:)=0.0d0
        qqa_dummy=0.0d0
        g0a_dummy(:)=0
        ifqa_dummy=0
        irqa_dummy=0
        call mtxel_kernel(crys,gvec,syms,qg,wfnc,wfncp,wfnvp,wfnv, &
          xct,peinf%myown*iownsize,bsedbody,bsedhead,bsedwing,bsex,ii,ik,ikp,ic,icp, &
          iv,ivp,bsedbody1,bsedwing1,bsedbody2,bsedwing2, &
          vcoularray,fqa_dummy,qqa_dummy,g0a_dummy,ifqa_dummy,irqa_dummy,q0len)
      endif
    endif
    
    call timacc(6,2,tsec)

  enddo

!      write(6,*) peinf%inode, 'Done Main Loop'

  SAFE_DEALLOCATE_P(intwfnv%cg)
  SAFE_DEALLOCATE_P(intwfnc%cg)
  SAFE_DEALLOCATE_P(intwfnv%isort)
  SAFE_DEALLOCATE_P(intwfnc%isort)
  SAFE_DEALLOCATE_P(intwfnv%ng)
  SAFE_DEALLOCATE_P(intwfnc%ng)
  
  SAFE_DEALLOCATE(qqa)
  SAFE_DEALLOCATE(fqa)
  SAFE_DEALLOCATE(g0a)
  SAFE_DEALLOCATE(irqa)
  SAFE_DEALLOCATE(ifqa)
  SAFE_DEALLOCATE_P(xct%epsdiag)
  if (peinf%inode .eq. 0 .or. xct%bLowComm) then
    SAFE_DEALLOCATE_P(xct%isrtq)
    SAFE_DEALLOCATE_P(xct%isrtqi)
  endif
  if (xct%iwriteint .eq. 1) then
    SAFE_DEALLOCATE_P(xct%epsown)
    SAFE_DEALLOCATE_P(xct%epsowni)
    SAFE_DEALLOCATE_P(xct%maxpe)
    SAFE_DEALLOCATE_P(xct%epscol)
  endif
  SAFE_DEALLOCATE_P(peinf%nxqown)
  SAFE_DEALLOCATE_P(peinf%nxqi)
  

!--------------- Write BSE matrices ----------------------------------------------


!      write(6,*) peinf%inode, ''
!      write(6,*) peinf%inode, 'Writing bsemat headers'

  if(peinf%inode.eq.0) then
    call open_file(unit=11,file='bsedmat',form='unformatted',status='replace')
    if (xct%dynamic_screening) then
      call open_file(unit=13,file='bsedmat1',form='unformatted',status='replace')
      call open_file(unit=14,file='bsedmat2',form='unformatted',status='replace')
    endif
    write(11) xct%nkpt,xct%ncband,xct%nvband,xct%nspin
    if (xct%dynamic_screening) then
      write(13) xct%nkpt,xct%ncband,xct%nvband,xct%nspin
      write(14) xct%nkpt,xct%ncband,xct%nvband,xct%nspin
    endif
    do ik=1,xct%nkpt
      write(11) ik,(kg%f(ii,ik),ii=1,3)
      if (xct%dynamic_screening) then
        write(13) ik,(kg%f(ii,ik),ii=1,3)
        write(14) ik,(kg%f(ii,ik),ii=1,3)
      endif
    enddo
    call close_file(11)
    if (xct%dynamic_screening) then
      call close_file(13)
      call close_file(14)
    endif
    call open_file(unit=12,file='bsexmat',form='unformatted',status='replace')
    write(12) xct%nkpt,xct%ncband,xct%nvband,xct%nspin
    do ik=1,xct%nkpt
      write(12) ik,(kg%f(ii,ik),ii=1,3)
    enddo
    call close_file(12)
  endif

!      write(6,*) peinf%inode, ''
!      write(6,*) peinf%inode, 'Entering bsewrite'

  if (xct%ivpar .eq. 1) then
    iownsize = peinf%myown
  else if (xct%icpar .eq. 1) then
    iownsize = peinf%myown * (xct%nvband)**2
  else
    iownsize = peinf%myown * (xct%nvband * xct%ncband)**2
  endif
  
  call timacc(8,1,tsec)
  call bsewrite(xct,iownsize,bsedbody,bsedhead,bsedwing,bsex, &
    bsedhead,bsedbody1,bsedwing1,bsedbody2,bsedwing2)
  call timacc(8,2,tsec)

!      write(6,*) peinf%inode, ''
!      write(6,*) peinf%inode, 'Finished bsewrite'

  call destroy_fftw_plans()

  SAFE_DEALLOCATE(bsedhead)
  SAFE_DEALLOCATE(bsedwing)
  SAFE_DEALLOCATE(bsedbody)
  SAFE_DEALLOCATE(bsex)
  SAFE_DEALLOCATE(indexq)
  if (xct%dynamic_screening) then
    SAFE_DEALLOCATE(bsedwing1)
    SAFE_DEALLOCATE(bsedbody1)
    SAFE_DEALLOCATE(bsedwing2)
    SAFE_DEALLOCATE(bsedbody2)
  endif

  if (xct%iwritecoul .eq. 1) then
    if (peinf%inode .eq. 0) then
      call close_file(19) ! file vcoul
    endif
  endif

!---------------- Time accounting -----------------------------------------------


  ntim=8
  SAFE_ALLOCATE(routnam, (77))
  routnam(1)='TOTAL:'
  routnam(2)='INPUT:'
  routnam(3)='EPSCOPY:'
  routnam(4)='GENWF:'
  routnam(5)='EXCWF:'
  routnam(6)='MTXEL:'
  routnam(7)='FULLBZ:'
  routnam(8)='BSEWRITE:'
  
  call timacc(1,2,tsec)
  if(peinf%inode.eq.0) then
    write(6,*)
    write(6,9000) 'CPU [s]','WALL [s]','#'
    write(6,*)
  endif
  
  do ii=2,ntim
    call timacc(ii,3,tsec,ncount)
#ifdef MPI
    call MPI_ALLREDUCE(tsec,tmin,2,MPI_REAL_DP,MPI_MIN,MPI_COMM_WORLD,mpierr)
    call MPI_ALLREDUCE(tsec,tmax,2,MPI_REAL_DP,MPI_MAX,MPI_COMM_WORLD,mpierr)
#else
    tmin = tsec
    tmax = tsec
#endif
    if(peinf%inode.eq.0) then
      write(6,9001) routnam(ii),tmin(1),tmin(2),ncount
      write(6,9002) tsec(1),tsec(2)
      write(6,9003) tmax(1),tmax(2)
    endif
  enddo

! JRD More Time Accounting for mtxel_kernel

  routnam(61)='MTXEL Setup:'
  routnam(62)='MTXEL Vcoul:'
  routnam(63)='MTXEL W:'
  routnam(64)='MTXEL W-Sum:'
  routnam(65)='MTXEL FFT Dir:'
  routnam(66)='MTXEL High G:'
  routnam(67)='MTXEL FFT X:'
  routnam(68)='MTXEL BSEX:'
  routnam(69)='MTXEL INDEX:'
  routnam(70)='MTXEL EPSHEAD:'
  routnam(71)='MTXEL EPSREAD:'
  routnam(72)='MTXEL EPSOPEN:'
  routnam(73)='MTXEL GROUP:'
  routnam(74)='MTXEL COMM:'
  routnam(75)='MTXEL FREE:'
  routnam(76)='MTXEL BAR:'
  routnam(77)='MTXEL GROUPS:'
  
  do ii=61,77
    call timacc(ii,3,tsec,ncount)
#ifdef MPI
    call MPI_Reduce(tsec,tmin,2,MPI_REAL_DP,MPI_MIN,0,MPI_COMM_WORLD,mpierr)
    call MPI_Reduce(tsec,tmax,2,MPI_REAL_DP,MPI_MAX,0,MPI_COMM_WORLD,mpierr)
#else
    tmin=tsec
    tmax=tsec
#endif
    if(peinf%inode.eq.0) then
      write(6,9001) routnam(ii),tmin(1),tmin(2),ncount
      write(6,9002) tsec(1),tsec(2)
      write(6,9003) tmax(1),tmax(2)
    endif
  enddo
  
  if(peinf%inode.eq.0) write(6,*)
  
  call timacc(1,3,tsec,ncount)
#ifdef MPI
  call MPI_Reduce(tsec,tmin,2,MPI_REAL_DP,MPI_MIN,0,MPI_COMM_WORLD,mpierr)
  call MPI_Reduce(tsec,tmax,2,MPI_REAL_DP,MPI_MAX,0,MPI_COMM_WORLD,mpierr)
#else
  tmin=tsec
  tmax=tsec
#endif
  
  if(peinf%inode.eq.0) then
    write(6,9004) routnam(1),tmin(1),tmin(2)
    write(6,9002) tsec(1),tsec(2)
    write(6,9003) tmax(1),tmax(2)
    write(6,*)
  endif
  
9000 format(22x,a13,3x,a13,3x,a8)
9001 format(1x,a16,'(min.)',f13.3,3x,f13.3,3x,i8)
9002 format(  17x,'(PE 0)',f13.3,3x,f13.3)
9003 format(  17x,'(max.)',f13.3,3x,f13.3)
9004 format(1x,a16,'(min.)',f13.3,3x,f13.3)
  
  call write_memory_usage()

! Delete the intermediate working files

!  do ii = 0, peinf%npes-1
!    if (ii.eq.peinf%inode) then
!      call close_file(128+3*ii  , delete = .true.) ! files INT_VWFN_*
!      call close_file(128+3*ii+1, delete = .true.) ! files INT_CWFN_*
!    endif
!  enddo
  if (peinf%inode.eq.0 .and. xct%iwriteint .eq. 0) then
    do ii = 0, qg%nr-1
      if (ii.lt.10000) then
        write(filename,'(a,i4.4)') 'INT_EPS_',ii
      endif
      call open_file(128+ii,file=filename,form='unformatted',status='old')
      call close_file(128+ii, delete = .true.) ! files INT_EPS_*
    end do
  endif
  
#ifdef MPI
  call MPI_FINALIZE(mpierr)
#endif

end program kernel
