!>===================================================================
!!
!! Module misc_m
!!
!! Routines:
!!
!! 1. checknorm()        Originally By (SIB)     Last Modified 6/12/2008 (JRD)
!!
!!    Checks normalization of the wavefunction contained
!!    in z(nkptotal,nspin).  For some reason it only checks the
!!    normalization of spin component 1 (instead of 1 to nspin)
!!    By normalization we mean that sum_l { |z(l,m)|^2 } = 1 for each m.
!!    It aborts if norm is off by more than 1e-8 from 1.
!!
!! 2. get_volume()       Originally By (SIB)     Last Modified 6/12/2008 (JRD)
!!
!!    This assumes that b is a symmetric matrix.  It sets
!!    vol = (2pi)^3 / square_root(|det(b)|)
!!    This makes sense if b is the matrix of dot products of the recip
!!    lattice vectors, so vol is the real space volume.
!!
!! 3. findvector()       Originally By (SIB)     Last Modified 6/12/2008 (JRD)
!!
!!    Looks for the vector kx, ky, kz in the list of vectors
!!    gvec%k(1:3,1:gvec%ng).  If found, iout is its index.  Otherwise
!!    iout is zero.
!!
!! 4. invert_matrix()    Originally By (SIB)     Last Modified 6/12/2008 (JRD)
!!
!!    Inverts 3x3 matrix.
!!
!! 5. procmem()          Originally By (gsm)     Last Modified 4/14/2009 (gsm)
!!
!!     Determines the amount of free memory per processor
!!     from the proc file system
!!
!! 6. sizeof_scalar()    Originally By (DAS)     Last Modified 1/25/2011 (DAS)
!!
!!     Return the size of the SCALAR type, for memory estimation.
!!
!! 7. voigt()            Originally By (gsm)     Last Modified 1/31/2011 (gsm)
!!
!!     Returns Voigt function (convolution of Gaussian and Lorentzian).
!!     Based on the rational approximation to the complex error function
!!     from A. K. Hui, B. H. Armstrong and A. A. Wray,
!!     "Rapid computation of the Voigt and complex error functions,"
!!     Journal of Quantitative Spectroscopy and Radiative Transfer,
!!     Volume 19, Issue 5, Pages 509 - 516, Year 1978.
!!
!! 8. k_range()        Originally By gsm       Last Modified 8/18/2010 (gsm)
!!
!!    Translates k-point kpt(1:3) to [0,1) interval. Returns G-vector gpt(1:3)
!!    that brings kpt(1:3) to [0,1) interval. The interval is satisfied within
!!    a given tolerance, the actual interval is [-tol,1-tol).
!!
!! 9. invert_matrix_int   Originally by DAS  12/28/11
!!
!!    Like invert_matrix, but for integer input. Dies if output is not integers.
!!
!!===================================================================

#include "f_defs.h"

module misc_m

  use global_m
  use blas_m

  implicit none

  public ::        &
    checknorm,     &
    get_volume,    &
    findvector,    &
    invert_matrix, &
    procmem,       &
    sizeof_scalar, &
    voigt,         &
    k_range,       &
    invert_matrix_int

contains

!> Checking normalization of only one spin component at a time
  subroutine checknorm(filename,iband,ik,ispin,nkptotal,z)
    character (len=*), intent(in) :: filename
    integer, intent(in) :: iband,ik,ispin,nkptotal
    SCALAR, intent(in) :: z(nkptotal)

    real(DP) :: xnorm

    PUSH_SUB(checknorm)

    xnorm = blas_nrm2(nkptotal, z(:), 1)
    
    if(abs(xnorm-1.0d0).gt.1.0d-5) then
      write(0,555) TRUNC(filename),abs(xnorm-1.0d0),iband,ispin,ik
555   format(1x,'The wavefunctions are not normalized in file',1x,a,/,&
        3x,'abs(xnorm - 1) =',f10.6,/,&
        3x,'iband =',i6,1x,'ispin =',i2,1x,'ik =',i6,/)
      call die("Incorrect normalization.")
    endif

    POP_SUB(checknorm)
    
    return
  end subroutine checknorm

!=====================================================================

  subroutine get_volume(vol,b)
    real(DP), intent(out) :: vol
    real(DP), intent(in)  :: b(3,3)
    
    PUSH_SUB(get_volume)
    
    vol = b(1,1)*(b(2,2)*b(3,3) - b(2,3)**2) &
      + 2*b(1,2)*b(2,3)*b(3,1) &
      - b(2,2)*b(1,3)**2 - b(3,3)*b(1,2)**2
    vol = sqrt(abs(vol))
    vol = ((2.0d0*PI_D)**3)/vol
    
    POP_SUB(get_volume)

    return
  end subroutine get_volume

!=====================================================================

  subroutine findvector(iout,kx,ky,kz,gvec)
    integer, intent(out) :: iout
    integer, intent(in)  :: kx,ky,kz
    type (gspace), intent(in) :: gvec
    
! no push/pop since called too frequently

    iout=((kx+gvec%kmax(1)/2)*gvec%kmax(2)+ky+gvec%kmax(2)/2)* &
      gvec%kmax(3)+kz+gvec%kmax(3)/2+1
    if (iout .ge. 1 .and. iout .le. gvec%nktot) then
      iout=gvec%indv(iout)
      if (iout .ge. 1 .and. iout .le. gvec%ng) then
        if (kx .ne. gvec%k(1,iout) .or. &
          ky .ne. gvec%k(2,iout) .or. &
          kz .ne. gvec%k(3,iout) ) iout = 0
      else
        iout = 0
      endif
    else
      iout = 0
    endif

    return
  end subroutine findvector

!=====================================================================

  subroutine invert_matrix(mat, inv)
    real(DP), intent(in) :: mat(:,:)  !< (3,3)
    real(DP), intent(out) :: inv(:,:) !< (3,3)
    
    real(DP) :: aa(3,3), det
    
    PUSH_SUB(invert_matrix)
    
!> Compute matrix of cofactors

    aa(1,1) =  mat(2,2) * mat(3,3) - mat(2,3) * mat(3,2)
    aa(2,1) = -mat(2,1) * mat(3,3) + mat(2,3) * mat(3,1)
    aa(3,1) =  mat(2,1) * mat(3,2) - mat(2,2) * mat(3,1)
    aa(1,2) = -mat(1,2) * mat(3,3) + mat(1,3) * mat(3,2)
    aa(2,2) =  mat(1,1) * mat(3,3) - mat(1,3) * mat(3,1)
    aa(3,2) = -mat(1,1) * mat(3,2) + mat(1,2) * mat(3,1)
    aa(1,3) =  mat(1,2) * mat(2,3) - mat(1,3) * mat(2,2)
    aa(2,3) = -mat(1,1) * mat(2,3) + mat(1,3) * mat(2,1)
    aa(3,3) =  mat(1,1) * mat(2,2) - mat(1,2) * mat(2,1)

!> Compute determinant

    det = sum(mat(1, 1:3) * aa(1:3, 1))
    if (abs(det) .lt. TOL_Small) call die('Cannot invert singular matrix.')
    
    inv(1:3, 1:3) = aa(1:3, 1:3) / det
    
    POP_SUB(invert_matrix)
    
    return
  end subroutine invert_matrix

!================================================================================

  subroutine procmem(mem,nmpinode)
    real(DP), intent(out) :: mem
    integer, intent(out) :: nmpinode

    integer :: ierr,eof,info,iunit,m,n,p,i,j,pagesize
    real(DP) :: x,y,mac_m,mac_n
!    integer :: ntot
!    real(DP) :: xtot,ytot ! we do not use the total memory actually
    character*80 :: s
    character*80, allocatable :: a(:)
    integer, allocatable :: b(:)

    PUSH_SUB(procmem)

!-----------------------------------------------------
!> determine the amount of free memory per node in kB

    m=0
    iunit=14
    x=0

    call open_file(unit=iunit,file='/proc/meminfo',form='formatted',iostat=ierr,status='old')
    if (ierr.eq.0) then
      eof=0
      do while(eof.eq.0)
        read(iunit,'(a)',iostat=eof)s
        if (s(1:7).eq."MemFree") then
          read(s(9:),*)n
          m=m+n
        endif
!        if (s(1:8).eq."MemTotal") then
!          read(s(10:),*)ntot
!        endif
        if (s(1:6).eq."Cached") then
          read(s(8:),*)n
          m=m+n
        endif
      enddo
      x=dble(m)/dble(peinf%npes)
      call close_file(iunit)
    endif

    if(m == 0) then
      !> this is for Mac OS
      !! total memory is accessible instead from sysctl -n hw.usermem
      SYSTEMCALL("vm_stat > vm_stat 2> /dev/null")
      !> Fortran 2008 would use execute_command_line instead
      !! even if the command failed, still open file in order to delete it
      call open_file(unit=iunit,file='vm_stat',form='formatted',iostat=ierr,status='old')
      if (ierr.eq.0) then
        eof=0
        do while(eof.eq.0)
          read(iunit,'(a)',iostat=eof)s
          if (s(1:45).eq."Mach Virtual Memory Statistics: (page size of") then
            read(s(46:),*)pagesize ! in bytes
          endif
          if (s(1:11).eq."Pages free:") then
            read(s(12:),*) mac_n
            mac_m = mac_m + mac_n
          endif
          if (s(1:18).eq."Pages speculative:") then
            read(s(19:),*) mac_n
            mac_m = mac_m + mac_n
          endif
        enddo
        call close_file(iunit, delete = .true.)
        x = mac_m * dble(pagesize) / dble(peinf%npes * 1024) ! to kB
      endif
    endif

!> === Example output from vm_stat ===
!! Mach Virtual Memory Statistics: (page size of 4096 bytes)
!! Pages free:                           2886.
!! Pages active:                       139635.
!! Pages inactive:                      66906.
!! Pages speculative:                    2376.
!! Pages wired down:                    50096.
!! "Translation faults":            123564742.
!! Pages copy-on-write:              10525831.
!! Pages zero filled:                53274329.
!! Pages reactivated:                  739514.
!! Pageins:                           2282166.
!! Pageouts:                           306727.
!! Object cache: 25 hits of 522230 lookups (0% hit rate)

    if(m == 0 .and. mac_m == 0) then ! BSD
      !> http://mario79t.wordpress.com/2008/08/29/memory-usage-on-freebsd/
      !! -bash-2.05b$ sysctl vm.stats.vm.v_free_count
      !! vm.stats.vm.v_free_count: 29835
      !! -bash-2.05b$ sysctl vm.stats.vm.v_page_count
      !! vm.stats.vm.v_page_count: 124419
      !! -bash-2.05b$ sysctl hw.pagesize
      !! hw.pagesize: 4096
      SYSTEMCALL("sysctl -a > sysctl 2> /dev/null")
      !> Fortran 2008 would use execute_command_line instead
      !! even if the command failed, still open file in order to delete it
      call open_file(unit=iunit,file='sysctl',form='formatted',iostat=ierr,status='old')
      if (ierr.eq.0) then
        eof=0
        do while(eof.eq.0)
          read(iunit,'(a)',iostat=eof)s
          if (s(1:12).eq."hw.pagesize:") then
            read(s(13:),*)pagesize ! in bytes
          endif
          if (s(1:25).eq."vm.stats.vm.v_free_count:") then
            read(s(26:),*) mac_n
            mac_m = mac_m + mac_n
          endif
          if (s(1:26).eq."vm.stats.vm.v_cache_count:") then
            read(s(27:),*) mac_n
            mac_m = mac_m + mac_n
          endif
        enddo
        call close_file(iunit, delete = .true.)
        x = mac_m * dble(pagesize) / dble(peinf%npes * 1024) ! to kB
      endif
    endif

!    xtot=dble(ntot)/dble(peinf%npes)
    
#ifdef MPI
    call MPI_Allreduce(x,y,1,MPI_REAL_DP,MPI_SUM,MPI_COMM_WORLD,mpierr)
!    call MPI_Allreduce(xtot,ytot,1,MPI_REAL_DP,MPI_SUM,MPI_COMM_WORLD,mpierr)
#else
    y=x
!    ytot=xtot
#endif

!----------------------------------------------
!> determine the number of processors per node
    
    SAFE_ALLOCATE(a, (peinf%npes))
    HOSTNAMECALL(a(peinf%inode+1),info)
!    write(a(peinf%inode+1),'(a4,i16.16)') 'HOST', peinf%inode
    
#ifdef MPI
    do i=1,peinf%npes
      call MPI_Bcast(a(i),80,MPI_BYTE,i-1,MPI_COMM_WORLD,mpierr)
    enddo
#endif
    
    if (peinf%inode.eq.0) then
      SAFE_ALLOCATE(b, (peinf%npes))
      b(:)=0
      do i=1,peinf%npes
        do j=1,peinf%npes
          if (trim(a(j)).eq.trim(a(i))) b(i)=b(i)+1
        enddo
      enddo
      p=0
      do i=1,peinf%npes
        if (p.lt.b(i)) p=b(i)
      enddo
      SAFE_DEALLOCATE(b)
    endif
    SAFE_DEALLOCATE(a)
    
#ifdef MPI
    call MPI_Bcast(p,1,MPI_INTEGER,0,MPI_COMM_WORLD,mpierr)
#endif

!-----------------------------------
!> report the available memory in B

    if (p.gt.1) y=y/dble(p)
    mem=y*1024.0d0
    
!-----------------------------------
!> report the number of MPI processes per node

    nmpinode=p

!-----------------------------------
!> warn if zero memory

    if (mem .lt. TOL_Small .and. peinf%inode .eq. 0) then
      write(0,666)
666   format(1x,'WARNING: estimation of memory available is zero, probably failed.',/)
    endif

    POP_SUB(procmem)
  
    return
  end subroutine procmem

!================================================================================

!> for memory estimation, tell what size of SCALAR type is
  integer function sizeof_scalar()

#ifdef NOSIZEOF
!> if no sizeof, take a guess

#ifdef CPLX
    sizeof_scalar = 16
#else
    sizeof_scalar = 8
#endif

#else

    SCALAR :: dummy
    sizeof_scalar = sizeof(dummy)

#endif

  end function sizeof_scalar

!================================================================================

  real(DP) function voigt(x, sigma, gamma)
    real(DP), intent(in) :: x, sigma, gamma

    real(DP), parameter :: a0 = 122.607931777104326d0
    real(DP), parameter :: a1 = 214.382388694706425d0
    real(DP), parameter :: a2 = 181.928533092181549d0
    real(DP), parameter :: a3 =  93.155580458138441d0
    real(DP), parameter :: a4 =  30.180142196210589d0
    real(DP), parameter :: a5 =   5.912626209773153d0
    real(DP), parameter :: a6 =   0.564189583562615d0
    real(DP), parameter :: b0 = 122.607931773875350d0
    real(DP), parameter :: b1 = 352.730625110963558d0
    real(DP), parameter :: b2 = 457.334478783897737d0
    real(DP), parameter :: b3 = 348.703917719495792d0
    real(DP), parameter :: b4 = 170.354001821091472d0
    real(DP), parameter :: b5 =  53.992906912940207d0
    real(DP), parameter :: b6 =  10.479857114260399d0

    complex(DPC) :: z, zh, f

    PUSH_SUB(voigt)

    if (sigma .lt. TOL_Zero .or. gamma.lt.-TOL_Zero) &
     call die('Voigt function invalid broadening')

    z = CMPLX(abs(x), gamma) / (sqrt(2.0d0) * sigma)
    zh = CMPLX(IMAG(z), -dble(z))
    f = ((((((a6*zh + a5)*zh + a4)*zh + a3)*zh + a2)*zh + a1)*zh + a0) / &
     (((((((zh + b6)*zh + b5)*zh + b4)*zh + b3)*zh + b2)*zh + b1)*zh + b0)
    if (x .lt. 0.0d0) f = conjg(f)
    voigt = dble(f) / (sqrt(2.0d0 * PI_D) * sigma)

    POP_SUB(voigt)

    return
  end function voigt

!================================================================================

  subroutine k_range(kpt, gpt, tol)
    real(DP), intent(inout) :: kpt(3)
    integer, intent(out) :: gpt(3)
    real(DP), intent(in) :: tol

    integer :: ii

    ! no push_sub, called too frequently
    
    do ii = 1, 3
      gpt(ii) = 0
      do while (kpt(ii) .lt. -tol)
        gpt(ii) = gpt(ii) + 1
        kpt(ii) = kpt(ii) + 1.0d0
      enddo
      do while (kpt(ii) .ge. 1.0d0 - tol)
        gpt(ii) = gpt(ii) - 1
        kpt(ii) = kpt(ii) - 1.0d0
      enddo
    enddo

    return
  end subroutine k_range

!=====================================================================

  subroutine invert_matrix_int(mat, inv)
    integer, intent(in)  :: mat(:,:) !< (3,3)
    integer, intent(out) :: inv(:,:) !< (3,3)
    
    integer :: aa(3,3), det
    
    PUSH_SUB(invert_matrix_int)
    
!> Compute matrix of cofactors

    aa(1,1) =  mat(2,2) * mat(3,3) - mat(2,3) * mat(3,2)
    aa(2,1) = -mat(2,1) * mat(3,3) + mat(2,3) * mat(3,1)
    aa(3,1) =  mat(2,1) * mat(3,2) - mat(2,2) * mat(3,1)
    aa(1,2) = -mat(1,2) * mat(3,3) + mat(1,3) * mat(3,2)
    aa(2,2) =  mat(1,1) * mat(3,3) - mat(1,3) * mat(3,1)
    aa(3,2) = -mat(1,1) * mat(3,2) + mat(1,2) * mat(3,1)
    aa(1,3) =  mat(1,2) * mat(2,3) - mat(1,3) * mat(2,2)
    aa(2,3) = -mat(1,1) * mat(2,3) + mat(1,3) * mat(2,1)
    aa(3,3) =  mat(1,1) * mat(2,2) - mat(1,2) * mat(2,1)

!> Compute determinant

    det = sum(mat(1, 1:3) * aa(1:3, 1))
    if (det == 0) call die('Cannot invert singular matrix.')

    inv(1:3, 1:3) = aa(1:3, 1:3) / det
    
    if (any(inv(1:3, 1:3) * det /= aa(1:3, 1:3))) then
      write(0,*) 'determinant = ', det
      call die('Inverse of this integer matrix is not an integer matrix.')
    endif
    
    POP_SUB(invert_matrix_int)
    
    return
  end subroutine invert_matrix_int

end module misc_m
