#!/bin/bash
#
# pdist_file.sh
# version 1.0.2 April 18, 2005
# Copyright 2005, Caltech and David Mathog
# Send bug reports or comments to mathog@caltech.edu
#
# This script distributes a file to a list of nodes using a 
# daisychain method similar to that in the program "dolly".
# The heavy lifting is done by nettee, which is derived from dolly.
# Each target node reads through the listfile until it finds
# itself and then picks up the next line in the list and uses
# that as the name of the next machine to contact.  If it doesn't
# find itself it picks the first machine in the list.
# It then tees the dataflow and sends output to the disk and
# also via an rsh to the next node in the daisychain.
#
# Tests with a linux 2.6.8 kernel and 100baseT recorded throughput
# that varied from 4-6Mbytes/sec.  Roughly 1/3 to 1/2 of the
# theoretical network bandwidth.
#
# Input is ALWAYS from stdin.
#
# This uses my extract program instead of awk etc.  Extract is
# available as source code here, as is this script and nettee:
#
# ftp://saf.bio.caltech.edu/pub/software/linux_or_unix_tools/
#
#
# parameters are (all three are required):
#
# 1.  listfile.  This is specified as "/path/listfile.txt"
#     and contains the list of nodes to receive the file
#     being distributed.  Typically this is the PVM or MPI
#     list file.
# 2.  localsave. script to store and/or process the data stream
#     on each compute node.  Note, this script should be able to
#     keep up with the network writes.  If not, store the stream to
#     a file and then come back and process it in parallel on all nodes.
# 3.  lsparam.   A single parameter handed to the localsave script,
#     for instance, a file name.  It must be supplied, although
#     localsave scripts may be written to ignore it.
#
####################################################
# Site-specific settings: adjust these for your installation
####################################################
THISSCRIPT=/usr/common/bin/pdist_file.sh  # installed location of this script
NETTEE="/usr/common/bin/nettee -q -t 10"  # command plus fixed options
EXTRACT=/usr/common/bin/extract
ACCUDATE=/usr/common/bin/accudate
FINDHOSTNAME="/bin/hostname -s"           # command plus fixed options
OPENSSL=/usr/bin/openssl
#
# Besides the programs named above, the script also needs to locate:
#  rm, cat, tee, and echo
# and that echo has to understand "-n".
#
# Start-up state.
#   STIME   - value printed by "accudate -t0"; handed back to accudate at
#             the end of the run to report the elapsed time.
#   DODELTA - 0 by default; set to 1 later when the elapsed time should be
#             reported.
#   IAM     - this machine's name as printed by $FINDHOSTNAME; used to
#             prefix log lines and to look this node up in the node list.
STIME=$($ACCUDATE -t0)
DODELTA=0
IAM=$($FINDHOSTNAME)
#
# Validate the command line.
#
# Exactly three arguments are required: LISTFILE LOCALSAVE LSPARAM.
# Every failure prints a message prefixed with this node's name to stdout
# (the same stream the rest of the per-node log uses) and exits with
# status 1, so a caller can tell a rejected invocation from a completed one.
#
# The file tests quote their operands: unquoted, an empty argument reduces
# the test to "[ ! -r ]", which is false and lets the bad value through,
# and a value containing whitespace makes "[" fail with a syntax error,
# which "if" also treats as false.
#
if [ $# -ne 3 ]
then
  echo "$IAM: failure wrong number of parameters"
  echo "usage: cat file_to_move | $THISSCRIPT LISTFILE LOCALSAVE LSPARAM"
  echo "  LISTFILE  list of target nodes, one node per line"
  echo "  LOCALSAVE the name of a script that process the data stream locally"
  echo "            on each node.  It runs this way on each node:"
  echo "            nettee | \$LOCALSAVE \$LSPARAM  2>&1"
  echo "            Example 1:  Store the stream to a file specified by LSPARAM"
  echo "              #!/bin/sh"
  echo "              cat - > \$1"
  echo "            Example 2:  cd to directory LSPARAM and unpack:"
  echo "              #!/bin/sh"
  echo "              cd \$1"
  echo "              gunzip -c | tar xf - "
  echo "  LSPARAM   A single parameter, for instance, a file name."
  echo "            LOCALSAVE scripts may be written to ignore it."
  echo ""
  echo "  The concatentated log files from all nodes are echoed to stdout."
  exit 1
else
  LISTFILE=$1
  LOCALSAVE=$2
  LSPARAM=$3
  # The node list must exist and be readable on this machine.
  if [ ! -r "$LISTFILE" ]
  then
    echo "$IAM: failure: $LISTFILE does not exist or is not readable"
    exit 1
  fi
  # The local save script must be executable on this machine.
  if [ ! -x "$LOCALSAVE" ]
  then
    echo "$IAM: failure: $LOCALSAVE is not an executable program or script"
    exit 1
  fi
  # An empty third argument is rejected even though it was passed.
  if [ -z "$LSPARAM" ]
  then
    echo "$IAM: failure: LSPARAM must be supplied"
    exit 1
  fi
fi
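#
# (Stale step: "first find a random name to use for the pipe".  Nothing in
# this script creates or names a pipe, so no code follows this comment.)
#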

#
# Find this node's place in the input list and run its part of the chain.
#
# Three cases:
#   1. This node is in the list and another node follows it: start this
#      script on the next node (background rsh), then run nettee with
#      -next so the stream goes to LOCALSAVE here and on to the next node.
#   2. This node is in the list but nothing follows it: run nettee into
#      LOCALSAVE only, no network output.
#   3. This node is not in the list: it is the source.  Start the script on
#      the first node in the list and feed stdin to it through nettee
#      without saving locally.  An empty list is a fatal error (exit 1).
#
# BACKRSH holds the PID of the background rsh, or 0 if none was started.
#
# Note: the arguments are pasted into a single rsh command string, so
# LISTFILE, LOCALSAVE and LSPARAM must not contain whitespace or shell
# metacharacters, and must be valid paths on every node.
#
BACKRSH=0
# Lines of LISTFILE that pass the '!^#' filter (presumably: lines that are
# not comments) and also match this node's name.  Empty if not listed.
INLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly | $EXTRACT -if "$IAM" -ifonly)
# Quoted -n test: an unquoted "[ $INLIST ]" fails with a syntax error, and
# is then treated as false, whenever the value is more than one word.
if [ -n "$INLIST" ]
then
  # find the next name in the list and start with that
  # note the /dev/null part on the third extract is to eliminate
  # warning about starting beyond the end of the file, which will
  # always happen for the next line after the final node.
  NEXTINLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly |
               $EXTRACT -if "$IAM" -ifn 1 -ifonly |
               $EXTRACT -sr 2 2>/dev/null)
  if [ -n "$NEXTINLIST" ]
  then
    #there is another node in the list.  Do two things:
    #1. start this script on it
#    echo "$IAM: Starting script on $NEXTINLIST"
    rsh "$NEXTINLIST" "$THISSCRIPT $LISTFILE $LOCALSAVE $LSPARAM " \
       < /dev/null 2>&1  &
    BACKRSH=$!
    #2. start nettee directed to it
    #handle the local part, in foreground.  LOCALSAVE's stderr is merged
    #into stdout, as the usage text documents and as the last-node case
    #below already does.
    echo "$IAM: [nettee] Connecting to $NEXTINLIST"
    $NETTEE  -v 17 -next "$NEXTINLIST" | "$LOCALSAVE" "$LSPARAM"  2>&1
  else
    echo  "$IAM: is [ListEnd]"
    #
    #this is the last node in the list, write file, but no network ops.
    #LOCALSAVE reads the stream that nettee passes through.
    #
    $NETTEE  -v 17 | "$LOCALSAVE" "$LSPARAM"  2>&1
  fi
else
  # find the first name in the list and start with that
  # just pass the stream on to the first node since there's no need to
  # store it locally.
  #the /dev/null on the 2nd extract eliminates a warning
  # message from extract if the input list is empty
  NEXTINLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly |
               $EXTRACT -nr 1 2>/dev/null)
  if [ -n "$NEXTINLIST" ]
  then
    #1. start this script on the first node in the list
#    echo "$IAM: Starting script on $NEXTINLIST"
    rsh "$NEXTINLIST" "$THISSCRIPT $LISTFILE $LOCALSAVE $LSPARAM" < /dev/null 2>&1 &
    BACKRSH=$!
    #2. start nettee, no local save operation, read from stdin
    echo "$IAM: [stdin] connecting to $NEXTINLIST"
    # Only the source node reports the elapsed time at the end.
    DODELTA=1
    $NETTEE  -v 17 -in - -out none -next "$NEXTINLIST"
  else
    #there are no nodes in the list - fatal error.  Stop here with a
    #failure status instead of falling through to the "DONE" report.
    echo "$IAM fatal error - no nodes in the list"
    exit 1
  fi
fi
# Wrap-up: if an rsh was put in the background (BACKRSH holds its PID,
# otherwise 0), block until it has finished before printing the final
# status line for this node.
if (( BACKRSH > 0 )); then
  wait $BACKRSH
fi
# The elapsed time is reported only when DODELTA was switched on; it is
# obtained by handing the start value STIME back to accudate.
if (( DODELTA <= 0 )); then
  echo "$IAM: DONE"
else
  ETIME=$($ACCUDATE -ds $STIME)
  echo "$IAM: DONE, elapsed time: $ETIME"
fi
exit

