#!/bin/bash
#
# pdist_shell.sh
# version 1.0.3 May 3, 2005
# Copyright 2005, Caltech and David Mathog
# Send bug reports or comments to mathog@caltech.edu
#
# This script creates a command chain across a set of nodes.
# On the top node it creates a fifo to which shell commands may
# be written.  Then it distributes those commands very quickly down
# the chain using nettee so that they may be executed on all nodes
# (except the top one).
#
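# Commands are read from the fifo on the top node; the copies of this
# script started on the other nodes run with stdin redirected from /dev/null.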
#
# This uses my extract and execinput programs. These are available as
# source code here:
#
# ftp://saf.bio.caltech.edu/pub/software/linux_or_unix_tools/
#
#
# parameters are:
#
# 1.  listfile.  This is specified as "/path/listfile.txt"
#     and contains the list of target nodes, one node per
#     line, on which the commands will be executed.
#     Typically this is the PVM or MPI list file.
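# 2.  fifoname.  Full path for the fifo.  Typically it goes in /tmp
#     but you may put it elsewhere.  An empty string ("") marks the
#     instance as a slave; the script passes "" itself when it starts
#     a copy of itself on the next node in the list.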
#
####################################################
#Set these as appropriate for your site
####################################################
# --- helper programs -------------------------------------------------
EXTRACT=/usr/common/bin/extract
EXECINPUT=/usr/common/bin/execinput
ACCUDATE=/usr/common/bin/accudate
FINDHOSTNAME="/bin/hostname -s"
# Path to this script; it is also the path run on the next node via rsh.
THISSCRIPT=/usr/common/bin/pdist_shell.sh

# --- command chain settings ------------------------------------------
# Text the usage message tells the user to echo to the fifo to terminate.
EOS=EOS
# Port to use for the command nettee chain.
PORT=12000
NETTEE="/usr/common/bin/nettee -p ${PORT} -q -t 0 -stm EOS"

#
# Besides the programs above, the script must also be able to find:
#  rm, cat, tee, and echo
# and echo must support "-n".
#

# --- run-time state --------------------------------------------------
# Set to 1 later on the node that reports the elapsed time.
DODELTA=0
# Start time stamp, handed back to accudate at the end of the run.
STIME=$($ACCUDATE -t0)
# Name of the current machine; used to locate this node (and the NEXT
# one) in the input list and to prefix every message.
IAM=$($FINDHOSTNAME)
# Validate the command line and, on the top node, create the command fifo.
#
# Exactly two parameters are required: LISTFILE and FIFONAME.
#   - An empty FIFONAME ("") marks this instance as a slave (MESLAVE="YES").
#   - A non-empty FIFONAME marks the top node, which creates the fifo.
# Sets LISTFILE, FIFONAME and MESLAVE.  Every failure exits with status 1.
if [ $# -ne 2 ]
then
  echo "$IAM: failure wrong number of parameters"
  echo "$THISSCRIPT LISTFILE FIFONAME"
  echo "  LISTFILE  list of target nodes, one node per line"
  echo "  FIFONAME  full path to the fifo which will accept shell commands."
  echo "While this script is running commands echoed to FIFONAME"
  echo "will be executed on every target node (in parallel)."
  echo "Text written to stdout or stderr by these commands will"
  echo "be returned to the session that started this script."
  echo "(Simultaneous output may be interleaved and could be unreadable.)"
  echo "To terminate this script echo $EOS to FIFONAME."
  exit 1
fi

LISTFILE=$1
FIFONAME=$2
if [ ! -r "$LISTFILE" ]
then
  echo "$IAM: failure: $LISTFILE does not exist or is not readable"
  exit 1
fi

if [ -z "$FIFONAME" ]
then
  MESLAVE="YES"
else
  MESLAVE=""
  # Only the person running this script may use the fifo.  The mode is
  # handed to mkfifo directly: "umask 0600" would mask out the OWNER's
  # read/write bits and leave the fifo open to group and other instead.
  # If the path already exists mkfifo reports an error and the checks
  # below decide whether the existing object is usable.
  mkfifo -m 600 "$FIFONAME"
  # Require a readable fifo; anything else (e.g. a regular file that was
  # already at this path) must not be used and later removed.
  if [ ! -p "$FIFONAME" ] || [ ! -r "$FIFONAME" ]
  then
    echo "$IAM: failure: $FIFONAME could not be created"
    exit 1
  fi
  # From here on remove the fifo on every exit path, failures included.
  trap 'rm -f -- "$FIFONAME"' EXIT
fi
#
# find if this node is in the input list
#
# Decide this node's role from LISTFILE, start the script on the next node
# (in the background, via rsh) and run the local nettee stage.
#
# Reads:  EXTRACT, NETTEE, EXECINPUT, THISSCRIPT, IAM, LISTFILE, FIFONAME,
#         MESLAVE.
# Sets:   BACKRSH (pid of the background rsh, 0 if none), DODELTA (1 on
#         the top node once the chain is started), NEXTNODE (exported).
# Exits with status 1 on a role mismatch or an empty node list; the fifo,
# if this node created one, is removed first.
BACKRSH=0
# Lines of LISTFILE that mention this host (presumably with '#' comment
# lines filtered out first -- confirm against extract's documentation).
INLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly | $EXTRACT -if "$IAM" -ifonly)
# NOTE(review): $INLIST and $NEXTINLIST are tested UNQUOTED on purpose.
# Word splitting discards stray whitespace, and a quoted -n test would
# change how whitespace-only or multi-line results are classified.
# Presumably "$IAM" is matched as a pattern, so a host name that is a
# prefix of another may match several lines -- confirm before tightening.
if [ $INLIST ]
then
  if [ -z "$MESLAVE" ]
  then
    echo "$IAM: failure: $LISTFILE indicates slave but fifoname is >$FIFONAME<"
    # A non-empty FIFONAME means the fifo was already created for this
    # node; do not leave it behind.
    rm -f -- "$FIFONAME"
    exit 1
  fi
  # Find the next name in the list and start with that.
  # The /dev/null part on the third extract is to eliminate the warning
  # about starting beyond the end of the file, which will always happen
  # for the next line after the final node.
  NEXTINLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly |
               $EXTRACT -if "$IAM" -ifn 1 -ifonly |
               $EXTRACT -sr 2 2>/dev/null)
  if [ $NEXTINLIST ]
  then
    # There is another node in the list.  Do two things:
    # 1. start this script on it, as a slave (empty fifo name)
#    echo "$IAM: Starting script on $NEXTINLIST"
    rsh $NEXTINLIST "$THISSCRIPT $LISTFILE \"\" " \
       </dev/null 2>&1  &
    BACKRSH=$!
    # 2. start nettee directed to it, in the foreground, and feed what
    #    nettee writes to its stdout into execinput
    echo "$IAM: [nettee] Connecting to $NEXTINLIST"
    export NEXTNODE=$NEXTINLIST ; $NETTEE  -v 17 -next $NEXTNODE | $EXECINPUT   2>&1
  else
    echo  "$IAM: is [ListEnd]"
    #
    # This is the last node in the list: no further node is started and
    # nettee is run without -next.
    #
    export NEXTNODE=_EOC_ ; $NETTEE  -v 17 | $EXECINPUT  2>&1
  fi
else
  if [ -n "$MESLAVE" ]
  then
    echo "$IAM: failure: $LISTFILE indicates not slave but fifoname is >$FIFONAME<"
    exit 1
  fi
  # This is the top node: find the first name in the list and start with
  # that.  Commands are passed straight on; they are not executed here.
  # The /dev/null on the 2nd extract eliminates a warning message from
  # extract if the input list is empty.
  NEXTINLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly |
               $EXTRACT -nr 1 2>/dev/null)
  if [ $NEXTINLIST ]
  then
    # 1. start this script on the first node in the list
    rsh $NEXTINLIST "$THISSCRIPT $LISTFILE \"\" " </dev/null 2>&1 &
    BACKRSH=$!
    # 2. start nettee reading from the fifo, with no local output
    echo "$IAM: [stdin] connecting to $NEXTINLIST"
    DODELTA=1
    $NETTEE  -v 17 -in "$FIFONAME" -out none -next $NEXTINLIST
  else
    # There are no nodes in the list - fatal error.  Stop here instead of
    # falling through to the "DONE" report, and remove the fifo that was
    # created for this run.
    echo "$IAM fatal error - no nodes in the list"
    rm -f -- "$FIFONAME"
    exit 1
  fi
fi
# Wrap-up: reap the background rsh (if one was started), remove the fifo
# and report completion.
if [ "$BACKRSH" -gt 0 ]
then
  # BACKRSH holds the pid of the rsh that runs this script on the next node.
  wait "$BACKRSH"
fi
# FIFONAME is non-empty only on the top node.  Remove the fifo whenever it
# is set, not only when an rsh was started, so that the "no nodes in the
# list" path does not leave it behind.
if [ -n "$FIFONAME" ]
then
  rm -f -- "$FIFONAME"
fi
if [ "$DODELTA" -gt 0 ]
then
  # STIME is the time stamp taken when the script started.
  ETIME=$($ACCUDATE -ds $STIME)
  echo "$IAM: DONE, elapsed time: $ETIME"
else
  echo "$IAM: DONE"
fi
exit

