#!/bin/csh -f

# MPIRUN
# This script tries to start jobs on whatever kind of machine you're on.
# Strategy - This program is built with a default device it uses in
# certain ways. The user can override this default from the command line.
#
# Note that the -f is important to ensure that commands needed by csh are
# not aliased to commands with the same name and different behavior.
#

#set verbose
# Installation-specific locations and launcher commands.
set MPIR_HOME = /home/lusk/mpich
set EXECER = /home/lusk/mpich/util/execer
set MEIKO_RUN = prun
set SP1_RUN = /usr/lpp/euih/eui/cotb0

# Defaults for everything the command line can override.
set jobid=$$
set progname=''
set np=1
set arch=''
set default_arch='alpha'
set machine = ''
set default_device='ch_p4'
set execer_machine=1
set cmdLineArgs=''
set cmdline=''
set use_execer = 0
set mpirun_verbose = 0
set nolocal = 0
set leavePGFile = 0
set just_testing = 0
set machineFile = ''
set debugger=""
# Machine names listed by the help text.  Every name here must match a
# case label of the launch switch at the end of this script.
set Machines = "chameleon meiko paragon p4 execer sp1 ibmspx anlspx ksr sgi_mp i860 ipsc860 inteldelta cray_t3d"
# Value written into the spsubmit request (anlspx); see -mr_maxtime.
set max_time = 15
#
# Special, system specific values
#
# polling_mode is for systems that can select between polling and
# interrupt-driven operation.  Currently, only IBM POE is so supported
# (TMC CMMD has some support for this choice of mode)
set polling_mode = 1

# Parse command line arguments
# The ultimate goal is to determine what kind of parallel machine this
# is we are running on. Then we know how to start jobs...

# Walk the argument list by index so that an option taking a value can
# consume the argument that follows it.
set j = 1
while ("$j" <= "$#argv")
  set arg = "$argv[$j]"
  switch ("$arg")
    case -mr_arch:
      @ j++
      set arch = "$argv[$j]"
      breaksw
    case -mr_np:
      @ j++
      set np = "$argv[$j]"
      # nodigits is empty only when np consists solely of digits
      set nodigits = `echo $np | sed 's/^[0-9]*$//'`
      if (("$nodigits" != "") || ($np < 1)) then
        echo np: $np is an invalid number of processors.  Exiting.
        exit 1
      endif
      breaksw
    case -mr_machine:
      @ j++
      set machine = "$argv[$j]"
      breaksw
    case -mr_machinefile:
      @ j++
      set machineFile = "$argv[$j]"
      breaksw
    case -mr_device:
      @ j++
      set default_device = "$argv[$j]"
      breaksw
    case -mr_nolocal:
      set nolocal = 1
      breaksw
    case -mr_h:
      goto PrintHelp
      breaksw
    case -mr_e:
      set use_execer = 1
      breaksw
    case -mr_pg:
      set use_execer = 0
      breaksw
    case -mr_leave_pg:
    case -mr_keep_pg:
      set leavePGFile = 1
      breaksw
    case -mr_v:
      set mpirun_verbose = 1
      breaksw
    case -mr_t:
      set just_testing = 1
      breaksw
    case -mr_dbx:
      set debugger = "dbx"
      breaksw
#    case -mr_xdbx:
#      set debugger = "xdbx"
#      breaksw
    case -mr_nopoll:
      set polling_mode = 0
      breaksw
    case -mr_maxtime:
      @ j++
      # max_time (with the underscore) is the variable the anlspx
      # launcher reads.
      set max_time = "$argv[$j]"
      breaksw
    case -usage:
    case -help:
    case -[?]:
      # Case labels are glob patterns, so the question mark is bracketed
      # to match only a literal one.
      # Accept these for help until the program name is provided.
      if ("$progname" == "") goto PrintHelp
      # Once the program name is known these belong to the program:
      # fall through to the default case so they are passed along.
    default:
      # The first unrecognized argument is assumed to be the name of
      # the program, but only if a file of that name exists
      if ("$progname" == "" && -e "${argv[$j]}" ) then
        set progname = "$argv[$j]"
      else
        # any following unrecognized arguments are assumed to be arguments
        # to be passed to the program
        if ($use_execer) then
          set cmdLineArgs = "$cmdLineArgs -arg=${argv[$j]}"
        else
          set cmdLineArgs = "$cmdLineArgs ${argv[$j]}"
        endif
      endif
      breaksw
  endsw
  @ j++
end

# We need at least the program name

if ("$progname" == "") then
  echo Missing: program name
  exit 1
endif

# Try to find the architecture.  Note that many systems do not have 
# arch, and that some systems (RS6000's for example) use "uname -m" 
# to return difficult to interpret info (RS6000s return a string of 
# digits that encodes the particular RS6000 model).
      
#
# Eventually we'll want to use "which" to try and find a version of 
# arch...
# Determine arch unless the user supplied it with -mr_arch.  The raw
# answer of the system (ARCH) is mapped onto the names this script uses.
if ("$arch" == "") then
  if ( -e /bin/arch) then
      set ARCH=`/bin/arch`
  else if ( -e /usr/local/bin/arch) then
      set ARCH=`/usr/local/bin/arch`
  else if ( -e /usr/ucb/arch) then
      set ARCH=`/usr/ucb/arch`
  else
      # Without an arch command, keep the system name only for AIX
      # and use the hardware name everywhere else.
      set ARCH=`uname -s`
      if ("$ARCH" != "AIX") then
          set ARCH=`uname -m`
      endif
  endif

  # Quoted so that an empty answer reaches the default case instead of
  # producing a syntax error.
  switch ("$ARCH")
    case sun4:
    case sun4m:
    case sun4c:
      # There are others.  SunOS 4 and Solaris are deliberately not
      # told apart: both are treated as sun4.
      set arch="sun4"
      breaksw
    case alpha:
      set arch="alpha"
      breaksw
    case AIX:
      set arch=rs6000
      breaksw
    case IRIX:
    case sgi:
      set arch="IRIX"
      breaksw
    case freebsd:
      set arch="freebsd"
      breaksw
    case paragon:
      set arch="paragon"
      breaksw
    case ksr:
      set arch="ksr"
      breaksw
    case IP19:
      set arch="sgi_mp"
      breaksw
    case ipsc860:
    case i860:
    case ipsc:
      set arch="ipsc860"
      breaksw
    default:
      echo "Cannot determine machine architecture. Use the '-mr_arch <arch>'"
      echo "flag or the '-mr_machine <machine_name>' flag. Defaulting"
      echo "to $default_arch"
      set arch=$default_arch
      breaksw
  endsw
endif

# Try to find the machine
# Pick the startup procedure (machine) from the architecture unless the
# user already chose one with -mr_machine.
if ("$machine" == "") then
  switch ($arch)
    case sun4:
      # This device should only exist on sun4s that are actually
      # MEIKO machines.
      if ( -e /dev/elan ) then
        set machine="meiko"
      else if ("$default_device" == "chameleon") then
        set machine="chameleon"
      else if (("$default_device" == "ch_p4") && ($use_execer == 0)) then
        set machine="p4"
      else
        set machine="execer"
      endif
      breaksw
    case alpha:
    case IRIX:
    case freebsd:
      # Plain workstations: the device decides, execer is the fallback.
      if ("$default_device" == "chameleon") then
        set machine="chameleon"
      else if (("$default_device" == "ch_p4") && ($use_execer == 0)) then
        set machine="p4"
      else
        set machine="execer"
      endif
      breaksw
    case rs6000:
      # This only works on the ANL sp system
      echo `hostname` | grep spnode > /dev/null
      if ($status == 0) then
        # This is the correct version to use once we are on a node
        set machine="ibmspx"
      else if (-d /etc/FRAMES && -d /mcs) then
        set machine="anlspx"
      else if ("$default_device" == "chameleon") then
        set machine="chameleon"
      else if (("$default_device" == "ch_p4") && ($use_execer == 0)) then
        set machine="p4"
      else if (-e /usr/lpp/euih/eui) then
        set machine="sp1"
      else if (-e /usr/bin/poe) then
        # should work for other users
        set machine="ibmspx"
      else
        set machine="execer"
      endif
      breaksw
    case ipsc860:
    case i860:
    case ipsc:
      set machine="ipsc860"
      breaksw
    case paragon:
    case ksr:
    case sgi_mp:
    case cray_t3d:
      # For these the machine name is the architecture name.
      set machine="$arch"
      breaksw
    default:
      echo "Can't determine the type of the machine this is."
      echo "Set it with -mr_machine <machine_type>."
      breaksw
  endsw
endif

# Fill out relative program pathnames

# Get the current directory
# PWD_TRIAL is the directory used to qualify relative program names and
# to hold the generated procgroup files.
if ($?PWD == 1) then
    set PWD_TRIAL = "$PWD"
else
    set PWD_TRIAL = `pwd`
endif
if ( "$PWD_TRIAL" != "" ) then
    # Prefer the path without the automounter prefix, provided that the
    # shortened path really is a directory.
    set PWD_TRIAL = `pwd | sed -e 's%/tmp_mnt/%/%g'`
    if ( ! -d "$PWD_TRIAL" )  then
        echo "Warning: your default path uses the automounter; this may"
        echo "cause some problems if you use other NFS-connected systems."
        # Fall back to the unmodified path.  This has to be a csh "set";
        # a Bourne-style assignment is run as a command and fails.
        set PWD_TRIAL = `pwd`
    endif
endif

# Whatever precedes the first slash of the program name; this is empty
# exactly when the name starts with a slash, i.e. is an absolute path.
set firstpart = `echo $progname | sed 's/\/.*//'`
# Relative names are anchored at the current directory.
if ("$firstpart" != "") set progname = "$PWD_TRIAL/$progname"

# Get value of host
# Should really check for hostname first....
if ( $?HOST == 0 ) then
    # Start from an empty value so that the fallback test below is valid
    # even when no hostname command was found.
    set HOST = ""
    if ("$arch" == "ipsc860") then
        set HOST = `hostname`
    else
        if (-e "`which hostname`") set HOST = "`hostname`"
    endif
    # Note that uname -n may not produce a usable hostname.  Any suggestions?
    if ("$HOST" == "") set HOST = "`uname -n`"
endif

if ($mpirun_verbose) then
  echo "running $progname on $np ${arch} ${machine} processors"
endif

# For the workstation-network startup methods, choose the hosts to run on.
# Produces cmdline (execer arguments) or machinelist (p4 hosts).
# procFound counts the processes that already have a host.
if (("$machine" == "execer") || ("$machine" == "p4")) then
  if ($nolocal) then
    # the job is not to be run locally, so this machine is not counted
    set procFound = 0
  else
    # the local machine hosts the first process
    if ("$machine" == "execer") then
      set cmdline = "$cmdline -host=${HOST} -pgm=${progname} -numprocs=1 $cmdLineArgs"
    endif
    set procFound = 1
  endif

  if ("$machineFile" == "") then
    # If on the ANL SPx, use getjid to get the machine list...
    # NOTE(review): machine is execer or p4 inside this block, so the
    # ibmspx test below can never succeed here - confirm where this
    # default was meant to be computed.
    if ("$machine" == "ibmspx" && -e /usr/local/bin/getjid) then
        set machineFile = "/sphome/$LOGNAME/SPnodes.`/usr/local/bin/getjid`"
    else
        set machineFile = "${MPIR_HOME}/util/machines/machines.${arch}"
    endif
  endif

  if (!(-e "$machineFile" && -r "$machineFile")) then
    echo Cannot read $machineFile.  Exiting.
    exit 2
  endif

  if ($nolocal == 0) then
      set machinesfound=(`head -${np} $machineFile`)
  else
      # Remove host from the list of available machines....
      set machinesfound=(`grep -v "$HOST" "$machineFile" | head -${np}`)
  endif

  @ nfound = $#machinesfound + $procFound
  if ($nfound < $np) then
    echo "Only $nfound ${arch}'s available.  Exiting."
    exit 1
  endif

  # Get the machine list

  set machinelist = ()
  set machineNum = 1
  while ("$procFound" < "$np")
    # Entries naming the local host are skipped below, so the candidates
    # can run out even though the count above looked sufficient.
    if ($machineNum > $#machinesfound) then
      echo "Not enough distinct hosts in $machineFile for $np processes.  Exiting."
      exit 1
    endif
    set machineName = "${machinesfound[$machineNum]}"
    if ($mpirun_verbose) then
      echo "running on $machineName"
    endif
    # The local host already has its process unless -mr_nolocal was given.
    if ($nolocal || ("$machineName" != "$HOST")) then
      if ("$machine" == "execer") then
        set cmdline = "$cmdline -host=${machineName} -pgm=${progname} -numprocs=1 $cmdLineArgs"
      else
        set machinelist = ($machinelist $machineName)
      endif
      @ procFound++
    endif
    @ machineNum++
  end
endif

switch ($machine)
  case meiko:
    # The procgroup line carries one less than the requested process
    # count; np itself is restored right after taking that value.
    @ np--
    set pg_count = $np
    @ np++
    if ($just_testing == 0) then
      echo "local $pg_count $progname" >  "$PWD_TRIAL/PI$$"
      ${MEIKO_RUN} -n $np $progname -p4pg $PWD_TRIAL/PI$$ $cmdLineArgs
      # The procgroup file is removed unless the user asked to keep it.
      if ($leavePGFile == 0) then
        /bin/rm "$PWD_TRIAL/PI$$"
      else
        echo "P4 procgroup file is $PWD_TRIAL/PI$$."
      endif
    else
      # Dry run: show the procgroup line and the command only.
      echo "local $pg_count $progname"
      echo ${MEIKO_RUN} -n $np $progname -p4pg $PWD_TRIAL/PI$$ $cmdLineArgs
    endif
    breaksw
  case ksr:
    # The procgroup line carries one less than the requested process
    # count; np is restored afterwards.
    @ np--
    if ($just_testing) then
      echo "local $np $progname"
    else
      echo "local $np $progname" >  "$PWD_TRIAL/PI$$"
    endif
    @ np++
    # Only the procgroup file name is quoted.  The user arguments must
    # stay separate words, otherwise they become part of the file name
    # handed to -p4pg.
    if ($just_testing) then
      echo $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
    else
      $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
      if ($leavePGFile) then
        echo "P4 procgroup file is $PWD_TRIAL/PI$$."
      else
        /bin/rm "$PWD_TRIAL/PI$$"
      endif
    endif
    breaksw
  case sgi_mp:
    # The procgroup line carries one less than the requested process
    # count; np is restored afterwards.
    @ np--
    if ($just_testing) then
      echo "local $np $progname"
    else
      echo "local $np $progname" >  "$PWD_TRIAL/PI$$"
    endif
    @ np++
    # The user arguments are passed on to the program, as in the other
    # procgroup-based cases.
    if ($just_testing) then
      echo $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
    else
      $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
      if ($leavePGFile) then
        echo "P4 procgroup file is $PWD_TRIAL/PI$$."
      else
        /bin/rm "$PWD_TRIAL/PI$$"
      endif
    endif
    breaksw
  case cray_t3d:
    # Untested.  Only does the interactive (non-NQS submission)
    # The program is started directly with its own -npes option.
    if ($just_testing) then
        echo $progname -npes $np $cmdLineArgs
    else
        $progname -npes $np $cmdLineArgs
    endif
    breaksw
  case ibmspx:
    # Start the program with poe.  This only works on an SPx running
    # release 2 software with the high-performance switch, and the host
    # list still needs to be parameterized somehow.
    # Example host lists:
    #   /cave3/vroom-51/bash/sp1/sp2.hosts for MM machine hosts,
    #   /sphome/gropp/mpich/examples/test/pt2pt/hostlist for DIS hosts
    # For ip over the switch use
    #   MP_EUILIB ip
    # Other variables to consider:
    #   setenv MP_PMDLOG yes
    #   setenv PWD $PWD_TRIAL
    #   setenv MP_INFOLEVEL 1
    #   setenv MP_INFOLEVEL 20
    #   setenv MP_CSS_INTERRUPT ?
    # If the executable is not on a file system mounted on a node, the
    # resulting error message may be a strange one.
    if ($just_testing == 0) then
      setenv MP_EUILIB us
      setenv MP_RMPOOL 0
      setenv MP_HOSTFILE $machineFile
      setenv MP_PROCS $np
      setenv MP_INFOLEVEL 0
      # -mr_nopoll selects interrupt-driven operation
      if ($polling_mode == 0) setenv MP_CSS_INTERRUPT yes
      poe $progname $cmdLineArgs
    else
      # Dry run: print the same settings and command instead.
      echo setenv MP_EUILIB us
      echo setenv MP_RMPOOL 0
      echo setenv MP_HOSTFILE $machineFile
      echo setenv MP_PROCS $np
      echo setenv MP_INFOLEVEL 0
      if ($polling_mode == 0) echo setenv MP_CSS_INTERRUPT yes
      echo poe $progname $cmdLineArgs
    endif
    breaksw
  case sp1:
    # This is for the old MPL/p or EUI-H environment
    # An empty prefix runs the command; in a dry run the prefix is echo,
    # so the same words are printed instead.
    set dryrun = ()
    if ($just_testing) set dryrun = (echo)
    $dryrun ${SP1_RUN} $progname $np $cmdLineArgs
    breaksw
  case anlspx:
    # This is for the ANL SP1/2, using the ANL "spsubmit" program.
    # Users of "loadleveler" will probably want something like this.
    #
    # A Bourne shell job script (PIrun) is generated in the current
    # directory and then handed to spsubmit.  Inside the here-document,
    # escaped dollar signs and backquotes are left for the job script to
    # evaluate; everything else is filled in now.  Note that PIrun is
    # written even when only testing.
    #
    # The trap needs an explicit signal list; without one no handler is
    # installed.  The handler releases the nodes and ends the job script.
    cat >PIrun <<.
#! /bin/sh
JID=\`/usr/local/bin/getjid\`
trap "sprelease \$JID; exit 1" 1 2 3 15
cd $PWD_TRIAL
MP_EUILIB=us
MP_RMPOOL=0
MP_HOSTFILE=/sphome/$LOGNAME/SPnodes.\$JID
MP_PROCS=$np
MP_INFOLEVEL=0
export MP_EUILIB
export MP_RMPOOL
export MP_HOSTFILE
export MP_PROCS
export MP_INFOLEVEL
if [ $polling_mode = 0 ] ; then
   MP_CSS_INTERRUPT=yes
   export MP_CSS_INTERRUPT
fi
/bin/rm -f /sphome/$LOGNAME/job.output
echo "About to run poe ... " >> /sphome/$LOGNAME/job.output
poe $progname $cmdLineArgs >> /sphome/$LOGNAME/job.output 2>&1
echo "Poe exited ..." >> /sphome/$LOGNAME/job.output
sprelease \$JID
exit 0
.
    chmod a+x PIrun
    # Determine CAC
    set CAC = `whatcac | cut -d' ' -f 9 | sed -e s/\"//g -e s/://`
    # The answers below are fed to spsubmit in this fixed order; the dry
    # run prints the same dialogue instead of submitting it.
    if ($just_testing) then
	echo "spsubmit <<."
	echo $CAC
	echo $max_time
	echo $np
	echo B
	echo M
	echo n
	echo $PWD_TRIAL/PIrun
	echo
	echo C
	echo .
        echo spwait
    else
	spsubmit <<.
$CAC
$max_time
$np
B
M
n
$PWD_TRIAL/PIrun

C
.
	spwait
    endif
    breaksw
  case paragon:
    # The program is started directly; -sz carries the process count.
    # An empty prefix runs the command, an echo prefix only prints it.
    set dryrun = ()
    if ($just_testing) set dryrun = (echo)
    $dryrun $progname -sz $np $cmdLineArgs
    breaksw
  case inteldelta:
    # This script must actually be run ON the Delta (more accurately, 
    # on a Delta service node), which this does by invoking rsh.
    # If the rsh fails, check your permissions.
    # A similar script could be used on Intel i860 systems
    # UNTESTED
    #
    # The dry-run echo is pieced together from single- and double-quoted
    # parts so that the printed command shows literal double quotes
    # around the processor count.
    if ($just_testing) then
      echo 'rsh delta1 mexec -t"'"$np"'" -f ' $progname $cmdLineArgs
    else
      rsh delta1 mexec -t"$np" -f $progname $cmdLineArgs
    endif
    breaksw
  case i860:
  case ipsc860:
    # Allocate a cube, load the program, wait for it and release the
    # cube again.  Nothing is loaded when the allocation fails.
    if ($just_testing == 0) then
      getcube -t $np
      if ($status == 0) then
        load $progname $cmdLineArgs
        waitcube
        relcube
      else
        echo "Requested number of nodes not available"
      endif
    else
      # Dry run: list the commands that would be issued.
      echo "getcube -t $np"
      echo "load $progname $cmdLineArgs"
      echo "waitcube"
      echo "relcube"
    endif
    breaksw

  case chameleon:
    # The program is started directly; -np carries the process count.
    if ($just_testing == 0) then
      $progname -np $np $cmdLineArgs
    else
      echo $progname -np $np $cmdLineArgs
    endif
    breaksw
  case execer:
    # cmdline holds the per-host arguments collected earlier.
    # An empty prefix runs execer, an echo prefix only prints the command.
    set dryrun = ()
    if ($just_testing) set dryrun = (echo)
    $dryrun $EXECER "-jobid=$jobid" $cmdline
    breaksw
  case p4:
    # Write a p4 procgroup file (PI<pid>) in the working directory and
    # start the first process, locally or through rsh with -mr_nolocal.
    #
    # We use this form instead of "local 0" in case the user is trying to
    # select a second network whose names are not those returned by
    # "hostname".  For example, a system with a DEC GIGAswitch, Myricom
    # network, or IP over the IBM SP2 switch (HPS).
    # NOTE(review): with -mr_nolocal the first line still names $HOST
    # although the first process is started on machinelist[1] - confirm
    # that this is intended.
    if ($just_testing) then
      echo "Procgroup file:"
      echo "$HOST 0 $progname"
    else
      echo "$HOST 0 $progname" > "$PWD_TRIAL/PI$$"
    endif

    # With -mr_nolocal the first listed machine is the one the program is
    # started on through rsh, so it gets no line of its own here.
    if ($nolocal) then
      set procNum = 2
    else
      set procNum = 1
    endif

    while ($procNum <= ${#machinelist})
      if ($just_testing) then
        echo "${machinelist[$procNum]} 1 $progname"
      else
        echo "${machinelist[$procNum]} 1 $progname" >> "$PWD_TRIAL/PI$$"
      endif
      @ procNum++
    end

    # make sure the procgroup file was written
    if (!($just_testing) && \
        !(-e "$PWD_TRIAL/PI$$" && -r "$PWD_TRIAL/PI$$")) then
      echo Failed to write "$PWD_TRIAL/PI$$".  Exiting.
      exit 1
    endif

    if (!($just_testing) && $mpirun_verbose) then
      echo Created "$PWD_TRIAL/PI$$"
    endif

    # The debugger reads its run command from PId<pid>.  The file is only
    # written for a real run, so that a dry run leaves nothing behind.
    if (("$debugger" != "") && !($just_testing)) then
      cat > $PWD_TRIAL/PId$$ <<.
run -p4pg $PWD_TRIAL/PI$$ $cmdLineArgs
.
    endif
    #
    if ($just_testing) then
      echo ""
      if ($nolocal) then
        echo rsh ${machinelist[1]} $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
      else
        echo $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
      endif
    else
      if ($nolocal) then
        if ("$debugger" != "") then
          rsh ${machinelist[1]} $debugger -sr $PWD_TRIAL/PId$$ $progname
        else
          rsh ${machinelist[1]} $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
        endif
      else
        if ("$debugger" != "") then
          $debugger -sr $PWD_TRIAL/PId$$ $progname
        else
          $progname -p4pg "$PWD_TRIAL/PI$$" $cmdLineArgs
        endif
      endif # nolocal
      if ($leavePGFile) then
        echo "P4 procgroup file is $PWD_TRIAL/PI$$."
      else
        /bin/rm "$PWD_TRIAL/PI$$"
        # the debugger command file is a generated file as well
        if ("$debugger" != "") /bin/rm -f "$PWD_TRIAL/PId$$"
      endif
    endif #just testing
    breaksw
  default:
    # No startup procedure is known for this machine name (the name is
    # empty when it could not be determined).  Report failure to the
    # caller with a nonzero status.
    echo "This machine ($machine) is not yet supported."
    echo Exiting.
    exit 1
    breaksw
endsw

exit

# Usage summary; reached with goto from the argument parser.
PrintHelp:
echo "mpirun [options...] <progname> [options...]"
echo ""
echo "  options:"
echo "    -mr_arch <architecture>"
echo "            specify the architecture (must have matching machines.<arch>"
echo "            file in ${MPIR_HOME}/util/machines) if using the execer"
echo "    -mr_h   This help"
echo "    -mr_machine <machine name>"
echo "            use startup procedure for <machine name>"
echo "            Currently supported:"
# A loop variable of its own, so that the machine setting is not touched.
foreach m ($Machines)
  echo "              $m"
end
echo ""
echo "    -mr_machinefile <machine-file name>"
echo "            Take the list of possible machines to run on from the"
echo "            file <machine-file name>"
echo "    -mr_device <device>"
echo "            specify the device (for example ch_p4 or chameleon) used"
echo "            to choose the startup procedure"
echo "    -mr_np <np>"
echo "            specify the number of processors to run on"
echo "    -mr_nolocal"
echo "            don't run on the local machine (only works for "
echo "            p4 and ch_p4 jobs)"
echo "    -mr_e   Use execer to start the program on workstation"
echo "            clusters"
echo "    -mr_pg  Use a procgroup file to start the p4 programs, not execer"
echo "            (default)"
echo "    -mr_leave_pg"
echo "            Don't delete the P4 procgroup file after running"
echo "    -mr_keep_pg"
echo "            Same as -mr_leave_pg"
echo "    -mr_t   Testing - do not actually run, just print what would be"
echo "            executed"
echo "    -mr_v   Verbose - throw in some comments"
echo "    -mr_dbx Start the first process under dbx where possible"
#echo "    -mr_xdbx Start the first process under xdbx where possible"
echo "    -mr_nopoll Do not use a polling-mode communication."
echo "            Available only on IBM SPx."
echo "    -mr_maxtime <time>"
echo "            Time limit given to spsubmit (anlspx only)"
echo ""
exit
