#!/usr/bin/env bash
#
# Copyright  (C)  2015  Thomas Jahns <jahns@dkrz.de>
#
# Version: 1.0
# Keywords:
# Author: Thomas Jahns <jahns@dkrz.de>
# Maintainer: Thomas Jahns <jahns@dkrz.de>
# URL: https://www.dkrz.de/redmine/projects/scales-ppm
#
# Redistribution and use in source and binary forms, with or without
# modification, are  permitted provided that the following conditions are
# met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# Neither the name of the DKRZ GmbH nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Commentary:
#
# Wrapper to run MPI-parallelized programs on Cray installations with PBS-like
# queueing systems. Use as mpirun replacement.
#
# Id of the submitted batch job; stays empty until qsub has reported one.
jobid=''
# Spinner frames; updateStatus shows the frame at index ballState.
ballChars='-\|/'
# Integer counters: ticks counted by updateStatus (one per second of sleep)
# and the index of the current spinner frame.
declare -i elapsedSeconds=0 ballState=0

# Erase the current terminal line: emit a carriage return followed by the
# terminal's "el" capability as reported by tput. Does nothing (and returns
# 0) unless stdin is attached to a terminal.
killLine() {
  tty >/dev/null || return 0
  # %b expands backslash escapes in its argument the way `echo -e` does.
  printf '%b' "\r$(tput el)"
}

# Draw one frame of the "<message> <minutes> min (<spinner>)" status line,
# then sleep for one second and advance the tick counter and spinner index.
#
# Globals:   elapsedSeconds (read/written), ballState (read/written),
#            ballChars (read)
# Arguments: $1 - text shown in front of the elapsed time
# Outputs:   the status line on stdout, only when stdin is a terminal
function updateStatus() {
    local message="$1"
    # The spinner period follows the number of frames in ballChars, so the
    # frame string can be changed without touching this function. Fall back
    # to a period of 1 to avoid a division by zero when it is empty.
    local -i frames=${#ballChars}
    if (( frames < 1 )) ; then
        frames=1
    fi
    killLine
    if tty >/dev/null ; then
      printf '%s %d min (%s)' "$message" "$(( elapsedSeconds / 60 ))" \
        "${ballChars:$ballState:1}"
    fi
    sleep 1
    elapsedSeconds=$((elapsedSeconds + 1))
    ballState=$(( (ballState + 1) % frames ))
}

# Restart the tick counter used for the elapsed-time display and wipe the
# status line. The return status is that of killLine.
resetStatus() {
  elapsedSeconds=0
  killLine
}

# SIGINT handler: cancel the submitted batch job (if its id is known), wait
# until qstat no longer prints anything for it, then exit with status 1.
#
# Globals:  jobid (read) - id of the submitted job; empty if none is known
# Outputs:  progress and a cancellation notice on stdout when stdin is a tty
# Exits:    always, with status 1
function cancelRun() {
    if [[ -n "$jobid" ]] ; then
        # The job may already be gone. A failing qdel must not terminate
        # this handler through `set -e` before the notice and the status-1
        # exit below are reached; its own error message stays visible.
        qdel "$jobid" || true
        # Only qstat's stdout decides whether the job still exists; its
        # stderr is discarded so error text cannot garble the status line.
        while [[ -n "$(qstat "$jobid" 2>/dev/null)" ]] ; do
            updateStatus "aborting job"
        done
        resetStatus
    fi
    if tty >/dev/null ; then
      echo "canceled by user"
      echo "if you see a runscript.out file, you can delete it (known race condition)"
    fi
    exit 1
}

# Ctrl-C cancels the batch job through cancelRun instead of just killing
# this wrapper.
trap cancelRun INT
# Abort on the first unhandled command failure and trace every command.
set -ex
shopt -s extglob

# Command line: [-n NTASKS] EXECUTABLE [ARGS...]
# A leading "-n" introduces the task count; without it a single task is
# requested. The next word is the program to launch and whatever remains in
# "$@" afterwards is passed on to that program.
case "$1" in
  -n)
    shift
    NTASKS=$1
    shift
    ;;
  *)
    NTASKS=1
    ;;
esac

executable=$1
shift

# Scratch files in the current directory that receive the job's stdout and
# stderr; both are removed again whenever the script exits.
JOBOUTPUT="$(mktemp --tmpdir=.)"
JOBERROR="$(mktemp --tmpdir=.)"
trap '\rm -f "$JOBOUTPUT" "$JOBERROR"' EXIT

# Job name: "xt_" + basename of the executable with blanks removed, cut to
# 15 characters (presumably the queueing system's name limit - confirm).
jobname="${executable##*/}"
jobname="xt_${jobname//[ ]/}"
jobname="${jobname::15}"

# Resolve the program to a canonical absolute path and assemble the command
# line as an array so arguments containing whitespace survive intact.
executable="$(readlink -f "$executable")"
declare -a cmd=("$executable" "$@")

# Submit the run and remember qsub's exit status. The status has to be
# captured with `||`: a plain assignment from a failing command substitution
# terminates the script under `set -e`, so a failed run would exit right
# here and its output (removed by the EXIT trap) would never be shown.
# NOTE(review): with "-W block=true" qsub is expected to wait for the job
# and to return the job's exit status - confirm against the site's PBS docs.
jobstatus=0
jobid=$(qsub -q xc_norm_h \
  -l walltime=00:20:00 \
  -e "$JOBERROR" \
  -o "$JOBOUTPUT" \
  -N "${jobname}" -W block=true \
  -- /opt/cray/alps/default/bin/aprun -n "$NTASKS" "${cmd[@]}" ) \
  || jobstatus=$?

# Poll until qstat no longer prints anything for the job. Skipped when qsub
# did not report a job id, because there is nothing to wait for then.
if [[ -n "$jobid" ]] ; then
  while [[ -n "$(qstat "$jobid" 2>/dev/null)" ]] ; do
    updateStatus "waiting for job to terminate"
  done
fi
resetStatus

# Relay the job's output: stdout without its last line, then stderr.
# Failures of sed/cat must not override the job's status, hence `set +e`.
# NOTE(review): the dropped last line is presumably a trailer added by the
# batch system - confirm.
set +e
sed '$d' "$JOBOUTPUT"
cat "$JOBERROR" >&2
set -e
exit "$jobstatus"
