#!/bin/csh -f
#
#       %W% %E%
#

# called to check and update job status

# first arg : entry length
# second arg : entries
# find out what should go on on this machine
# remove entr(y)(ies) from current_nodes
# remove entr(y)(ies) from running_jobs
# check out tiff files
# requeue jobs if necessary

# Locate the administration directory for this host.  Its name is built from
# the output of `uname -n` (curf_<hostname>) below
# ${GIG_D_ROOT}/projects/renmanadm; the state files touched further down
# (running_jobs, current_nodes, gigdonejobs, the log) all live in it.
set host = `uname -n`
set curf = curf_${host}
set rendir = ${GIG_D_ROOT}/projects/renmanadm/${curf}
set logfile = ${rendir}/gigrs_logfile

# Positional arguments (argv[2] is not read by this script):
#   argv[1]  status word; the value 'lost' selects the lost-machine branch
#   argv[3]  server name, first half of the key looked up in current_nodes
#   argv[4]  second half of that key (presumably a process id - confirm
#            against the caller)
#   last     stored in pid_fil; not referenced anywhere in the visible script
set macstat = $argv[1]
set gserver = $argv[3]
set pid_num = $argv[4]
set pid_fil = $argv[$#argv]

# Drop the four leading positional arguments; what is left in argv is the
# option/operand list that is handed to getopt afterwards.
shift
shift
shift
shift

# Normalise the remaining command line with getopt(1); the result has the
# form "<options> -- <operands>".
# NOTE(review): the option string lists 'i' twice ("i:" and "i") and has no
# 'V', although '-V' is handled below - confirm the intended option set.
set arguments = `getopt vr:i:o:idp: $argv`

# Strip ALL options (and the value of those that take one) from the front of
# $arguments so that only the operands following "--" remain.
# Only the word at the head of the list is inspected.  An option value that
# happens to look like an option (e.g. "-r -d") is therefore consumed as a
# value and can no longer cause an extra shift that eats into the operands.
while ( $#arguments > 0 )
  switch ( $arguments[1] )
    # flags without a value
    case '-d':
    case '-v':
    case '-V':
      shift arguments
      breaksw
    # options followed by a value
    case '-r':
    case '-o':
    case '-p':
    case '-i':
      shift arguments
      shift arguments
      breaksw
    # end-of-options marker: drop it and stop scanning
    case '--':
      shift arguments
      break
    # anything else is not an option: leave it in place and stop scanning
    default:
      break
  endsw
end

# Update the bookkeeping files for the job identified by $arguments.
#   macstat != 'lost' : ask gigrs_checkrunjobs for the job state; when it
#                       reports 'done', tag the job as done in running_jobs,
#                       set its node to idle in current_nodes and record the
#                       job in gigdonejobs.
#   macstat == 'lost' : log the loss and tag the job as lost in running_jobs.
if ( $macstat != 'lost' ) then

  # job_status[1] is the state word; job_status[2] is written to gigdonejobs
  # as the job's time.
  set job_status = `gigrs_checkrunjobs $arguments`

  if ( $job_status[1] == 'done' ) then
    # Prepare removal of the entry from running_jobs: the entry is moved to
    # the top of the file with "done" appended.  The actual removal takes
    # place when all running jobs are checked; this prevents corrupting a
    # counter in gigrs_startjob.
    set done_entr = `fgrep "$arguments" ${rendir}/running_jobs`
    set done_entr = ($done_entr "done")
    echo $done_entr >! ${rendir}/running_jobs_tmp
    fgrep -v "$arguments" ${rendir}/running_jobs >> ${rendir}/running_jobs_tmp
    mv ${rendir}/running_jobs_tmp ${rendir}/running_jobs

    # Reset the node's entry in current_nodes to idle.
    # Access is guarded by a flag/turn handshake: raise nodeflag_1, hand the
    # turn to the other side (2), then wait while nodeflag_2 exists and the
    # turn is still 2.
    touch ${rendir}/nodeflag_1
    echo 2 > ${rendir}/nodeturn
    set turn = `cat ${rendir}/nodeturn`
    # "$turn" is quoted so that an empty read of nodeturn (e.g. the file
    # caught while it is being rewritten) ends the wait instead of aborting
    # the script with an expression error while nodeflag_1 is still raised.
    while ( ( -e ${rendir}/nodeflag_2 ) && ( "$turn" == '2' ) )
      set turn = `cat ${rendir}/nodeturn`
    end
    set nod_ent = `fgrep "$gserver $pid_num" ${rendir}/current_nodes`
    # Words 1 and 3 of the entry are overwritten below, so only rewrite the
    # file when a complete entry was found.  Without this check a missing
    # entry aborted the script on the subscript and left nodeflag_1 behind.
    if ( $#nod_ent >= 3 ) then
      fgrep -v "$gserver $pid_num" ${rendir}/current_nodes >! ${rendir}/current_nodes_tmp
      mv ${rendir}/current_nodes_tmp ${rendir}/current_nodes
      set nod_ent[1] = idle
      set nod_ent[3] = 0
      echo $nod_ent >> ${rendir}/current_nodes
    else
      echo No current_nodes entry for $gserver $pid_num >> ${logfile}
    endif
    # Always lower our flag again, whether or not an entry was rewritten.
    /bin/rm -f ${rendir}/nodeflag_1

    # Update gigdonejobs without doubling entries: drop any earlier line for
    # this command line, then append the fresh one.  fgrep matches the
    # command line literally, like every other lookup in this script; taken
    # as a regular expression, characters such as '.', '+' or '(' in it would
    # select the wrong lines or none at all.
    fgrep -v -e "$argv" ${rendir}/gigdonejobs >! ${rendir}/gigdonejobs_tmp
    /bin/mv ${rendir}/gigdonejobs_tmp ${rendir}/gigdonejobs
    set new_entry = ("done "$argv "time :" $job_status[2] $gserver)
    echo $new_entry >> ${rendir}/gigdonejobs
  endif

else
  # Prepare removal of the entry from running_jobs: a "lost <server> 0" line
  # for this command line is put at the top of the file in place of the old
  # entry.  The actual removal takes place when all jobs on the lost machine
  # are checked; this prevents corrupting a counter in gigrs_startjob.
  echo Process was killed on server $gserver >> ${logfile}
  set lost_entr = ("lost $gserver 0" $argv)
  echo $lost_entr >! ${rendir}/running_jobs_tmp
  fgrep -v "$arguments" ${rendir}/running_jobs >> ${rendir}/running_jobs_tmp
  mv ${rendir}/running_jobs_tmp ${rendir}/running_jobs
endif

