#!/bin/csh -f
#
#       %W% %E%
#

# called to check and update job status

# first arg : entry length
# second arg : entries
# find out what should go on on this machine
# remove entr(y)(ies) from current_nodes
# remove entr(y)(ies) from running_jobs
# check out tiff files
# requeue jobs if necessary

# Per-host bookkeeping directory and log file.
set host = `uname -n`
set curf = curf_${host}
set rendir = ${GIG_D_ROOT}/projects/renmanadm/${curf}
set logfile = ${rendir}/gigrs_logfile

# The positional words read below need at least four arguments.  Stop with a
# readable message rather than a bare subscript error from the shell.
if ( $#argv < 4 ) then
  echo "${0} - expected at least 4 arguments, got $#argv"
  exit 1
endif

# Positional layout as used by this script:
#   word 1    : status word, compared against unsu and lost further down
#   word 2    : not read here
#   word 3    : server name
#   word 4    : pid number, used together with the server name to find the
#               matching line in current_nodes
#   last word : base name of the .out and .int files inside rendir
set macstat = $argv[1]
set gserver = $argv[3]
set pid_num = $argv[4]
set pid_fil = $argv[$#argv]

# Drop the four leading words; what remains in argv is the job command line.
shift
shift
shift
shift

# Normalise the job options with getopt.
# NOTE(review): the letter i is listed twice in the option string, once with
# a value and once without - confirm which one is intended.
set arguments = `getopt vr:i:o:idp: $argv`

# Skip ALL option words: strip the leading flags, and the values of those
# flags that take one, so that only the words after the getopt separator
# remain in arguments.
# Every pass looks at the current head of the list.  A flag value is removed
# together with its flag and is never examined as a flag itself, so a value
# that happens to look like a flag cannot trigger extra shifts.
# Flags without a value : -d -v -V
# Flags with a value    : -r -o -p -i
while ( $#arguments > 0 )
  switch ( "$arguments[1]" )
    case '-d':
    case '-v':
    case '-V':
      shift arguments
      breaksw
    case '-r':
    case '-o':
    case '-p':
    case '-i':
      shift arguments
      shift arguments
      breaksw
    case '--':
      shift arguments
      break
    default:
      break
  endsw
end

# Status unsu: look for error lines in the .out file of this job.  When there
# are any, reset the node line, record the job in gigdonejobs, flag it in
# running_jobs, remove the .int and .out files and stop.  Without error lines
# control falls through to the regular status handling further down.
if ( "$macstat" == 'unsu' ) then

  if ( -e $rendir/$pid_fil".out" ) then
    # errr holds every word of every line that contains the text error
    set errr = `grep error $rendir/$pid_fil".out"`

    if ( $#errr > 0 ) then

      echo gig_sa encountered an error on server $gserver >> ${logfile}
      echo removing entry from current_nodes >> ${logfile}

      # gig_sa encountered an error
      # Take the file based lock on current_nodes: raise our flag file, hand
      # the turn to the other side, then wait while the other flag file exists
      # and the turn is still 1.
      # NOTE(review): presumably the counterpart script uses nodeflag_1 and
      # writes 2 to nodeturn - confirm against that script.
      touch ${rendir}/nodeflag_2
      echo 1 > ${rendir}/nodeturn
      set turn = `cat ${rendir}/nodeturn`
      # turn is quoted because a read that races with a rewrite of nodeturn
      # can come back empty, which would otherwise be an expression error
      while ( ( -e ${rendir}/nodeflag_1 ) && ( "$turn" == '1' ) )
        set turn = `cat ${rendir}/nodeturn`
      end

      # Replace the node line: field 1 becomes idle, field 3 becomes 0.
      set nod_ent = `fgrep "$gserver $pid_num" $rendir"/current_nodes"`
      fgrep -v "$gserver $pid_num" $rendir"/current_nodes" >! $rendir"/current_nodes_tmp"
      mv $rendir"/current_nodes_tmp" $rendir"/current_nodes"
      # Without a matching line there is nothing to write back.  The check
      # also keeps a subscript error from ending the script while the flag
      # file is still in place.
      if ( $#nod_ent >= 3 ) then
        set nod_ent[1] = idle
        set nod_ent[3] = 0
        echo $nod_ent:q >> $rendir"/current_nodes"
      endif

      # release the lock
      /bin/rm -f ${rendir}/nodeflag_2

      # move entry to gigdonejobs
      # the words are quoted so that wildcard characters in the error text
      # are written out as they are
      echo error $argv:q $errr:q >> $rendir/gigdonejobs

      # prepare removal of entry from running_jobs
      # actual removal takes place when all running jobs are checked
      # this prevents corrupting a counter in gigrs_startjob
      set done_entr = `fgrep "$arguments" $rendir"/running_jobs"`
      set done_entr = ($done_entr:q "done")
      echo $done_entr:q >! $rendir"/running_jobs_tmp"
      fgrep -v "$arguments" $rendir"/running_jobs" >> $rendir"/running_jobs_tmp"
      mv $rendir"/running_jobs_tmp" $rendir"/running_jobs"

      # pid_fil is the last command line word, the same name tested above
      /bin/rm -f $rendir/$pid_fil".int"
      /bin/rm -f $rendir/$pid_fil".out"
      exit

    endif
  endif
endif

# Regular bookkeeping, selected by the status word.
#   any status but lost : ask gigrs_checkrunjobs about the job; when the first
#                         word of the reply is done, flag the job in
#                         running_jobs, reset its node line and write a done
#                         line to gigdonejobs
#   lost                : log the problem; when the .out file has error lines
#                         handle the job as failed and stop, otherwise replace
#                         its running_jobs line with a lost line
if ( "$macstat" != 'lost' ) then

  set job_status = `gigrs_checkrunjobs $arguments`

  if ( $job_status[1] == 'done' ) then
    # prepare removal of entry from running_jobs
    # actual removal takes place when all running jobs are checked
    # this prevents corrupting a counter in gigrs_startjob
    set done_entr = `fgrep "$arguments" $rendir"/running_jobs"`
    set done_entr = ($done_entr:q "done")
    echo $done_entr:q >! $rendir"/running_jobs_tmp"
    fgrep -v "$arguments" $rendir"/running_jobs" >> $rendir"/running_jobs_tmp"
    mv $rendir"/running_jobs_tmp" $rendir"/running_jobs"

    # Take the file based lock on current_nodes: raise our flag file, hand
    # the turn to the other side, then wait while the other flag file exists
    # and the turn is still 1.
    # NOTE(review): presumably the counterpart script uses nodeflag_1 and
    # writes 2 to nodeturn - confirm against that script.
    touch ${rendir}/nodeflag_2
    echo 1 > ${rendir}/nodeturn
    set turn = `cat ${rendir}/nodeturn`
    # turn is quoted because a read that races with a rewrite of nodeturn
    # can come back empty, which would otherwise be an expression error
    while ( ( -e ${rendir}/nodeflag_1 ) && ( "$turn" == '1' ) )
      set turn = `cat ${rendir}/nodeturn`
    end

    # Replace the node line: field 1 becomes idle, field 3 becomes 0.
    set nod_ent = `fgrep "$gserver $pid_num" $rendir"/current_nodes"`
    fgrep -v "$gserver $pid_num" $rendir"/current_nodes" >! $rendir"/current_nodes_tmp"
    mv $rendir"/current_nodes_tmp" $rendir"/current_nodes"
    # Without a matching line there is nothing to write back.  The check
    # also keeps a subscript error from ending the script while the flag
    # file is still in place.
    if ( $#nod_ent >= 3 ) then
      set nod_ent[1] = idle
      set nod_ent[3] = 0
      echo $nod_ent:q >> $rendir"/current_nodes"
    endif

    # release the lock
    /bin/rm -f ${rendir}/nodeflag_2

    # update donejobs
    # we do not honor double entries
    # fixed string match: the job words are data, not a regular expression
    fgrep -v -e "$argv" $rendir"/gigdonejobs" >! $rendir"/gigdonejobs_tmp"
    /bin/mv $rendir"/gigdonejobs_tmp" $rendir"/gigdonejobs"
    echo "done $argv time : $job_status[2] $gserver" >> $rendir"/gigdonejobs"

  endif

else

  echo Error on server $gserver. >> ${logfile}

  if ( -e $rendir/$pid_fil".out" ) then
    # errr holds every word of every line that contains the text error
    set errr = `grep error $rendir/$pid_fil".out"`

    if ( $#errr > 0 ) then

      # gig_sa encountered an error
      # Same file based lock as in the done case above.
      touch ${rendir}/nodeflag_2
      echo 1 > ${rendir}/nodeturn
      set turn = `cat ${rendir}/nodeturn`
      while ( ( -e ${rendir}/nodeflag_1 ) && ( "$turn" == '1' ) )
        set turn = `cat ${rendir}/nodeturn`
      end

      # Replace the node line: field 1 becomes idle, field 3 becomes 0.
      set nod_ent = `fgrep "$gserver $pid_num" $rendir"/current_nodes"`
      fgrep -v "$gserver $pid_num" $rendir"/current_nodes" >! $rendir"/current_nodes_tmp"
      mv $rendir"/current_nodes_tmp" $rendir"/current_nodes"
      # skip the write back when no line matched, so the lock is always
      # released below
      if ( $#nod_ent >= 3 ) then
        set nod_ent[1] = idle
        set nod_ent[3] = 0
        echo $nod_ent:q >> $rendir"/current_nodes"
      endif

      # release the lock
      /bin/rm -f ${rendir}/nodeflag_2

      # move entry to gigdonejobs
      # the words are quoted so that wildcard characters in the error text
      # are written out as they are
      echo error $argv:q $errr:q >> $rendir/gigdonejobs

      # prepare removal of entry from running_jobs
      # actual removal takes place when all running jobs are checked
      # this prevents corrupting a counter in gigrs_startjob
      set done_entr = `fgrep "$arguments" $rendir"/running_jobs"`
      set done_entr = ($done_entr:q "done")
      echo $done_entr:q >! $rendir"/running_jobs_tmp"
      fgrep -v "$arguments" $rendir"/running_jobs" >> $rendir"/running_jobs_tmp"
      mv $rendir"/running_jobs_tmp" $rendir"/running_jobs"

      # pid_fil is the last command line word, the same name tested above
      /bin/rm -f $rendir/$pid_fil".int"
      /bin/rm -f $rendir/$pid_fil".out"
      exit

    endif
  endif

  # No error lines found: replace the running_jobs line of this job with a
  # lost line.
  # actual removal takes place when all jobs on lost machine are checked
  # this prevents corrupting a counter in gigrs_startjob
  echo "lost $gserver 0" $argv:q >! $rendir"/running_jobs_tmp"
  fgrep -v "$arguments" $rendir"/running_jobs" >> $rendir"/running_jobs_tmp"
  mv $rendir"/running_jobs_tmp" $rendir"/running_jobs"
endif

