#!/usr/bin/env python2.3
# -*- python -*-
from __future__ import division

__version__ = "$Revision: 1.6 $"

import commands
import exceptions
import os
import sys
import time

import tools2

### XXX: should look up the bjob at init

# Shell command templates; each is filled in with the % operator and run
# through the shell.

# %s: space-separated host names of one host group (see watch()).
LSPLACE_CMDLINE = "lsplace %s"
# %s: job id.  tr squeezes runs of spaces and turns them into tabs so the
# output can be handed to tools2.tab_dict_iterable().  The "\t" is expanded
# by Python, so the shell sees a literal tab character.
BJOBS_CMDLINE = "bjobs -Aw %s | tr -s ' ' '\t'"
# "%%%d" renders as a literal "%" followed by the new slot limit; the second
# %d is the job id.
BMOD_CMDLINE = "bmod -J '%%%d' %d"

# Used by increase_slot_limit() both as the amount added to the slot limit
# and as the minimum number of unused slots below which the limit is raised.
JOB_SLOT_STEP = 1
# Default delay between polls in watch(); main() overrides it from the
# POLYWATCH_SLEEP_TIME environment variable when that is set.
SLEEP_TIME = 60 # seconds

def die(text):
    """Write *text* followed by a newline to stderr and exit with status 1."""
    sys.stderr.write("%s\n" % (text,))
    raise SystemExit(1)

def increase_slot_limit(jobid):
    """Raise the slot limit of job array *jobid* when it is nearly used up.

    Runs bjobs for *jobid* and reads the ARRAY_SPEC, RUN and PEND columns of
    the first row.  If fewer than JOB_SLOT_STEP slots are unused
    (slot limit minus running jobs), the limit is raised by JOB_SLOT_STEP
    with bmod; otherwise only a status line is printed to stderr.

    Exits the process with status 0 when no jobs are pending, and with
    status 1 (via die) when bjobs fails or ARRAY_SPEC carries no "%" slot
    limit.  A failing bmod is reported on stderr but does not exit, so the
    caller's polling loop can try again later.
    """
    status, output = commands.getstatusoutput(BJOBS_CMDLINE % jobid)
    if status != 0:
        die("bjobs returned non-zero status")

    lines = output.splitlines()
    # NOTE(review): presumably tab_dict_iterable returns a sequence whose
    # first item iterates over one dict per row, keyed by the header line --
    # confirm against tools2.
    jobinfo = tools2.tab_dict_iterable(lines)[0].next()

    # The slot limit is the text after "%" in ARRAY_SPEC; without a "%"
    # there is no limit to read, so fail with a message instead of an
    # IndexError traceback.
    array_spec = jobinfo["ARRAY_SPEC"]
    spec_fields = array_spec.split("%")
    if len(spec_fields) < 2:
        die("%s has no job slot limit in ARRAY_SPEC %r" % (jobid, array_spec))
    slot_limit = int(spec_fields[1])
    running = int(jobinfo["RUN"])
    pending = int(jobinfo["PEND"])

    if pending == 0:
        print >>sys.stderr, "no pending jobs left"
        sys.exit(0)

    if slot_limit - running < JOB_SLOT_STEP:
        new_slot_limit = slot_limit + JOB_SLOT_STEP
        bmod_status = os.system(BMOD_CMDLINE % (new_slot_limit, jobid))
        if bmod_status != 0:
            # Do not claim success when bmod failed; the limit is unchanged.
            print >>sys.stderr, "%s job slot limit still %s; bmod returned status %s" % (jobid, slot_limit, bmod_status)
        else:
            print >>sys.stderr, "%s job slot limit now %s" % (jobid, new_slot_limit)
    else:
        print >>sys.stderr, "%s job slot limit %s but only %s jobs running" % (jobid, slot_limit, running)

def watch(jobid, hostgroups, sleep_time=SLEEP_TIME):
    """Poll forever, raising the slot limit of *jobid* when all groups are ready.

    Every *sleep_time* seconds, lsplace is run once per entry of
    *hostgroups* (each entry a sequence of host names).  Checking stops at
    the first group for which lsplace exits non-zero; only when every group
    succeeds is increase_slot_limit(jobid) called.  The loop itself never
    terminates.
    """
    while True:
        time.sleep(sleep_time)

        all_ready = True
        for group in hostgroups:
            exit_status, _ = commands.getstatusoutput(LSPLACE_CMDLINE % " ".join(group))
            if exit_status == 0:
                continue
            # First group that cannot be placed ends this round.
            print >>sys.stderr, "%s not ready; lsplace exit status %s" % (",".join(group), exit_status)
            all_ready = False
            break

        if all_ready:
            increase_slot_limit(jobid)

def comma_split(text):
    """Return the list of comma-separated fields of *text*."""
    fields = text.split(",")
    return fields

def main(args):
    """Parse the command line and environment, then watch a job array.

    *args* is the argument list without the program name: args[0] is the
    job id and each following argument is one host group written as a
    comma-separated list of host names.  When no host groups are given on
    the command line they are taken from the whitespace-separated
    POLYWATCH_HOSTS environment variable.  POLYWATCH_SLEEP_TIME, if set,
    overrides the default polling interval (seconds).

    Returns 2 when interrupted with KeyboardInterrupt.  Raises RuntimeError
    when no host groups or no job id are specified.
    """
    try:
        sleep_time = float(os.environ["POLYWATCH_SLEEP_TIME"])
    except KeyError:
        sleep_time = SLEEP_TIME

    hostgroup_texts = args[1:]
    if not hostgroup_texts:
        # split() with no separator ignores repeated, leading and trailing
        # whitespace, so no empty host groups are produced.
        hostgroup_texts = os.environ.get("POLYWATCH_HOSTS", "").split()
    if not hostgroup_texts:
        raise RuntimeError("no hosts to watch specified")

    # Checked after the hosts so the "no hosts" error keeps its precedence.
    if not args:
        raise RuntimeError("no job id specified")
    jobid = int(args[0])

    # A real list, because watch() iterates over the host groups repeatedly.
    hostgroups = [comma_split(text) for text in hostgroup_texts]

    try:
        watch(jobid, hostgroups, sleep_time)
    except KeyboardInterrupt:
        return 2

if __name__ == "__main__":
    # Run as a script: main()'s return value becomes the exit status.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
