aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author sienkiew <sienkiew@d34015c8-bcbb-4646-8ac8-8ba5febf221d> 2011-09-27 11:46:09 -0400
committer sienkiew <sienkiew@d34015c8-bcbb-4646-8ac8-8ba5febf221d> 2011-09-27 11:46:09 -0400
commit 61285fb53a2b871d52e27e4e8ecc4d7da6e09e1b (patch)
tree 942866a36819b21731f74c157bda1ffb19081c51
parent ff102cd2b89daf9a0feea2e10503e780d2454e29 (diff)
download steuermann-61285fb53a2b871d52e27e4e8ecc4d7da6e09e1b.tar.gz
checkpoint
git-svn-id: https://svn.stsci.edu/svn/ssb/etal/steuermann/trunk@430 d34015c8-bcbb-4646-8ac8-8ba5febf221d
-rw-r--r-- README 4
-rw-r--r-- dev.sm 189
-rw-r--r-- init.sm 4
-rw-r--r-- scripts/steuermann_report.cgi 56
-rw-r--r-- steuermann/config.py 4
-rw-r--r-- steuermann/db.sql 9
-rw-r--r-- steuermann/hosts.ini 11
-rw-r--r-- steuermann/nodes.py 20
-rw-r--r-- steuermann/report.py 53
-rw-r--r-- steuermann/run.py 284
-rw-r--r-- steuermann/run_all.py 116
11 files changed, 557 insertions, 193 deletions
diff --git a/README b/README
index b0cd1d7..8054a62 100644
--- a/README
+++ b/README
@@ -7,7 +7,5 @@ make
python setup.py install
-smc test.sm
-?
-
+smc [ -a ] [ -r run_name ] file.sm
diff --git a/dev.sm b/dev.sm
index 1def284..cb55c8e 100644
--- a/dev.sm
+++ b/dev.sm
@@ -1,5 +1,11 @@
+## TODO:
+## add builds on ssbwebv1 for those things that we actually care about
-TABLE assemble HOST rhe5-64
+####################
+####################
+
+# arzach assembles all the source code
+TABLE assemble HOST arzach
CMD dev.stsci_python RUN "assemble_stsci_python dev"
AFTER init/*
@@ -12,7 +18,16 @@ TABLE assemble HOST rhe5-64
CMD dev.hstcal RUN "assemble_hstcal dev"
AFTER init/*
-TABLE build HOST rhe4-32 rhe4-64 rhe5-64 leopard snow-leopard
+ CMD nop RUN "sleep 1"
+ AFTER init/*
+
+####################
+####################
+
+# install stsci_python into default environment
+# build hstcal
+# - everywhere
+TABLE build HOST herbert thor arzach bond cadeau
CMD dev.py2.7 RUN "build_stsci_python dev 2.7"
AFTER init/*
AFTER *:assemble/dev.stsci_python
@@ -20,7 +35,9 @@ TABLE build HOST rhe4-32 rhe4-64 rhe5-64 leopard snow-leopard
AFTER init/*
AFTER *:assemble/dev.hstcal
-TABLE build HOST rhe5-64
+# older python environments
+# - arzach only
+TABLE build HOST arzach
CMD dev.py2.6 RUN "build_stsci_python dev 2.6"
AFTER init/*
AFTER *:assemble/dev.stsci_python
@@ -29,7 +46,9 @@ TABLE build HOST rhe5-64
AFTER init/*
AFTER *:assemble/dev.stsci_python
-TABLE build HOST rhe4-32 leopard
+# stsdas and friends
+# - 32 bit only
+TABLE build HOST herbert bond
CMD dev.axe RUN "build_axe dev"
AFTER init/*
AFTER *:assemble/dev.axe
@@ -48,14 +67,166 @@ TABLE build HOST rhe4-32 leopard
AFTER build/dev.stsci_iraf
-TABLE build HOST rhe4-64 rhe5-64
+# stsdas for 64 bit machines - get it from a related 32 bit system
+TABLE build HOST thor arzach
CMD dev.stsci_iraf_64hack RUN "build_stsci_iraf_64hack dev herbert"
- AFTER rhe4-32:build/dev.stsci_iraf*
+ AFTER herbert:build/dev.stsci_iraf*
-TABLE build HOST snow-leopard
+TABLE build HOST cadeau
CMD dev.stsci_iraf_64hack RUN "build_stsci_iraf_64hack dev cadeau"
- AFTER rhe4-32:build/dev.stsci_iraf*
+ AFTER bond:build/dev.stsci_iraf*
-TABLE build HOST rhe5-64
+# stsci_python documentation
+# - one machine only
+TABLE build HOST arzach
CMD dev.stsci_python_sphinxdocs RUN "build_sphinxdocs dev 2.7"
AFTER build/dev.py2.7
+
+# old epydoc documentation - only works on thor; hope we can get rid of
+# epydoc sooner than we have to do anything about this.
+TABLE build HOST thor
+ CMD dev.stsci_python_epydoc RUN "/thor/data2/iraf/epydoc_test/nightly"
+ AFTER build/dev.py2.7
+
+# stamp the IRAF banner file when the builds are complete
+TABLE stamp HOST herbert thor arzach bond cadeau
+ CMD dev RUN "build_stamp dev"
+ AFTER build/*
+
+####################
+####################
+
+# regular distributions
+
+TABLE distribute HOST herbert thor arzach
+ CMD dev.iraf RUN "synctool - irafdev"
+ AFTER stamp/dev
+ CMD dev.pyssg RUN "synctool - pyssgdev"
+ AFTER stamp/dev
+ CMD dev.stsci_iraf RUN "synctool - stsci_iraf_dev"
+ AFTER stamp/dev
+ CMD dev.hstcal RUN "synctool - hstcal_dev"
+ AFTER stamp/dev
+ CMD dev.motd RUN "synctool - irafdev/iraf/unix/hlib/motd"
+ AFTER distribute/dev.iraf
+
+TABLE distribute HOST bond cadeau
+ CMD irafdev.pkg RUN "cd $HOME/daily_build/mac_package; ./clean ; ./build dev " AFTER stamp/dev
+ CMD irafdev.dmg RUN "cd $HOME/daily_build/mac_package; ./distribute dev" AFTER irafdev.pkg
+
+# wads of special cases
+
+# jwcalibdev has local disk - some day it may do its own builds
+TABLE distribute HOST arzach
+ CMD jwcalibdev.iraf RUN "synctool jwcalibdev: irafdev"
+ AFTER stamp/dev
+ CMD jwcalibdev.pyssg RUN "synctool jwcalibdev: pyssgdev"
+ AFTER stamp/dev
+ CMD jwcalibdev.stsci_iraf RUN "synctool jwcalibdev: stsci_iraf_dev"
+ AFTER stamp/dev
+ CMD jwcalibdev.hstcal RUN "synctool jwcalibdev: hstcal_dev"
+ AFTER stamp/dev
+ CMD jwcalibdev.motd RUN "synctool jwcalibdev: irafdev/iraf/unix/hlib/motd"
+ AFTER jwcalibdev.iraf
+
+# goods - has RHE 5 only now
+
+TABLE distribute_other HOST arzach
+ CMD goods.iraf RUN "synctool goods12: irafdev"
+ AFTER stamp/dev
+ CMD goods.pyssg RUN "synctool goods12: pyssgdev"
+ AFTER stamp/dev
+ CMD goods.stsci_iraf RUN "synctool goods12: stsci_iraf_dev"
+ AFTER stamp/dev
+ CMD goods.hstcal RUN "synctool goods12: hstcal_dev"
+ AFTER stamp/dev
+ CMD goods.motd RUN "synctool goods12: irafdev/iraf/unix/hlib/motd"
+ AFTER goods.iraf
+
+# witserv1 - who are these guys?
+
+TABLE distribute_other HOST arzach
+ CMD witserv1.iraf RUN "synctool witserv1: irafdev"
+ AFTER stamp/dev
+ CMD witserv1.pyssg RUN "synctool witserv1: pyssgdev"
+ AFTER stamp/dev
+ CMD witserv1.stsci_iraf RUN "synctool witserv1: stsci_iraf_dev"
+ AFTER stamp/dev
+ CMD witserv1.hstcal RUN "synctool witserv1: hstcal_dev"
+ AFTER stamp/dev
+ CMD witserv1.motd RUN "synctool witserv1: irafdev/iraf/unix/hlib/motd"
+ AFTER witserv1.iraf
+
+# dmsinsvm - have a pipeline and irafx/irafdev on the same machine for INS
+
+TABLE distribute_other HOST arzach
+ CMD dmsinsvm.iraf RUN "synctool dmsinsvm: irafdev"
+ AFTER stamp/dev
+ CMD dmsinsvm.pyssg RUN "synctool dmsinsvm: pyssgdev"
+ AFTER stamp/dev
+ CMD dmsinsvm.stsci_iraf RUN "synctool dmsinsvm: stsci_iraf_dev"
+ AFTER stamp/dev
+ CMD dmsinsvm.hstcal RUN "synctool dmsinsvm: hstcal_dev"
+ AFTER stamp/dev
+ CMD dmsinsvm.motd RUN "synctool dmsinsvm: irafdev/iraf/unix/hlib/motd"
+ AFTER dmsinsvm.iraf
+
+# UDF - another funded project with their own machines
+
+TABLE distribute_other HOST thor
+ CMD udf1.iraf RUN "synctool udf1: irafdev"
+ AFTER stamp/dev
+ CMD udf1.pyssg RUN "synctool udf1: pyssgdev"
+ AFTER stamp/dev
+ CMD udf1.stsci_iraf RUN "synctool udf1: stsci_iraf_dev"
+ AFTER stamp/dev
+ CMD udf1.hstcal RUN "synctool udf1: hstcal_dev"
+ AFTER stamp/dev
+ CMD udf1.motd RUN "synctool udf1: irafdev/iraf/unix/hlib/motd"
+ AFTER udf1.iraf
+
+# royal - a beowulf cluster
+
+TABLE distribute_other HOST thor
+ CMD royal.iraf RUN "synctool royal: irafdev"
+ AFTER stamp/dev
+ CMD royal.pyssg RUN "synctool royal: pyssgdev"
+ AFTER stamp/dev
+ CMD royal.stsci_iraf RUN "synctool royal: stsci_iraf_dev"
+ AFTER stamp/dev
+ CMD royal.hstcal RUN "synctool royal: hstcal_dev"
+ AFTER stamp/dev
+ CMD royal.motd RUN "synctool royal: irafdev/iraf/unix/hlib/motd"
+ AFTER royal.iraf
+
+####################
+####################
+
+# This structure is meant to serialize the regtests so that nothing else
+# is going on at the same time. The regtests can consume all the CPUs.
+TABLE regtest HOST herbert thor arzach bond cadeau
+ CMD okify RUN "test_okify"
+ AFTER distribute/*
+ CMD dev.test2.7 RUN "test_rt dev 2.7"
+ AFTER okify
+
+TABLE regtest HOST arzach
+ CMD dev.test2.6 RUN "test_rt dev 2.6"
+ AFTER dev.test2.7
+ CMD dev.test2.5 RUN "test_rt dev 2.5"
+ AFTER dev.test2.6
+
+TABLE regtest HOST thor
+ CMD dev.contact RUN "test_contact"
+ AFTER dev.test2.7
+
+ # change this to have each test batch import independently
+TABLE regtest_import HOST ssb
+ CMD dev.import RUN "test_import daily"
+ AFTER *:regtest/*
+ CMD dev.notify RUN "test_notify daily"
+ AFTER dev.import
+ CMD dev.sqlite_watch RUN "test_sqlite_watch"
+ AFTER dev.import
+ CMD dev.mysql_watch RUN "test_mysql_watch"
+ AFTER dev.import
diff --git a/init.sm b/init.sm
index 7297f85..af058f6 100644
--- a/init.sm
+++ b/init.sm
@@ -1,9 +1,9 @@
-TABLE init HOST leopard snow-leopard rhe5-64 rhe4-64 rhe4-32
+TABLE init HOST bond cadeau arzach thor herbert
CMD sendscripts LOCAL "/eng/ssb/auto/steuermann_scripts/init_sendscripts %(hostname)s %(workdir)s"
CMD sysstat RUN "sysstat"
AFTER sendscripts
-TABLE init HOST rhe5-64
+TABLE init HOST arzach
CMD svnsync RUN "assemble_svnsync"
AFTER OPT init/irafx_update
AFTER sendscripts
diff --git a/scripts/steuermann_report.cgi b/scripts/steuermann_report.cgi
index a25d407..abf371b 100644
--- a/scripts/steuermann_report.cgi
+++ b/scripts/steuermann_report.cgi
@@ -35,18 +35,23 @@ def sqltime(arg) :
return d
+##########
+# if no action specified, show the list of runs
+#
if not 'action' in form :
print 'content-type: text/html'
print ''
db = steuermann.config.open_db()
c = db.cursor()
- c.execute('SELECT DISTINCT run FROM status ORDER BY run DESC')
+ c.execute('SELECT DISTINCT run FROM sm_status ORDER BY run DESC')
for run, in c :
print "<a href=%s?action=status&run=%s>%s</a><br>"%(cginame, run, run)
sys.exit(0)
action = form['action'].value
-
+##########
+# status means show the status of a particular run
+#
if action == 'status' :
db = steuermann.config.open_db()
import steuermann.report
@@ -57,6 +62,9 @@ if action == 'status' :
print steuermann.report.report_html( db, run, info_callback=steuermann.report.info_callback_gui )
sys.exit(0)
+##########
+# log means show the result of a particular node from a run
+#
elif action == 'log' :
print 'content-type: text/plain'
print ''
@@ -71,7 +79,7 @@ elif action == 'log' :
db = steuermann.config.open_db()
c = db.cursor()
- c.execute("SELECT status, start_time, end_time, notes FROM status WHERE run = ? AND host = ? AND tablename = ? AND cmd = ?",(
+ c.execute("SELECT status, start_time, end_time, notes FROM sm_status WHERE run = ? AND host = ? AND tablename = ? AND cmd = ?",(
run, host, table, cmd ) )
x = c.fetchone()
if x is None :
@@ -95,17 +103,43 @@ elif action == 'log' :
for x in [ ' ' + x for x in notes.split('\n') ] :
print x
print ""
- print "--------------------"
filename = '%s/%s/%s:%s.%s.log'%(steuermann.config.logdir,run,host,table,cmd)
- f=open(filename,'r')
- while 1 :
- x = f.read(65536)
- if x == '' :
- break
- sys.stdout.write(x)
+ try :
+ f=open(filename,'r')
+ except IOError:
+ print "No log file %s" %filename
+ f = None
+ print "--------------------"
+
+ if f :
+ while 1 :
+ x = f.read(65536)
+ if x == '' :
+ break
+ sys.stdout.write(x)
+
sys.exit(0)
+##########
+# info means show information about the system
+#
+elif action == 'info' :
+ print 'content-type: text/html\n'
+ print 'db credentials: ',steuermann.config.db_creds,'<br>'
+ print 'logdir: ',steuermann.config.logdir,'<br>'
+ db = steuermann.config.open_db()
+ cur = db.cursor()
+ cur.execute("select count(*) from sm_status")
+ l = cur.fetchone()
+ print "database records: %s\n"%l[0],'<br>'
+ cur.execute("select count(*) from sm_runs")
+ l = cur.fetchone()
+ print "runs: %s\n"%l[0],'<br>'
+ sys.exit(0)
+
+##########
+
print 'content-type: text/html'
print ''
-print 'no action?'
+print 'no recognized action?'
diff --git a/steuermann/config.py b/steuermann/config.py
index 7419703..b10881b 100644
--- a/steuermann/config.py
+++ b/steuermann/config.py
@@ -1,5 +1,7 @@
+db_creds = '/ssbwebv1/data2/steuermann/steuermann.db'
+
def open_db() :
import sqlite3
- return sqlite3.connect('/ssbwebv1/data2/steuermann/steuermann.db')
+ return sqlite3.connect(db_creds)
logdir = '/ssbwebv1/data2/steuermann/logs'
diff --git a/steuermann/db.sql b/steuermann/db.sql
index dd6a569..b6d2ae6 100644
--- a/steuermann/db.sql
+++ b/steuermann/db.sql
@@ -2,7 +2,7 @@
-- Before we start running anything, we insert a record for every
-- command in the test run. The initial status is 'S'.
-CREATE TABLE status (
+CREATE TABLE sm_status (
run VARCHAR(100),
-- name of this run
@@ -19,6 +19,7 @@ CREATE TABLE status (
-- R = started, not finished
-- S = skipped
-- P = prereq not satisfied, so not attempted
+ -- E = error internal to steuermann
-- 0-255 = exit code
start_time VARCHAR(30),
@@ -37,13 +38,13 @@ CREATE TABLE status (
);
-create unique index idx_status_1 on status ( run, host, tablename, cmd );
+create unique index sm_status_idx1 on sm_status ( run, host, tablename, cmd );
-- table lists all run names in the system
-CREATE TABLE runs (
+CREATE TABLE sm_runs (
run VARCHAR(100)
);
-CREATE UNIQUE INDEX idx_runs_run ON runs(run);
+CREATE UNIQUE INDEX sm_runs_idx1 ON sm_runs(run);
diff --git a/steuermann/hosts.ini b/steuermann/hosts.ini
index 0f0c043..f8c2e3b 100644
--- a/steuermann/hosts.ini
+++ b/steuermann/hosts.ini
@@ -17,9 +17,12 @@
[all]
hostname=no_such_machine
-local=[ 'sh', '-c', '%(script)s' ]
maxproc=2
+; local really applies the same to all the machines, but this is a
+; convenient place to stash it for now.
+local=[ 'sh', '-c', '%(script)s' ]
+
[linux:csh]
; for CSH
;
@@ -74,6 +77,12 @@ like=linux:csh
workdir=/arzach/data1/iraf/steuermann
maxproc=4
+[ssb]
+hostname=ssbwebv1
+like=linux:csh
+workdir=/ssbwebv1/data1/iraf/work
+maxproc=2
+
[bond]
hostname=bond
like=mac:csh
diff --git a/steuermann/nodes.py b/steuermann/nodes.py
index 5e9852a..0f24e36 100644
--- a/steuermann/nodes.py
+++ b/steuermann/nodes.py
@@ -110,11 +110,21 @@ class command_tree(object):
# crack open host:table/cmd
def crack_name(name) :
- t = name.split(':')
- host = t[0]
- t = t[1].split('/')
- table = t[0]
- cmd = t[1]
+ if ':' in name :
+ t = name.split(':')
+ host = t[0]
+ name = t[1]
+ else :
+ host = '*'
+
+ if '/' in name :
+ t = name.split('/')
+ table = t[0]
+ cmd = t[1]
+ else :
+ table = '*'
+ cmd = name
+
return (host, table, cmd)
#####
diff --git a/steuermann/report.py b/steuermann/report.py
index 82da984..cc90604 100644
--- a/steuermann/report.py
+++ b/steuermann/report.py
@@ -9,6 +9,21 @@ import pandokia.text_table as text_table
import pandokia.common
import StringIO
+# maybe the output is html 3.2 - in any case, it is way simpler than
+# more recent standards.
+html_header='''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<HTML>
+<HEAD>
+<TITLE>%(title)s</TITLE>
+</HEAD>
+<BODY>
+'''
+
+html_trailer='''
+</BODY>
+</HTML>
+'''
+
# this will be reset by the cgi main program if we are in a real cgi
cginame = 'arf.cgi'
@@ -16,7 +31,7 @@ cginame = 'arf.cgi'
def info_callback_status( db, run, tablename, host, cmd ) :
c = db.cursor()
- c.execute("SELECT status FROM status WHERE run = ? AND host = ? AND tablename = ? AND cmd = ?",(
+ c.execute("SELECT status FROM sm_status WHERE run = ? AND host = ? AND tablename = ? AND cmd = ?",(
run, host, tablename, cmd ) )
status, = c.fetchone()
return status
@@ -29,7 +44,7 @@ simple_status = ( 'N', 'P', 'S', 'W' )
def info_callback_gui( db, run, tablename, host, cmd ) :
c = db.cursor()
- c.execute("SELECT status, start_time, end_time FROM status WHERE run = ? AND host = ? AND tablename = ? AND cmd = ?",(
+ c.execute("SELECT status, start_time, end_time FROM sm_status WHERE run = ? AND host = ? AND tablename = ? AND cmd = ?",(
run, host, tablename, cmd ) )
x = c.fetchone()
if x is None :
@@ -81,7 +96,7 @@ def info_callback_debug_table_cell( db, run, tablename, cmd, host ) :
def get_table_list( db, run_name ) :
c = db.cursor()
- c.execute("select max(depth) as d, tablename from status where run = ? group by tablename order by d asc",(run_name,))
+ c.execute("select max(depth) as d, tablename from sm_status where run = ? group by tablename order by d asc",(run_name,))
table_list = [ x for x in c ]
# table_list contains ( depth, tablename )
return table_list
@@ -97,25 +112,28 @@ def get_table( db, run_name, tablename, info_callback, showdepth=0 ) :
t.define_column('depth')
c = db.cursor()
- c.execute("select distinct host from status where tablename = ? and run = ? order by host asc",(tablename, run_name))
+ c.execute("select distinct host from sm_status where tablename = ? and run = ? order by host asc",(tablename, run_name))
for host, in c :
t.define_column(host)
- c.execute("""select cmd, host, depth, status, start_time, end_time, notes from status
- where tablename = ? and run = ? order by depth, cmd asc
+ c.execute("select cmd, max(depth) as d from sm_status where tablename = ? and run = ? group by cmd order by d asc",(tablename, run_name))
+ row = -1
+ cmd_to_row = { }
+ for cmd, depth in c :
+ row = row + 1
+ cmd_to_row[cmd] = row
+ t.set_value(row, 0, cmd)
+ if showdepth :
+ t.set_value(row, 'depth', depth)
+
+ c.execute("""select cmd, host, status, start_time, end_time, notes from sm_status
+ where tablename = ? and run = ? order by cmd asc
""", ( tablename, run_name ) )
- row = -1
- prev_cmd = None
+ row = 0
for x in c :
- cmd, host, depth, status, start_time, end_time, notes = x
- if cmd != prev_cmd :
- row = row + 1
- t.set_value(row, 0, cmd)
- if showdepth :
- t.set_value(row, 'depth', depth)
- prev_cmd = cmd
-
+ cmd, host, status, start_time, end_time, notes = x
+ row = cmd_to_row[cmd]
info = info_callback( db, run_name, tablename, host, cmd )
if isinstance(info, tuple) :
t.set_value( row, host, text=info[0], html=info[1] )
@@ -149,6 +167,7 @@ def report_text( db, run_name, info_callback = info_callback_status ) :
def report_html( db, run_name, info_callback = info_callback_status, hlevel=1 ) :
s = StringIO.StringIO()
+ s.write(html_header % { 'title' : run_name } )
s.write('<h%d>%s</h%d>\n'%(hlevel,run_name,hlevel))
hlevel = hlevel + 1
@@ -160,6 +179,8 @@ def report_html( db, run_name, info_callback = info_callback_status, hlevel=1 )
t = get_table( db, run_name, tablename, info_callback, showdepth=1 )
s.write(t.get_html())
+ s.write(html_trailer)
+
return s.getvalue()
#
diff --git a/steuermann/run.py b/steuermann/run.py
index 36cda80..af52a02 100644
--- a/steuermann/run.py
+++ b/steuermann/run.py
@@ -7,6 +7,8 @@ import subprocess
import time
import datetime
import os
+import traceback
+import sys
import ConfigParser
@@ -20,6 +22,9 @@ class struct :
#####
+class run_exception(Exception) :
+ # raised by runner.run() when starting a node fails for any reason;
+ # run_all.run_step() catches it and records status 'E' for that node
+ pass
+
class runner(object):
# dict of all current running processes, indexed by node name
@@ -51,100 +56,112 @@ class runner(object):
#####
# start a process
- def run( self, node, run_name ):
+ def run( self, node, run_name, no_run = False ):
try :
- args = self.get_host_info(node.host)
- except :
- print "ERROR: do not know how to run on %s"%node.host
- raise
+ try :
+ args = self.get_host_info(node.host)
+ except Exception, e :
+ log_traceback()
+ print "ERROR: do not know how to run on %s"%node.host
+ print e
+ raise
- if 'maxproc' in args :
hostname = args['hostname']
+ if 'maxproc' in args :
- n = int(self.howmany.get(hostname,0))
- if n >= int(args['maxproc']) :
- print "decline to run %s - %d other already running"%(node.name,n)
- return False
-
- n = n + 1
- self.howmany[hostname] = n
- print "running %s %s %d"%(hostname,node.name, n)
- else :
- print "running %s %s no maxproc"%(hostname, node.name)
-
- if debug :
- print "run",node.name
- if debug :
- print "....%s:%s/%s\n"%(node.host, node.table, node.cmd)
-
- node.running = 1
-
- args = args.copy()
- args.update(
- script=node.script,
- script_type=node.script_type,
- host=node.host,
- table=node.table,
- cmd=node.cmd,
- node=node.name,
- )
-
- if debug :
- print "ARGS"
- for x in sorted([x for x in args]) :
- print '%s=%s'%(x,args[x])
-
- args['script'] = args['script'] % args
-
- if args['script_type'] == 'r' :
- run = args['run']
- elif args['script_type'] == 'l' :
- run = args['local']
- else :
- raise Exception()
-
- t = [ ]
- for x in run :
- # bug: what to do in case of keyerror
- t.append( x % args )
-
- run = t
+ n = int(self.howmany.get(hostname,0))
+ if n >= int(args['maxproc']) :
+ print "decline to run %s - %d other already running"%(node.name,n)
+ return False
- if debug :
- print "RUN",run
-
- # make sure the log directory is there
- logdir= self.logdir + "/%s"%run_name
- try :
- os.makedirs(logdir)
- except OSError:
- pass
-
- # create a name for the log file, but do not use / in the name
- logfile_name = "%s/%s.log"%( logdir, node.name.replace('/','.') )
-
- # open the log file, write initial notes
- logfile=open(logfile_name,"w")
- logfile.write('%s %s\n'%(datetime.datetime.now(),run))
- logfile.flush()
-
- # start running the process
- p = subprocess.Popen(args=run,
- stdout=logfile,
- stderr=subprocess.STDOUT,
- shell=False, close_fds=True)
-
- # remember the popen object for the process; remember the open log file
- n = struct()
- n.proc = p
- n.logfile = logfile
- n.logfile_name = logfile_name
-
- # remember the process is running
- self.all_procs[node.name] = n
-
- return True
+ n = n + 1
+ self.howmany[hostname] = n
+ print "running %s %s %d"%(hostname,node.name, n)
+ else :
+ print "running %s %s no maxproc"%(hostname, node.name)
+
+ if debug :
+ print "run",node.name
+ if debug :
+ print "....%s:%s/%s\n"%(node.host, node.table, node.cmd)
+
+ node.running = 1
+
+ args = args.copy()
+ args.update(
+ script=node.script,
+ script_type=node.script_type,
+ host=node.host,
+ table=node.table,
+ cmd=node.cmd,
+ node=node.name,
+ )
+
+ if debug :
+ print "ARGS"
+ for x in sorted([x for x in args]) :
+ print '%s=%s'%(x,args[x])
+
+ args['script'] = args['script'] % args
+
+ if args['script_type'] == 'r' :
+ run = args['run']
+ elif args['script_type'] == 'l' :
+ run = args['local']
+ else :
+ raise Exception()
+
+ t = [ ]
+ for x in run :
+ # bug: what to do in case of keyerror
+ t.append( x % args )
+
+ run = t
+
+ if debug :
+ print "RUN",run
+
+ # make sure the log directory is there
+ logdir= self.logdir + "/%s"%run_name
+ try :
+ os.makedirs(logdir)
+ except OSError:
+ pass
+
+ # create a name for the log file, but do not use / in the name
+ logfile_name = "%s/%s.log"%( logdir, node.name.replace('/','.') )
+
+ # open the log file, write initial notes
+ logfile=open(logfile_name,"w")
+ logfile.write('%s %s\n'%(datetime.datetime.now(),run))
+ logfile.flush()
+
+ # debug - just say the name of the node we would run
+ if no_run :
+ run = [ 'echo', 'no_run - node=', node.name ]
+
+ # start running the process
+ p = subprocess.Popen(args=run,
+ stdout=logfile,
+ stderr=subprocess.STDOUT,
+ shell=False, close_fds=True)
+
+ # remember the popen object for the process; remember the open log file
+ n = struct()
+ n.proc = p
+ n.logfile = logfile
+ n.logfile_name = logfile_name
+
+ # remember the process is running
+ self.all_procs[node.name] = n
+
+ return True
+
+ except Exception, e :
+ log_traceback()
+ txt= "ERROR RUNNING %s"%node.name
+ raise run_exception(txt)
#####
# callback when a node finishes
@@ -155,16 +172,12 @@ class runner(object):
args = self.get_host_info(node.host)
- if 'maxproc' in args :
- hostname = args['hostname']
+ hostname = args['hostname']
- n = int(self.howmany.get(hostname,0))
- n = n - 1
+ n = self.howmany[hostname] - 1
+ self.howmany[hostname] = n
- self.howmany[hostname] = n
- print "finish %s %s %d"%(hostname,node_name,n)
- else :
- print "finish %s %s no maxproc"%(hostname,node_name)
+ print "finish %s %s %d"%(hostname,node_name,n)
# note the termination of the process at the end of the log file
logfile = self.all_procs[node_name].logfile
@@ -225,14 +238,18 @@ class runner(object):
def _host_get_names( self, cfg, section ) :
d = { }
# pick all the variables out of this section
- for name, value in cfg.items(section) :
- if value.startswith('[') :
- # it is a list
- d[name] = eval(value)
- else :
- # everything else is plain text
- d[name] = value
- return d
+ try :
+ for name, value in cfg.items(section) :
+ if value.startswith('[') :
+ # it is a list
+ d[name] = eval(value)
+ else :
+ # everything else is plain text
+ d[name] = value
+ return d
+ except ConfigParser.NoSectionError :
+ print "No config section in hosts.ini: %s"%section
+ return { }
def load_host_info( self, filename=None ) :
@@ -243,26 +260,55 @@ class runner(object):
self.cfg.read(filename)
def get_host_info(self, host) :
- if debug:
- print "enter get_host_info",host
if not host in self.host_info_cache :
-
d = self._host_get_names(self.cfg, host)
- if debug:
- print "in get_host_info, got names for ",host, d
+
if 'like' in d :
- if debug:
- print "has like", d['like']
- d1 = self.get_host_info(d['like']).copy()
- del d['like']
+ # get the dict of what this entry is like, copy it,
+ # and update it with the values for this entry
+ d1 = self.get_host_info(d['like'])
+ d1 = d1.copy()
d1.update(d)
- self.host_info_cache[host] = d1
- else :
- print "end of chain",host,d
- self.host_info_cache[host] = d
+ d = d1
+ print d
+ del d['like']
- if debug:
- print "leave get_host_info",host, self.host_info_cache[host]
+ # default hostname is the name from the section header
+ if not 'hostname' in d :
+ d['hostname'] = host
+
+ # default maximum processes is 1
+ if not 'maxproc' in d :
+ d['maxproc'] = 1
+
+ self.host_info_cache[host] = d
return self.host_info_cache[host]
#####
+
+# The traceback interface is awkward in python; here is something I copied from pyetc:
+
+def log_traceback() :
+    # Print the exception currently being handled (as reported by
+    # sys.exc_info()) to stdout: one "file:line, in function" line per
+    # stack frame, the source text of that frame when available, then the
+    # exception description, then a "---" separator.  Returns nothing.
+    #
+    # You would think that the python traceback module contains
+    # something useful to do this, but it always returns multi-line
+    # strings.  I want each line of output logged separately so the log
+    # file remains easy to process, so I reverse engineered this out of
+    # the logging module.
+    try:
+        etype, value, tb = sys.exc_info()
+        tbex = traceback.extract_tb( tb )
+        for filename, lineno, name, line in tbex :
+            print '%s:%d, in %s'%(filename,lineno,name)
+            if line:
+                print ' %s'%line.strip()
+
+        for x in traceback.format_exception_only( etype, value ) :
+            # Format with %, not a comma: "print fmt, x" emits the literal
+            # text "%s" followed by x.  Strip the newline that
+            # format_exception_only() puts on the end of each string so
+            # that print does not follow it with an empty line.
+            print ": %s"%x.rstrip()
+
+        print "---"
+
+    finally:
+        # If you don't clear these guys, you can make loops that
+        # the garbage collector has to work hard to eliminate.
+        etype = value = tb = None
+
diff --git a/steuermann/run_all.py b/steuermann/run_all.py
index e7bff61..f670abe 100644
--- a/steuermann/run_all.py
+++ b/steuermann/run_all.py
@@ -26,6 +26,7 @@ except ImportError :
def main() :
global xnodes
+ global no_run
# read all the input files
if readline :
@@ -37,19 +38,38 @@ def main() :
import atexit
atexit.register(readline.write_history_file, history)
- opt, args = easyargs.get( { '-a' : '--all',
- '--all' : '-a',
+
+# easyargs spec definition:
+#
+# '-v' : '', # arg takes no parameter, opt['-v'] is
+# # how many times it occurred
+# '-f' : '=', # arg takes a parameter
+# '-mf' : '=+', # arg takes a parameter, may be specified
+# # several times to get a list
+# '--verbose' : '-v', # arg is an alias for some other arg
+
+ opt, args = easyargs.get( {
+ '--all' : '-a' ,
+ '-a' : '' , # run all nodes non-interactively
+ '-r' : '=' , # give run name
+ '-n' : '' , # do not actually execute any processes
} )
#
#
- all = '--all' in opt
+ all = opt['-a']
+ no_run = opt['-n']
di_nodes = nodes.read_file_list( args )
xnodes = di_nodes.node_index
- run_name = str(datetime.datetime.now()).replace(' ','_')
+
+ if '-r' in opt :
+ run_name = opt['-r']
+ else :
+ run_name = str(datetime.datetime.now()).replace(' ','_')
+
db = steuermann.config.open_db()
if all :
@@ -59,6 +79,16 @@ def main() :
#
+def find_wild_names( xnodes, name ) :
+    # Return the list of node names from xnodes for which
+    # nodes.wildcard_name( name, node_name ) is true.  The pattern and
+    # every accepted node name are echoed to stdout along the way.
+    print "find_wild",name
+    matches = [ ]
+    for candidate in xnodes :
+        if not nodes.wildcard_name( name, candidate ) :
+            continue
+        print "...",candidate
+        matches.append(candidate)
+    return matches
+#
+
def do_flag( xnodes, name, recursive, fn, verbose ) :
if verbose :
verbose = verbose + 1
@@ -84,7 +114,6 @@ def do_flag( xnodes, name, recursive, fn, verbose ) :
else :
if verbose :
print ' '*verbose, "not in list", name
- raise Exception()
def set_want( node ) :
# if we said we want it, mark it as wanted and don't skip
@@ -111,6 +140,17 @@ def cmd_flagging( l, xnodes, func ) :
for x in l :
do_flag( xnodes, x, recursive, func, 1 )
+
+#
+def print_node(xnodes, x, print_recursive, print_all, indent=0):
+    # Print one status line for node x: its wanted / finished / skip
+    # flags followed by its name, preceded by `indent` spaces.
+    #   xnodes          - dict of node objects indexed by node name
+    #   print_all       - also print an AFTER line naming x's predecessors
+    #   print_recursive - also print every predecessor the same way,
+    #                     indented 8 more columns
+    node = xnodes[x]
+    print ' '*indent, node.wanted, node.finished, node.skip, x
+
+    # Build the predecessor list unconditionally: the recursive walk needs
+    # it even when the AFTER line is not printed.  It used to be assigned
+    # only under print_all, so print_recursive alone raised NameError.
+    l = [ a.name for a in node.predecessors ]
+    if print_all :
+        print ' '*indent, " AFTER", ' '.join(l)
+    if print_recursive :
+        for pred in l :
+            print_node( xnodes, pred, print_recursive, print_all, indent=indent+8)
+
#
helpstr = """
@@ -128,6 +168,9 @@ pre node show what must come before a node
def run_interactive( xnodes, run_name, db) :
+ org_run_name = run_name
+ run_count = 0
+
register_database(db, run_name, xnodes)
runner = run.runner( xnodes, steuermann.config.logdir )
@@ -195,18 +238,37 @@ def run_interactive( xnodes, run_name, db) :
for x in xnodes :
xnodes[x].finished = 0
- run_name = str(datetime.datetime.now()).replace(' ','_')
+ run_name = org_run_name + '.%d'%run_count
+ run_count = run_count + 1
print "new run name",run_name
register_database(db, run_name, xnodes)
elif n == 'list' :
- print_all = '-a' in l
- l = sorted ( [ x for x in xnodes ] )
+ l = l[1:]
+ if len(l) > 0 and l[0] == '-a' :
+ l = l[1:]
+ print_all = 1
+ else :
+ print_all = 0
+
+ if len(l) > 0 and l[0] == '-r' :
+ l = l[1:]
+ print_recursive=1
+ else :
+ print_recursive=0
+
+ if len(l) == 0 :
+ all = [ x for x in xnodes ]
+ else :
+ all = [ ]
+ for x in l :
+ all = all + find_wild_names( xnodes, x )
+
+ all = sorted(all)
+ print "recursive",print_recursive
print "w f s name"
- for x in l :
- print xnodes[x].wanted, xnodes[x].finished, xnodes[x].skip, x
- if print_all :
- print " AFTER", ' '.join([ a.name for a in xnodes[x].predecessors ])
+ for x in all :
+ print_node(xnodes, x, print_recursive, all)
elif n == 'wait' :
c = db.cursor()
@@ -214,7 +276,7 @@ def run_interactive( xnodes, run_name, db) :
host, tablename, cmd = nodes.crack_name(x)
if xnodes[x].wanted :
status = 'W'
- c.execute("UPDATE status SET status = 'W' WHERE run = ? AND host = ? AND tablename = ? AND cmd = ? AND status = 'N'",
+ c.execute("UPDATE sm_status SET status = 'W' WHERE run = ? AND host = ? AND tablename = ? AND cmd = ? AND status = 'N'",
(run_name, host, tablename, cmd) )
db.commit()
@@ -286,13 +348,13 @@ def print_pre(who, xnodes, depth) :
def register_database(db, run, xnodes ) :
c = db.cursor()
- c.execute('INSERT INTO runs ( run ) VALUES ( ? )', ( run, ) )
+ c.execute('INSERT INTO sm_runs ( run ) VALUES ( ? )', ( run, ) )
c = db.cursor()
for x in xnodes :
host, tablename, cmd = nodes.crack_name(x)
depth = xnodes[x].depth
- c.execute("INSERT INTO status ( run, host, tablename, cmd, depth, status ) VALUES "
+ c.execute("INSERT INTO sm_status ( run, host, tablename, cmd, depth, status ) VALUES "
"( ?, ?, ?, ?, ?, 'N' )", ( run, host, tablename, cmd, depth ) )
db.commit()
@@ -365,16 +427,26 @@ def run_step( runner, xnodes, run_name, db ) :
x.finished = 1
no_sleep = 1
keep_running = 1
- db.execute("UPDATE status SET start_time = ?, status = 'S' WHERE ( run = ? AND host = ? AND tablename = ? AND cmd = ? )",
+ db.execute("UPDATE sm_status SET start_time = ?, status = 'S' WHERE ( run = ? AND host = ? AND tablename = ? AND cmd = ? )",
( str(datetime.datetime.now()), run_name, host, table, cmd ) )
db.commit()
else :
- if runner.run(x, run_name) :
- # returns true/false whether it actually ran it - it may not because of resource limits
- db.execute("UPDATE status SET start_time = ?, status = 'R' WHERE ( run = ? AND host = ? AND tablename = ? AND cmd = ? )",
- ( str(datetime.datetime.now()), run_name, host, table, cmd ) )
- db.commit()
+ try :
+ if runner.run(x, run_name, no_run=no_run) :
+ # returns true/false whether it actually ran it - it may not because of resource limits
+ db.execute("UPDATE sm_status SET start_time = ?, status = 'R' WHERE ( run = ? AND host = ? AND tablename = ? AND cmd = ? )",
+ ( str(datetime.datetime.now()), run_name, host, table, cmd ) )
+ except run.run_exception, e :
+ now = str(datetime.datetime.now())
+ db.execute("UPDATE sm_status SET start_time=?, end_time=?, status='E', notes=? WHERE ( run=? AND host=? AND tablename=? AND cmd=? )",
+ ( now, now, repr(e), run_name, host, table, cmd ) )
+ x.finished = 1
+ no_sleep = 1
+ keep_running = 1
+
+ db.commit()
+
# if anything has exited, we process it and update the status in the database
while 1 :
@@ -390,7 +462,7 @@ def run_step( runner, xnodes, run_name, db ) :
# note who and log it
x_host, x_table, x_cmd = nodes.crack_name(who_exited[0])
- db.execute("UPDATE status SET end_time = ?, status = ? WHERE ( run = ? AND host = ? AND tablename = ? AND cmd = ? )",
+ db.execute("UPDATE sm_status SET end_time = ?, status = ? WHERE ( run = ? AND host = ? AND tablename = ? AND cmd = ? )",
( str(datetime.datetime.now()), who_exited[1], run_name, x_host, x_table, x_cmd ) )
db.commit()