diff options
Diffstat (limited to 'steuermann/run.py')
-rw-r--r-- | steuermann/run.py | 284 |
1 files changed, 165 insertions, 119 deletions
diff --git a/steuermann/run.py b/steuermann/run.py index 36cda80..af52a02 100644 --- a/steuermann/run.py +++ b/steuermann/run.py @@ -7,6 +7,8 @@ import subprocess import time import datetime import os +import traceback +import sys import ConfigParser @@ -20,6 +22,9 @@ class struct : ##### +class run_exception(Exception) : + pass + class runner(object): # dict of all current running processes, indexed by node name @@ -51,100 +56,112 @@ class runner(object): ##### # start a process - def run( self, node, run_name ): + def run( self, node, run_name, no_run = False ): try : - args = self.get_host_info(node.host) - except : - print "ERROR: do not know how to run on %s"%node.host - raise + try : + args = self.get_host_info(node.host) + except Exception, e : + log_traceback() + print "ERROR: do not know how to run on %s"%node.host + print e + raise - if 'maxproc' in args : hostname = args['hostname'] + if 'maxproc' in args : - n = int(self.howmany.get(hostname,0)) - if n >= int(args['maxproc']) : - print "decline to run %s - %d other already running"%(node.name,n) - return False - - n = n + 1 - self.howmany[hostname] = n - print "running %s %s %d"%(hostname,node.name, n) - else : - print "running %s %s no maxproc"%(hostname, node.name) - - if debug : - print "run",node.name - if debug : - print "....%s:%s/%s\n"%(node.host, node.table, node.cmd) - - node.running = 1 - - args = args.copy() - args.update( - script=node.script, - script_type=node.script_type, - host=node.host, - table=node.table, - cmd=node.cmd, - node=node.name, - ) - - if debug : - print "ARGS" - for x in sorted([x for x in args]) : - print '%s=%s'%(x,args[x]) - - args['script'] = args['script'] % args - - if args['script_type'] == 'r' : - run = args['run'] - elif args['script_type'] == 'l' : - run = args['local'] - else : - raise Exception() - - t = [ ] - for x in run : - # bug: what to do in case of keyerror - t.append( x % args ) - - run = t + n = int(self.howmany.get(hostname,0)) + if n >= int(args['maxproc']) : + print "decline to run %s - %d other already running"%(node.name,n) + return False - if debug : - print "RUN",run - - # make sure the log directory is there - logdir= self.logdir + "/%s"%run_name - try : - os.makedirs(logdir) - except OSError: - pass - - # create a name for the log file, but do not use / in the name - logfile_name = "%s/%s.log"%( logdir, node.name.replace('/','.') ) - - # open the log file, write initial notes - logfile=open(logfile_name,"w") - logfile.write('%s %s\n'%(datetime.datetime.now(),run)) - logfile.flush() - - # start running the process - p = subprocess.Popen(args=run, - stdout=logfile, - stderr=subprocess.STDOUT, - shell=False, close_fds=True) - - # remember the popen object for the process; remember the open log file - n = struct() - n.proc = p - n.logfile = logfile - n.logfile_name = logfile_name - - # remember the process is running - self.all_procs[node.name] = n - - return True + n = n + 1 + self.howmany[hostname] = n + print "running %s %s %d"%(hostname,node.name, n) + else : + print "running %s %s no maxproc"%(hostname, node.name) + + if debug : + print "run",node.name + if debug : + print "....%s:%s/%s\n"%(node.host, node.table, node.cmd) + + node.running = 1 + + args = args.copy() + args.update( + script=node.script, + script_type=node.script_type, + host=node.host, + table=node.table, + cmd=node.cmd, + node=node.name, + ) + + if debug : + print "ARGS" + for x in sorted([x for x in args]) : + print '%s=%s'%(x,args[x]) + + args['script'] = args['script'] % args + + if args['script_type'] == 'r' : + run = args['run'] + elif args['script_type'] == 'l' : + run = args['local'] + else : + raise Exception() + + t = [ ] + for x in run : + # bug: what to do in case of keyerror + t.append( x % args ) + + run = t + + if debug : + print "RUN",run + + # make sure the log directory is there + logdir= self.logdir + "/%s"%run_name + try : + os.makedirs(logdir) + except OSError: + pass + + # create a name for the log file, but do not use / in the name + logfile_name = "%s/%s.log"%( logdir, node.name.replace('/','.') ) + + # open the log file, write initial notes + logfile=open(logfile_name,"w") + logfile.write('%s %s\n'%(datetime.datetime.now(),run)) + logfile.flush() + + # debug - just say the name of the node we would run + if no_run : + run = [ 'echo', 'no_run - node=', node.name ] + + # start running the process + p = subprocess.Popen(args=run, + stdout=logfile, + stderr=subprocess.STDOUT, + shell=False, close_fds=True) + + # remember the popen object for the process; remember the open log file + n = struct() + n.proc = p + n.logfile = logfile + n.logfile_name = logfile_name + + # remember the process is running + self.all_procs[node.name] = n + + return True + + except Exception, e : + log_traceback() + txt= "ERROR RUNNING %s"%node.name + raise run_exception(txt) ##### # callback when a node finishes @@ -155,16 +172,12 @@ class runner(object): args = self.get_host_info(node.host) - if 'maxproc' in args : - hostname = args['hostname'] + hostname = args['hostname'] - n = int(self.howmany.get(hostname,0)) - n = n - 1 + n = self.howmany[hostname] - 1 + self.howmany[hostname] = n - self.howmany[hostname] = n - print "finish %s %s %d"%(hostname,node_name,n) - else : - print "finish %s %s no maxproc"%(hostname,node_name) + print "finish %s %s %d"%(hostname,node_name,n) # note the termination of the process at the end of the log file logfile = self.all_procs[node_name].logfile @@ -225,14 +238,18 @@ class runner(object): def _host_get_names( self, cfg, section ) : d = { } # pick all the variables out of this section - for name, value in cfg.items(section) : - if value.startswith('[') : - # it is a list - d[name] = eval(value) - else : - # everything else is plain text - d[name] = value - return d + try : + for name, value in cfg.items(section) : + if value.startswith('[') : + # it is a list + d[name] = eval(value) + else : + # everything else is plain text + d[name] = value + return d + except ConfigParser.NoSectionError : + print "No config section in hosts.ini: %s"%section + return { } def load_host_info( self, filename=None ) : @@ -243,26 +260,55 @@ class runner(object): self.cfg.read(filename) def get_host_info(self, host) : - if debug: - print "enter get_host_info",host if not host in self.host_info_cache : - d = self._host_get_names(self.cfg, host) - if debug: - print "in get_host_info, got names for ",host, d + if 'like' in d : - if debug: - print "has like", d['like'] - d1 = self.get_host_info(d['like']).copy() - del d['like'] + # get the dict of what this entry is like, copy it, + # and update it with the values for this entry + d1 = self.get_host_info(d['like']) + d1 = d1.copy() d1.update(d) - self.host_info_cache[host] = d1 - else : - print "end of chain",host,d - self.host_info_cache[host] = d + d = d1 + print d + del d['like'] - if debug: - print "leave get_host_info",host, self.host_info_cache[host] + # default hostname is the name from the section header + if not 'hostname' in d : + d['hostname'] = host + + # default maximum processes is 1 + if not 'maxproc' in d : + d['maxproc'] = 1 + + self.host_info_cache[host] = d return self.host_info_cache[host] ##### + +# The traceback interface is awkward in python; here is something I copied from pyetc: + +def log_traceback() : + # You would think that the python traceback module contains + # something useful to do this, but it always returns multi-line + # strings. I want each line of output logged separately so the log + # file remains easy to process, so I reverse engineered this out of + # the logging module. + try: + etype, value, tb = sys.exc_info() + tbex = traceback.extract_tb( tb ) + for filename, lineno, name, line in tbex : + print '%s:%d, in %s'%(filename,lineno,name) + if line: + print ' %s'%line.strip() + + for x in traceback.format_exception_only( etype, value ) : + print ": %s",x + + print "---" + + finally: + # If you don't clear these guys, you can make loops that + # the garbage collector has to work hard to eliminate. + etype = value = tb = None + |