diff options
-rwxr-xr-x | condor_split.py | 89 | ||||
-rwxr-xr-x | condor_wrap.py | 23 |
2 files changed, 89 insertions, 23 deletions
diff --git a/condor_split.py b/condor_split.py index faf08d8..480c94d 100755 --- a/condor_split.py +++ b/condor_split.py @@ -3,10 +3,15 @@ import argparse import os import sys import fnmatch +import shutil +import tempfile -def path_dive(p, ext, strip_components): +VERBOSE = False + +def path_search(p, ext, strip_components): filenames = [] p = os.path.abspath(p) + index = 1 for root, _, files in os.walk(p): for f in files: path = os.path.join(root, f) @@ -20,29 +25,91 @@ def path_dive(p, ext, strip_components): components = head.split(os.path.sep)[strip_components:] path = os.path.join(os.path.sep.join(components), tail) else: - #strip_components = strip_max + # strip_components = strip_max print("Warning: Cannot strip {} components from {} (max {})".format(strip_components, path, strip_max)) + + if VERBOSE: + print("[{}]:{}".format(index, path)) filenames.append(path) + index += 1 return filenames -def assign_chunk(): - pass +def generate_manifest(manifest): + dest = tempfile.NamedTemporaryFile("w+", delete=False) + for line in manifest: + dest.write(line + os.linesep) + return dest.name + +def assign_chunks(manifest, chunks, output_dir, as_jobs=False): + label = 0 + count = 0 + written = 0 + # max_chunks = 0 + dest = None + output_dir = os.path.abspath(output_dir) + + if VERBOSE: + print("Output directory: {}".format(output_dir)) + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + else: + shutil.rmtree(output_dir) + os.makedirs(output_dir) + + if as_jobs: + manifest_count = len(file(manifest, 'r').readlines()) + max_chunks = manifest_count / chunks + chunks = max_chunks + + + for line in open(manifest, 'r'): + line = line.strip(os.linesep) + line += " " + if count % chunks == 0: + if dest: + dest.close() + if VERBOSE: + print("[{}]: {}, {} entries".format(label, os.path.basename(dest.name), written)) + written = 0 + dest = file(os.path.join(output_dir, "stdin." + str(label)), 'w') + label += 1 + dest.write(line) + count += 1 + written += 1 + os.unlink(manifest) + def main(): parser = argparse.ArgumentParser() - parser.add_argument("search_path", action="store", help="Directory to search under") parser.add_argument("--strip-components", default=0, type=int, help="Top-level directories to strip") parser.add_argument("--output-dir", default="input") - parser.add_argument("--extension", default="*.*", help="Extension of files") - parser.add_argument("--chunks", default=4, help="Number of entries per file") + parser.add_argument("--extension", default="*.*", type=str, help="Extension of files") + parser.add_argument("--chunks", default=1, type=int, help="Number of entries per file") + parser.add_argument("--as-jobs", action="store_true", help="Number of expected jobs") + parser.add_argument("--verbose", action="store_true") + parser.add_argument("search_path", action="store", type=str, help="Directory to search under") args = parser.parse_args() - paths = path_dive(args.search_path, args.extension, args.strip_components) - print("{} files".format(len(paths))) - if not paths: - exit(0) + global VERBOSE + VERBOSE = args.verbose + chunks = args.chunks + if chunks < 1: + chunks = 1 + index_at = 0 + files = path_search(args.search_path, args.extension, args.strip_components) + if files: + index_at = 1 + + if VERBOSE: + print("{} files".format(len(files))) + + if not files: + exit(0) + manifest = generate_manifest(files) + assign_chunks(manifest, chunks, args.output_dir, as_jobs=args.as_jobs) if __name__ == "__main__": main() diff --git a/condor_wrap.py b/condor_wrap.py index 2142c88..0aa25c4 100755 --- a/condor_wrap.py +++ b/condor_wrap.py @@ -15,21 +15,20 @@ def main(): job = [os.path.normpath(args.job)] indata = args.indata - data = [] + output = [] if verbose: print("Job: {}".format(job)) - if isinstance(indata, file): - print("Input:"), - for line in indata.readlines(): - data.append(line) - print(data) - else: - print("Data: {}".format(indata)) - data = indata - compiled = job + data - # print(compiled) - process = subprocess.Popen(compiled) + if isinstance(indata, file): + for line in indata.readlines(): + output.append(line) + else: + output = indata + + compiled = job + output + process = subprocess.Popen(compiled, stdout=sys.stdout, stderr=sys.stderr) + process.communicate() + process.wait() if __name__ == "__main__": |