diff options
author | Matt Rendina <mrendina@stsci.edu> | 2017-05-23 11:53:21 -0400 |
---|---|---|
committer | Matt Rendina <mrendina@stsci.edu> | 2017-05-23 11:53:21 -0400 |
commit | b3e82911587c49863a4dab105aa139f61e53945f (patch) | |
tree | 460cd786d51fd647473429e7d46a5b207a23b162 /rambo | |
parent | 43ce44715858ee56b3fe0652ad6645f6643dcc99 (diff) | |
download | rambo-b3e82911587c49863a4dab105aa139f61e53945f.tar.gz |
Turning into a package
Diffstat (limited to 'rambo')
-rw-r--r-- | rambo/__init__.py | 0 | ||||
-rw-r--r-- | rambo/_version.py | 1 | ||||
-rwxr-xr-x | rambo/rambo.py | 493 |
3 files changed, 494 insertions, 0 deletions
diff --git a/rambo/__init__.py b/rambo/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/rambo/__init__.py diff --git a/rambo/_version.py b/rambo/_version.py new file mode 100644 index 0000000..a655b17 --- /dev/null +++ b/rambo/_version.py @@ -0,0 +1 @@ +__version__ = '1.0.0b1' diff --git a/rambo/rambo.py b/rambo/rambo.py new file mode 100755 index 0000000..998c2f8 --- /dev/null +++ b/rambo/rambo.py @@ -0,0 +1,493 @@ +#!/usr/bin/env python + +''' +RAMBO - Recipe Analyzer and Multi-package Build Optimizer + +Requires conda & conda-build to be installed in a path that appears in the +python interprer's search list in order to access the API machinery via +'conda_build.api. +''' + +from __future__ import print_function +import os +import sys +from copy import deepcopy +import argparse +from six.moves import urllib +import codecs +from yaml import safe_load +import json +from rambo._version import __version__ +try: + import conda_build.api +except ImportError: + raise ImportError('conda-build must be installed order to use this \n' + 'tool. Either conda-build is not installed, or you \n' + 'are working in an activated conda environment. \n' + 'If conda-build is installed deactivate the \n' + 'environment currently enabled or explicitly switch \n' + 'to the conda "root" environment to allow use of\n' + 'conda-build.') + +DEFAULT_MINIMUM_NUMPY_VERSION = '1.11' + + +class meta(object): + '''Holds metadata for a recipe obtained from the recipe's meta.yaml file, + certain values derived from that data, and methods to calculate those + derived values.''' + + def __init__(self, recipe_dir, versions, dirty=False): + self.recipe_dirname = os.path.basename(recipe_dir) + self.versions = versions + self.dirty = dirty + self.metaobj = None # renderdata[0] (MetaData) + self.mdata = None # renderdata[0].meta (dict) + self.active = True # Visit metadata in certain processing steps? + self.valid = False + self.complete = False + self.name = None + self.num_bdeps = 0 + self.deps = [] + self.peer_bdeps = [] + self.import_metadata(recipe_dir) + self.derive_values() + self.canonical_name = '' + # Whether or not the package with this metadata + # already exists in the channel archive + self.archived = False + self.gen_canonical() + + def import_metadata(self, rdir): + '''Read in the package metadata from the given recipe directory via + the conda recipe renderer to perform string interpolation and + store the values in a dictionary.''' + if os.path.isfile(rdir + '/meta.yaml'): + # render() returns a tuple: (MetaData, bool, bool) + self.metaobj = conda_build.api.render( + rdir, + dirty=self.dirty, + python=self.versions['python'], + numpy=self.versions['numpy'])[0] + self.mdata = self.metaobj.meta + self.valid = self.is_valid() + self.complete = self.is_complete() + if self.valid: + self.name = self.mdata['package']['name'] + else: + print('Recipe directory {0} has no meta.yaml file.'.format( + self.recipe_dirname)) + + def derive_values(self): + if self.complete: + self.num_bdeps = len(self.mdata['requirements']['build']) + for req in self.mdata['requirements']['build']: + self.deps.append(req.split()[0]) + + def deplist(self, deptype): + '''Return the simplified (no version info, if present) list of + dependency names of the given type.''' + lst = [] + for dep in self.mdata['requirements'][deptype]: + lst.append(dep.split()[0]) + return lst + + def is_valid(self): + '''Does the metadata for this recipe contain the minimum information + necessary to process?''' + valid = True + if 'package' not in self.mdata.keys(): + complete = False + return valid + + def is_complete(self): + '''Is the metadata for this recipe complete enough to allow for use + in build-order optimization?''' + complete = True + if 'requirements' in self.mdata.keys(): + if 'build' not in self.mdata['requirements'].keys(): + complete = False + else: + complete = False + return complete + + def gen_canonical(self): + '''Generate the package's canonical name using available + information.''' + self.canonical_name = os.path.basename( + conda_build.api.get_output_file_path( + self.metaobj, + python=self.versions['python'], + numpy=self.versions['numpy'])) + + +class metaSet(object): + '''A collection of mulitple recipe metadata objects from a directory + specification, and methods for manipulationg and querying this + collection.''' + + ignore_dirs = ['.git', 'template'] + + def __init__(self, + directory, + versions, + platform, + manfile=None, + dirty=False): + '''Parameters: + directory - a relative or absolute directory in which Conda + recipe subdirectories may be found. + versions - Dictionary containing python, numpy, etc, version + information.''' + self.metas = [] + self.platform = platform + self.versions = versions + self.manfile = manfile + self.manifest = None + if self.manfile: + self.read_manifest() + self.filter_by_manifest() + self.dirty = dirty + self.incomplete_metas = [] + self.names = [] + self.read_recipes(directory) + self.derive_values() + self.sort_by_peer_bdeps() + self.merge_metas() + if self.channel: + self.channel_data = self.get_channel_data() + self.flag_archived() + + def read_recipes_old(self, directory): + '''Process a directory reading in each conda recipe found, creating + a list of metadata objects for use in analyzing the collection of + recipes as a whole.''' + recipe_dirnames = os.listdir(directory) + for rdirname in recipe_dirnames: + if rdirname in self.ignore_dirs: + continue + rdir = directory + '/' + rdirname + m = meta(rdir, versions=self.versions, dirty=self.dirty) + if m.complete: + self.metas.append(m) + self.names.append(m.name) + else: + self.incomplete_metas.append(m) + + def read_recipe_selection(self, directory, recipe_list): + '''Process a directory reading in each conda recipe found, creating + a list of metadata objects for use in analyzing the collection of + recipes as a whole.''' + for rdirname in recipe_list: + if rdirname in self.ignore_dirs: + continue + rdir = directory + '/' + rdirname + m = meta(rdir, versions=self.versions, dirty=self.dirty) + if m.complete: + self.metas.append(m) + self.names.append(m.name) + else: + self.incomplete_metas.append(m) + + def read_recipes(self, directory): + recipe_dirnames = os.listdir(directory) + # If a manifest was given, use it to filter the list of available + # recipes. + if self.manifest: + recipe_list = set.intersection( + set(recipe_dirnames), + set(self.manifest['packages'])) + else: + recipe_list = recipe_dirnames + self.read_recipe_selection(directory, recipe_list) + + def read_manifest(self): + mf = open(self.manfile, 'r') + self.manifest = safe_load(mf) + self.channel = self.manifest['channel_URL'].strip('/') + self.channel += '/' + self.platform + self.versions['numpy'] = str(self.manifest['numpy_version']) + + def filter_by_manifest(self): + '''Leave only the recipe metadata entries that appear in the + provided manifest list active.''' + for meta in self.metas: + if meta.name not in self.manifest['packages']: + meta.active = False + + def merge_metas(self): + '''Prepend the list of metas that do not have complete build + dependency information to the main list. + Also, add those names to the names list.''' + # Sort alphabetically by name + self.incomplete_metas = sorted( + self.incomplete_metas, + key=lambda meta: meta.name) + for m in self.incomplete_metas[::-1]: + self.metas.insert(0, m) + + def derive_values(self): + '''Produce values from the set of recipes taken as a whole.''' + self.calc_peer_bdeps() + + def calc_peer_bdeps(self): + '''Produce and store a names-only list of the build dependencies + for each recipe found to this set of recipes that each recipe + references.''' + for meta in self.metas: + for name in meta.deps: + if name in self.names: + meta.peer_bdeps.append(name) + + def sort_by_peer_bdeps(self): + '''Sort the list of metadata objects by the number of peer build + dependencies each has, in ascending order. This gives a good first + approximation to a correct build order of all peers. Peform an + extra step here to reduce stochasticity of the order of packages + within a given tier that all share the same number of peer_bdeps. + The order of those items apparently varies from run to run.''' + # First sort by alphabetical on name to make the subsequent + # sorting deterministic. + self.metas = sorted(self.metas, key=lambda meta: meta.name) + self.metas = sorted(self.metas, key=lambda meta: len(meta.peer_bdeps)) + + def index(self, mname): + '''Return the index of a metadata object with the name 'mname'.''' + for i, meta in enumerate(self.metas): + if (meta.name == mname): + return i + raise IndexError('Name [{0}] not found.'.format(mname)) + + def peer_bdep_indices(self, mname): + '''Returns a list of the indices in the meta list corresponding to + all the peer build dependencies (bdeps) of the given package + metadata.''' + indices = [] + for i, meta in enumerate(self.metas): + if (meta.name == mname): + for dep in meta.peer_bdeps: + indices.append(self.index(dep)) + return indices + + def position_OK(self, mname): + '''If a package has peer build dependencies that all occur before + the package in the sorted list of package recipes, the package's + position in the build order list is acceptable.''' + for i in self.peer_bdep_indices(mname): + if i > self.index(mname): + return False + return True + + def relocate(self, mname): + '''Relocate a meta object in the meta set such that all its internal + dependencies appear earlier in the list than it does. + The algorithm: + For a package that does not have position_OK=True, examine the + internal dependency indices. If any index is greater than the + package's index, relocate the package to the index in the list just + after the largest such dependency index. + 1. Deepcopy object into temp variable + 2. Insert copy into list at new index + 3. remove the original item from list''' + idx = self.index(mname) + new_idx = max(self.peer_bdep_indices(mname)) + 1 + temp = deepcopy(self.metas[idx]) + self.metas.insert(new_idx, temp) + del self.metas[idx] + + def optimize_build_order(self): + '''Makes a single pass through the list of (complete) package metadata, + relocating in the list any item which is not in the correct slot in + the build order.''' + for m in self.metas: + if not self.position_OK(m.name): + self.relocate(m.name) + + def multipass_optimize(self, max_passes=8): + '''Makes multiple passes over the list of metadata, optimizing during + each pass until either the entire list is ordered correctly for + building, or the maximum number of allowed passes is reached. The + latter condition suggests there is a circular dependency that needs + to be manually resolved.''' + opass = 0 + num_notOK = 1 + while (num_notOK > 0 and opass < max_passes): + opass = opass + 1 + num_notOK = 0 + self.optimize_build_order() + for m in self.metas: + if not self.position_OK(m.name): + num_notOK = num_notOK + 1 + if (opass == max_passes): + print('Pass {0} of {1} reached. Check for circular ' + 'dependencies.'.format( + opass, + max_passes)) + return False + return True + + def get_channel_data(self): + '''Download the channel metadata from all specified conda package + channel URLs, parse the JSON data into a dictionary.''' + jsonbytes = urllib.request.urlopen(self.channel + '/repodata.json') + # urllib only returns 'bytes' objects, so convert to unicode. + reader = codecs.getreader('utf-8') + return json.load(reader(jsonbytes)) + + def flag_archived(self): + '''Flag each meta as either being archived or not by generating the + package canonical name, fetching the provided conda channel + archive data, and searching the archive data for the generated + name. Each meta's 'archived' attribute is set to True if found + and False if not.''' + for meta in self.metas: + if meta.canonical_name in self.channel_data['packages'].keys(): + meta.archived = True + + def print_details(self, fh=sys.stdout): + num_notOK = 0 + print('conda-build version : ', conda_build.__version__) + print('Python version specified: ', self.versions['python']) + print('Numpy version specified: ', self.versions['numpy']) + print(' num num peer', file=fh) + print(' name bdeps peer bdep pos.', + file=fh) + print(' bdeps indices OK?', + file=fh) + print('----------------------------------------------------------', + file=fh) + for idx, m in enumerate(self.metas): + if not self.position_OK(m.name): + num_notOK = num_notOK + 1 + print('{0:>28} {1:{wid}} {2:{wid}} idx={3:{wid}} {4} {5}' + .format(m.name, + m.num_bdeps, + len(m.peer_bdeps), + idx, + self.peer_bdep_indices(m.name), + self.position_OK(m.name), + wid=2), file=fh) + print('Num not in order = {0}/{1}\n'.format( + num_notOK, + len(self.metas)), file=fh) + + def print(self, fh=sys.stdout): + '''Prints the list of package names in the order in which they appear + in self.metas to stdout, suitable for ingestion by other tools during + a build process.''' + for m in self.metas: + print('{0}'.format(m.name), file=fh) + + def print_culled(self, fh=sys.stdout): + '''Prints the list of package names for which the canonical name does + not exist in the specified archive channel. List is presented in the + order in which entries appear in self.metas.''' + for m in [m for m in self.metas if m.active and not m.archived]: + print('{0}'.format(m.name), file=fh) + + def print_canonical(self, fh=sys.stdout): + '''Prints list of canonical package names.''' + for meta in self.metas: + print('{0:>50}'.format(meta.canonical_name), file=fh) + + def print_status_in_channel(self, fh=sys.stdout): + '''Prints list of canonical package names and whether or not each + has already been built and archived in the specified channel.''' + statstr = {True: '', False: 'Not in channel archive'} + for meta in self.metas: + print('{0:>50} {1}'.format( + meta.canonical_name, + statstr[meta.archived]), file=fh) + + +# ---- + + +def main(argv=None): + + if argv is None: + argv = sys.argv + + parser = argparse.ArgumentParser(prog='rambo') + parser.add_argument('-p', '--platform', type=str) + parser.add_argument( + '--python', + type=str, + help='Python version to pass to conda machinery when rendering ' + 'recipes. "#.#" format. If not specified, the version of python' + ' hosting conda_build.api is used.') + parser.add_argument( + '-m', + '--manifest', + type=str, + help='Use this file to filter the list of recipes to process.') + parser.add_argument( + '-f', + '--file', + type=str, + help='Send package list output to this file instead of stdout.') + parser.add_argument( + '-c', + '--culled', + action='store_true', + help='Print the ordered list of package names reduced to the set' + ' of packages that do not already exist in the channel specified' + ' in the supplied manifest file.') + parser.add_argument( + '-d', + '--details', + action='store_true', + help='Display details used in determining build order and/or ' + 'package culling.') + parser.add_argument( + '--dirty', + action='store_true', + help='Use the most recent pre-existing conda work directory for ' + 'each recipe instead of creating a new one. If a work directory ' + 'does not already exist, the recipe is processed in the normal ' + 'fashion. Used mostly for testing purposes.') + parser.add_argument( + '-v', + '--version', + action='version', + version='%(prog)s ' + __version__, + help='Display version information.') + parser.add_argument('recipes_dir', type=str) + args = parser.parse_args() + + if args.version: + print(__version__) + os.exit(0) + + recipes_dir = os.path.normpath(args.recipes_dir) + + fh = None + if args.file: + fh = open(args.file, 'w') + + versions = {'python': '', 'numpy': ''} + if args.python: + versions['python'] = args.python + + versions['numpy'] = DEFAULT_MINIMUM_NUMPY_VERSION + + mset = metaSet( + recipes_dir, + platform=args.platform, + versions=versions, + dirty=args.dirty, + manfile=args.manifest) + + mset.multipass_optimize() + + if args.details: + mset.print_details(fh) + if mset.channel: + mset.print_status_in_channel(fh) + elif args.culled: + mset.print_culled(fh) + else: + mset.print(fh) + +if __name__ == "__main__": + main() |