From ec4e84b875bdcdaf757660e822f5b35e0f2c7d97 Mon Sep 17 00:00:00 2001 From: Matt Rendina Date: Tue, 18 Apr 2017 14:52:39 -0400 Subject: Filter available recipes by manifest, if provided; presort metas to make order optimization deterministic; consolidate command line flags; cleanup and PEP 8 --- rambo.py | 200 +++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 132 insertions(+), 68 deletions(-) diff --git a/rambo.py b/rambo.py index ea6cc14..ae006cc 100755 --- a/rambo.py +++ b/rambo.py @@ -13,9 +13,13 @@ from copy import deepcopy import argparse import urllib.request import codecs +from yaml import safe_load import json import conda_build.api +DEFAULT_MINIMUM_NUMPY_VERSION = '1.11' + + class meta(object): '''Holds metadata for a recipe obtained from the recipe's meta.yaml file, certain values derived from that data, and methods to calculate those @@ -25,40 +29,34 @@ class meta(object): self.recipe_dirname = os.path.basename(recipe_dir) self.versions = versions self.dirty = dirty - self.metaobj = None # renderdata[0] (MetaData) - self.mdata = None # renderdata[0].meta (dict) - + self.metaobj = None # renderdata[0] (MetaData) + self.mdata = None # renderdata[0].meta (dict) + self.active = True # Visit metadata in certain processing steps? self.valid = False self.complete = False self.name = None - self.num_bdeps = 0 self.deps = [] - self.peer_bdeps = [] - self.import_metadata(recipe_dir) self.derive_values() - self.canonical_name = '' - self.archived = False # Whether or not the package with this metadata - # already exists in the channel archive - + # Whether or not the package with this metadata + # already exists in the channel archive + self.archived = False self.gen_canonical() - # self.unite_deps() # Test if needed. - def import_metadata(self, rdir): '''Read in the package metadata from the given recipe directory via the conda recipe renderer to perform string interpolation and store the values in a dictionary.''' if os.path.isfile(rdir + '/meta.yaml'): - #print(' >>>>>>>> Importing metadata from {0}...'.format(self.recipe_dirname)) # render() returns a tuple: (MetaData, bool, bool) - self.metaobj = conda_build.api.render(rdir, - self.dirty, - python=self.versions['python'], - numpy=self.versions['numpy'])[0] + self.metaobj = conda_build.api.render( + rdir, + dirty=self.dirty, + python=self.versions['python'], + numpy=self.versions['numpy'])[0] self.mdata = self.metaobj.meta self.valid = self.is_valid() self.complete = self.is_complete() @@ -74,15 +72,6 @@ class meta(object): for req in self.mdata['requirements']['build']: self.deps.append(req.split()[0]) - def unite_deps(self): - '''Store the union of the simple names (no version specifications) of - build and run dependencies in .deps.''' - if self.complete: - for key in ['build', 'run']: - for req in self.mdata['requirements'][key]: - self.deps.append(req.split()[0]) - self.deps = set(self.deps) - def deplist(self, deptype): '''Return the simplified (no version info, if present) list of dependency names of the given type.''' @@ -130,29 +119,34 @@ class metaSet(object): def __init__(self, directory, versions, - channel, + platform, + manfile=None, dirty=False): '''Parameters: directory - a relative or absolute directory in which Conda recipe subdirectories may be found. versions - Dictionary containing python, numpy, etc, version information.''' + self.metas = [] + self.platform = platform self.versions = versions + self.manfile = manfile + self.manifest = None + if self.manfile: + self.read_manifest() + self.filter_by_manifest() self.dirty = dirty - self.metas = [] self.incomplete_metas = [] self.names = [] self.read_recipes(directory) self.derive_values() self.sort_by_peer_bdeps() self.merge_metas() - self.channel = channel - if channel: - self.channel_URL = channel.strip('/') + if self.channel: self.channel_data = self.get_channel_data() self.flag_archived() - def read_recipes(self, directory): + def read_recipes_old(self, directory): '''Process a directory reading in each conda recipe found, creating a list of metadata objects for use in analyzing the collection of recipes as a whole.''' @@ -168,10 +162,55 @@ class metaSet(object): else: self.incomplete_metas.append(m) + def read_recipe_selection(self, directory, recipe_list): + '''Process a directory reading in each conda recipe found, creating + a list of metadata objects for use in analyzing the collection of + recipes as a whole.''' + for rdirname in recipe_list: + if rdirname in self.ignore_dirs: + continue + rdir = directory + '/' + rdirname + m = meta(rdir, versions=self.versions, dirty=self.dirty) + if m.complete: + self.metas.append(m) + self.names.append(m.name) + else: + self.incomplete_metas.append(m) + + def read_recipes(self, directory): + recipe_dirnames = os.listdir(directory) + # If a manifest was given, use it to filter the list of available + # recipes. + if self.manifest: + recipe_list = set.intersection( + set(recipe_dirnames), + set(self.manifest['packages'])) + else: + recipe_list = recipe_dirnames + self.read_recipe_selection(directory, recipe_list) + + def read_manifest(self): + mf = open(self.manfile, 'r') + self.manifest = safe_load(mf) + self.channel = (self.manifest['channel_URL'].strip('/')) + ('/' + self.platform) + self.versions['numpy'] = str(self.manifest['numpy_version']) + + def filter_by_manifest(self): + '''Leave only the recipe metadata entries that appear in the + provided manifest list active.''' + for meta in self.metas: + if meta.name not in self.manifest['packages']: + meta.active = False + def merge_metas(self): '''Prepend the list of metas that do not have complete build dependency information to the main list. Also, add those names to the names list.''' + # Sort alphabetically by name + self.incomplete_metas = sorted( + self.incomplete_metas, + key=lambda meta: meta.name) for m in self.incomplete_metas[::-1]: self.metas.insert(0, m) @@ -191,7 +230,13 @@ class metaSet(object): def sort_by_peer_bdeps(self): '''Sort the list of metadata objects by the number of peer build dependencies each has, in ascending order. This gives a good first - approximation to a correct build order of all peers.''' + approximation to a correct build order of all peers. Peform an + extra step here to reduce stochasticity of the order of packages + within a given tier that all share the same number of peer_bdeps. + The order of those items apparently varies from run to run.''' + # First sort by alphabetical on name to make the subsequent + # sorting deterministic. + self.metas = sorted(self.metas, key=lambda meta: meta.name) self.metas = sorted(self.metas, key=lambda meta: len(meta.peer_bdeps)) def index(self, mname): @@ -272,7 +317,7 @@ class metaSet(object): def get_channel_data(self): '''Download the channel metadata from all specified conda package channel URLs, parse the JSON data into a dictionary.''' - jsonbytes = urllib.request.urlopen(self.channel_URL + '/repodata.json') + jsonbytes = urllib.request.urlopen(self.channel + '/repodata.json') # urllib only returns 'bytes' objects, so convert to unicode. reader = codecs.getreader('utf-8') return json.load(reader(jsonbytes)) @@ -289,10 +334,15 @@ class metaSet(object): def print_details(self, fh=sys.stdout): num_notOK = 0 + print('Python version specified: ', self.versions['python']) + print('Numpy version specified: ', self.versions['numpy']) print(' num num peer', file=fh) - print(' name bdeps peer bdep pos.', file=fh) - print(' bdeps indices OK?', file=fh) - print('----------------------------------------------------------', file=fh) + print(' name bdeps peer bdep pos.', + file=fh) + print(' bdeps indices OK?', + file=fh) + print('----------------------------------------------------------', + file=fh) for idx, m in enumerate(self.metas): if not self.position_OK(m.name): num_notOK = num_notOK + 1 @@ -304,7 +354,8 @@ class metaSet(object): self.peer_bdep_indices(m.name), self.position_OK(m.name), wid=2), file=fh) - print('Num not in order = {0}/{1}\n'.format(num_notOK, + print('Num not in order = {0}/{1}\n'.format( + num_notOK, len(self.metas)), file=fh) def print(self, fh=sys.stdout): @@ -315,12 +366,11 @@ class metaSet(object): print('{0}'.format(m.name), file=fh) def print_culled(self, fh=sys.stdout): - '''Prints the list of package names for which the canonical name does not - exist in the specified archive channel. List is presented in the order in - which entries appear in self.metas.''' - for m in self.metas: - if not m.archived: - print('{0}'.format(m.name), file=fh) + '''Prints the list of package names for which the canonical name does + not exist in the specified archive channel. List is presented in the + order in which entries appear in self.metas.''' + for m in [m for m in self.metas if m.active and not m.archived]: + print('{0}'.format(m.name), file=fh) def print_canonical(self, fh=sys.stdout): '''Prints list of canonical package names.''' @@ -332,7 +382,8 @@ class metaSet(object): has already been built and archived in the specified channel.''' statstr = {True: '', False: 'Not in channel archive'} for meta in self.metas: - print('{0:>50} {1}'.format(meta.canonical_name, + print('{0:>50} {1}'.format( + meta.canonical_name, statstr[meta.archived]), file=fh) @@ -342,29 +393,42 @@ class metaSet(object): def main(argv): parser = argparse.ArgumentParser() - parser.add_argument('-c', '--culled', action='store_true', + parser.add_argument('-p', '--platform', type=str) + parser.add_argument( + '--python', + type=str, + help='Python version to pass to conda machinery when rendering ' + 'recipes. "#.#" format.') + parser.add_argument( + '-m', + '--manifest', + type=str, + help='Use this file to filter the list of recipes to process.') + parser.add_argument( + '-f', + '--file', + type=str, + help='Send package list output to this file instead of stdout.') + parser.add_argument( + '-c', + '--culled', + action='store_true', help='Print the ordered list of package names reduced to the set' - ' of packages that do not already exist in the specified channel.' - ' Requires --channel') - parser.add_argument('-d', '--details', action='store_true', + ' of packages that do not already exist in the channel specified' + ' in the supplied manifest file.') + parser.add_argument( + '-d', + '--details', + action='store_true', help='Display details used in determining build order and/or ' 'package culling.') - parser.add_argument('-f', '--file', - help='Send package list output to this file instead of stdout.') - parser.add_argument('--channel', type=str, - help='URL of conda channel repository to search for package list ' - 'culling purposes.') - parser.add_argument('--python', type=str, - help='Python version to pass to conda machinery when rendering ' - 'recipes. "#.#" format.') - parser.add_argument('--numpy', type=str, - help='Numpy version to pass to conda machinery when rendering ' - 'recipes. "#.#" format.') - parser.add_argument('--dirty', type=str, + parser.add_argument( + '--dirty', + action='store_true', help='Use the most recent pre-existing conda work directory for ' 'each recipe instead of creating a new one. If a work directory ' 'does not already exist, the recipe is processed in the normal ' - 'fashion.') + 'fashion. Used mostly for testing purposes.') parser.add_argument('recipes_dir', type=str) args = parser.parse_args() @@ -374,18 +438,18 @@ def main(argv): if args.file: fh = open(args.file, 'w') - versions = {'python':'', - 'numpy':''} + versions = {'python': '', 'numpy': ''} if args.python: versions['python'] = args.python - if args.numpy: - versions['numpy'] = args.numpy + + versions['numpy'] = DEFAULT_MINIMUM_NUMPY_VERSION mset = metaSet( recipes_dir, + platform=args.platform, versions=versions, - channel=args.channel, - dirty=args.dirty) + dirty=args.dirty, + manfile=args.manifest) mset.multipass_optimize() -- cgit