aboutsummaryrefslogtreecommitdiff
path: root/rambo
diff options
context:
space:
mode:
Diffstat (limited to 'rambo')
-rw-r--r--rambo/__init__.py0
-rw-r--r--rambo/_version.py1
-rwxr-xr-xrambo/rambo.py493
3 files changed, 494 insertions, 0 deletions
diff --git a/rambo/__init__.py b/rambo/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/rambo/__init__.py
diff --git a/rambo/_version.py b/rambo/_version.py
new file mode 100644
index 0000000..a655b17
--- /dev/null
+++ b/rambo/_version.py
@@ -0,0 +1 @@
+__version__ = '1.0.0b1'
diff --git a/rambo/rambo.py b/rambo/rambo.py
new file mode 100755
index 0000000..998c2f8
--- /dev/null
+++ b/rambo/rambo.py
@@ -0,0 +1,493 @@
+#!/usr/bin/env python
+
+'''
+RAMBO - Recipe Analyzer and Multi-package Build Optimizer
+
+Requires conda & conda-build to be installed in a path that appears in the
+python interpreter's search list in order to access the API machinery via
+'conda_build.api'.
+'''
+
+from __future__ import print_function
+import os
+import sys
+from copy import deepcopy
+import argparse
+from six.moves import urllib
+import codecs
+from yaml import safe_load
+import json
+from rambo._version import __version__
try:
    import conda_build.api
except ImportError:
    # Fail early with a clear explanation: this tool is unusable without
    # conda-build's rendering API on the interpreter's import path.
    # (Typo fix: "installed order" -> "installed in order".)
    raise ImportError('conda-build must be installed in order to use this \n'
                      'tool. Either conda-build is not installed, or you \n'
                      'are working in an activated conda environment. \n'
                      'If conda-build is installed deactivate the \n'
                      'environment currently enabled or explicitly switch \n'
                      'to the conda "root" environment to allow use of\n'
                      'conda-build.')
+
+DEFAULT_MINIMUM_NUMPY_VERSION = '1.11'
+
+
class meta(object):
    '''Holds metadata for a recipe obtained from the recipe's meta.yaml file,
    certain values derived from that data, and methods to calculate those
    derived values.'''

    def __init__(self, recipe_dir, versions, dirty=False):
        '''Parameters:
        recipe_dir - directory containing the recipe's meta.yaml file.
        versions - dict of version strings (keys 'python' and 'numpy')
            handed to the conda renderer.
        dirty - when True, reuse the most recent pre-existing conda work
            directory instead of creating a new one.'''
        self.recipe_dirname = os.path.basename(recipe_dir)
        self.versions = versions
        self.dirty = dirty
        self.metaobj = None  # renderdata[0] (MetaData)
        self.mdata = None  # renderdata[0].meta (dict)
        self.active = True  # Visit metadata in certain processing steps?
        self.valid = False
        self.complete = False
        self.name = None
        self.num_bdeps = 0
        self.deps = []
        self.peer_bdeps = []
        self.import_metadata(recipe_dir)
        self.derive_values()
        self.canonical_name = ''
        # Whether or not the package with this metadata
        # already exists in the channel archive
        self.archived = False
        self.gen_canonical()

    def import_metadata(self, rdir):
        '''Read in the package metadata from the given recipe directory via
        the conda recipe renderer to perform string interpolation and
        store the values in a dictionary.'''
        if os.path.isfile(rdir + '/meta.yaml'):
            # render() returns a tuple: (MetaData, bool, bool)
            self.metaobj = conda_build.api.render(
                rdir,
                dirty=self.dirty,
                python=self.versions['python'],
                numpy=self.versions['numpy'])[0]
            self.mdata = self.metaobj.meta
            self.valid = self.is_valid()
            self.complete = self.is_complete()
            if self.valid:
                self.name = self.mdata['package']['name']
        else:
            print('Recipe directory {0} has no meta.yaml file.'.format(
                self.recipe_dirname))

    def derive_values(self):
        '''Compute values derived from the raw metadata: the number of
        build dependencies and the simplified (name-only) dependency list.
        Only runs when the metadata is complete enough to hold a
        requirements/build section.'''
        if self.complete:
            self.num_bdeps = len(self.mdata['requirements']['build'])
            for req in self.mdata['requirements']['build']:
                # Keep only the package name, dropping any version spec.
                self.deps.append(req.split()[0])

    def deplist(self, deptype):
        '''Return the simplified (no version info, if present) list of
        dependency names of the given type.'''
        lst = []
        for dep in self.mdata['requirements'][deptype]:
            lst.append(dep.split()[0])
        return lst

    def is_valid(self):
        '''Does the metadata for this recipe contain the minimum information
        necessary to process?

        BUG FIX: the original assigned to an unrelated local name
        ('complete' instead of 'valid') and unconditionally returned True,
        so recipes lacking a 'package' section were never flagged invalid.'''
        return 'package' in self.mdata

    def is_complete(self):
        '''Is the metadata for this recipe complete enough to allow for use
        in build-order optimization? Requires a requirements/build section.'''
        complete = True
        if 'requirements' in self.mdata.keys():
            if 'build' not in self.mdata['requirements'].keys():
                complete = False
        else:
            complete = False
        return complete

    def gen_canonical(self):
        '''Generate the package's canonical name using available
        information.'''
        self.canonical_name = os.path.basename(
            conda_build.api.get_output_file_path(
                self.metaobj,
                python=self.versions['python'],
                numpy=self.versions['numpy']))
+
+
class metaSet(object):
    '''A collection of multiple recipe metadata objects from a directory
    specification, and methods for manipulating and querying this
    collection.'''

    # Recipe subdirectories that never contain a buildable recipe.
    ignore_dirs = ['.git', 'template']

    def __init__(self,
                 directory,
                 versions,
                 platform,
                 manfile=None,
                 dirty=False):
        '''Parameters:
        directory - a relative or absolute directory in which Conda
            recipe subdirectories may be found.
        versions - Dictionary containing python, numpy, etc, version
            information.
        platform - channel platform subdirectory name appended to the
            manifest's channel URL (e.g. "linux-64").
        manfile - optional manifest (YAML) file used to filter recipes
            and to supply the channel URL and numpy version.
        dirty - passed through to each meta; reuse conda work dirs.'''
        self.metas = []
        self.platform = platform
        self.versions = versions
        self.manfile = manfile
        self.manifest = None
        # BUG FIX: 'channel' was only assigned inside read_manifest(), so
        # when no manifest was supplied the 'if self.channel:' test below
        # raised AttributeError. Default it to None here.
        self.channel = None
        if self.manfile:
            self.read_manifest()
            self.filter_by_manifest()
        self.dirty = dirty
        self.incomplete_metas = []
        self.names = []
        self.read_recipes(directory)
        self.derive_values()
        self.sort_by_peer_bdeps()
        self.merge_metas()
        if self.channel:
            self.channel_data = self.get_channel_data()
            self.flag_archived()

    def read_recipes_old(self, directory):
        '''Process a directory reading in each conda recipe found, creating
        a list of metadata objects for use in analyzing the collection of
        recipes as a whole.

        NOTE(review): appears superseded by read_recipes(); kept for
        backward compatibility — confirm no external callers before
        removing.'''
        recipe_dirnames = os.listdir(directory)
        for rdirname in recipe_dirnames:
            if rdirname in self.ignore_dirs:
                continue
            rdir = directory + '/' + rdirname
            m = meta(rdir, versions=self.versions, dirty=self.dirty)
            if m.complete:
                self.metas.append(m)
                self.names.append(m.name)
            else:
                self.incomplete_metas.append(m)

    def read_recipe_selection(self, directory, recipe_list):
        '''Process a directory reading in each conda recipe named in
        recipe_list, creating a list of metadata objects for use in
        analyzing the collection of recipes as a whole. Complete recipes
        go to self.metas/self.names; others to self.incomplete_metas.'''
        for rdirname in recipe_list:
            if rdirname in self.ignore_dirs:
                continue
            rdir = directory + '/' + rdirname
            m = meta(rdir, versions=self.versions, dirty=self.dirty)
            if m.complete:
                self.metas.append(m)
                self.names.append(m.name)
            else:
                self.incomplete_metas.append(m)

    def read_recipes(self, directory):
        '''Read all recipes in the given directory, restricted to the
        manifest's package list when a manifest was supplied.'''
        recipe_dirnames = os.listdir(directory)
        # If a manifest was given, use it to filter the list of available
        # recipes.
        if self.manifest:
            recipe_list = set.intersection(
                set(recipe_dirnames),
                set(self.manifest['packages']))
        else:
            recipe_list = recipe_dirnames
        self.read_recipe_selection(directory, recipe_list)

    def read_manifest(self):
        '''Load the YAML manifest, derive the platform-specific channel
        URL, and adopt the manifest's numpy version.'''
        # BUG FIX: the original left the manifest file handle open; use a
        # context manager to guarantee it is closed.
        with open(self.manfile, 'r') as mf:
            self.manifest = safe_load(mf)
        self.channel = self.manifest['channel_URL'].strip('/')
        self.channel += '/' + self.platform
        self.versions['numpy'] = str(self.manifest['numpy_version'])

    def filter_by_manifest(self):
        '''Leave only the recipe metadata entries that appear in the
        provided manifest list active.

        NOTE(review): called from __init__ before read_recipes(), at which
        point self.metas is empty, so this is effectively a no-op there;
        filtering actually happens via the intersection in read_recipes().
        Confirm intended call order.'''
        for meta in self.metas:
            if meta.name not in self.manifest['packages']:
                meta.active = False

    def merge_metas(self):
        '''Prepend the list of metas that do not have complete build
        dependency information to the main list.
        Also, add those names to the names list.'''
        # Sort alphabetically by name
        self.incomplete_metas = sorted(
            self.incomplete_metas,
            key=lambda meta: meta.name)
        # Reverse iteration + insert(0) preserves alphabetical order at
        # the head of self.metas.
        for m in self.incomplete_metas[::-1]:
            self.metas.insert(0, m)

    def derive_values(self):
        '''Produce values from the set of recipes taken as a whole.'''
        self.calc_peer_bdeps()

    def calc_peer_bdeps(self):
        '''Produce and store a names-only list of the build dependencies
        for each recipe found to this set of recipes that each recipe
        references.'''
        for meta in self.metas:
            for name in meta.deps:
                if name in self.names:
                    meta.peer_bdeps.append(name)

    def sort_by_peer_bdeps(self):
        '''Sort the list of metadata objects by the number of peer build
        dependencies each has, in ascending order. This gives a good first
        approximation to a correct build order of all peers. Perform an
        extra step here to reduce stochasticity of the order of packages
        within a given tier that all share the same number of peer_bdeps.
        The order of those items apparently varies from run to run.'''
        # First sort by alphabetical on name to make the subsequent
        # sorting deterministic (sorted() is stable).
        self.metas = sorted(self.metas, key=lambda meta: meta.name)
        self.metas = sorted(self.metas, key=lambda meta: len(meta.peer_bdeps))

    def index(self, mname):
        '''Return the index of a metadata object with the name 'mname'.
        Raises IndexError when no such name exists.'''
        for i, meta in enumerate(self.metas):
            if (meta.name == mname):
                return i
        raise IndexError('Name [{0}] not found.'.format(mname))

    def peer_bdep_indices(self, mname):
        '''Returns a list of the indices in the meta list corresponding to
        all the peer build dependencies (bdeps) of the given package
        metadata.'''
        indices = []
        for i, meta in enumerate(self.metas):
            if (meta.name == mname):
                for dep in meta.peer_bdeps:
                    indices.append(self.index(dep))
        return indices

    def position_OK(self, mname):
        '''If a package has peer build dependencies that all occur before
        the package in the sorted list of package recipes, the package's
        position in the build order list is acceptable.'''
        for i in self.peer_bdep_indices(mname):
            if i > self.index(mname):
                return False
        return True

    def relocate(self, mname):
        '''Relocate a meta object in the meta set such that all its internal
        dependencies appear earlier in the list than it does.
        The algorithm:
        For a package that does not have position_OK=True, examine the
        internal dependency indices. If any index is greater than the
        package's index, relocate the package to the index in the list just
        after the largest such dependency index.
        1. Deepcopy object into temp variable
        2. Insert copy into list at new index
        3. remove the original item from list'''
        idx = self.index(mname)
        new_idx = max(self.peer_bdep_indices(mname)) + 1
        temp = deepcopy(self.metas[idx])
        self.metas.insert(new_idx, temp)
        del self.metas[idx]

    def optimize_build_order(self):
        '''Makes a single pass through the list of (complete) package metadata,
        relocating in the list any item which is not in the correct slot in
        the build order.'''
        for m in self.metas:
            if not self.position_OK(m.name):
                self.relocate(m.name)

    def multipass_optimize(self, max_passes=8):
        '''Makes multiple passes over the list of metadata, optimizing during
        each pass until either the entire list is ordered correctly for
        building, or the maximum number of allowed passes is reached. The
        latter condition suggests there is a circular dependency that needs
        to be manually resolved. Returns True on success, False when the
        pass limit was reached.'''
        opass = 0
        num_notOK = 1
        while (num_notOK > 0 and opass < max_passes):
            opass = opass + 1
            num_notOK = 0
            self.optimize_build_order()
            for m in self.metas:
                if not self.position_OK(m.name):
                    num_notOK = num_notOK + 1
        if (opass == max_passes):
            print('Pass {0} of {1} reached. Check for circular '
                  'dependencies.'.format(
                      opass,
                      max_passes))
            return False
        return True

    def get_channel_data(self):
        '''Download the channel metadata from all specified conda package
        channel URLs, parse the JSON data into a dictionary.'''
        jsonbytes = urllib.request.urlopen(self.channel + '/repodata.json')
        # urllib only returns 'bytes' objects, so convert to unicode.
        reader = codecs.getreader('utf-8')
        return json.load(reader(jsonbytes))

    def flag_archived(self):
        '''Flag each meta as either being archived or not by generating the
        package canonical name, fetching the provided conda channel
        archive data, and searching the archive data for the generated
        name. Each meta's 'archived' attribute is set to True if found
        and False if not.'''
        for meta in self.metas:
            if meta.canonical_name in self.channel_data['packages'].keys():
                meta.archived = True

    def print_details(self, fh=sys.stdout):
        '''Print a diagnostic table of each meta's dependency counts,
        indices, and build-order status to fh.'''
        num_notOK = 0
        # BUG FIX: the three header lines below previously went to stdout
        # even when a file handle was supplied; route them to fh like the
        # rest of the report.
        print('conda-build version : ', conda_build.__version__, file=fh)
        print('Python version specified: ', self.versions['python'], file=fh)
        print('Numpy version specified: ', self.versions['numpy'], file=fh)
        print(' num num peer', file=fh)
        print(' name bdeps peer bdep pos.',
              file=fh)
        print(' bdeps indices OK?',
              file=fh)
        print('----------------------------------------------------------',
              file=fh)
        for idx, m in enumerate(self.metas):
            if not self.position_OK(m.name):
                num_notOK = num_notOK + 1
            print('{0:>28} {1:{wid}} {2:{wid}} idx={3:{wid}} {4} {5}'
                  .format(m.name,
                          m.num_bdeps,
                          len(m.peer_bdeps),
                          idx,
                          self.peer_bdep_indices(m.name),
                          self.position_OK(m.name),
                          wid=2), file=fh)
        print('Num not in order = {0}/{1}\n'.format(
            num_notOK,
            len(self.metas)), file=fh)

    def print(self, fh=sys.stdout):
        '''Prints the list of package names in the order in which they appear
        in self.metas to stdout, suitable for ingestion by other tools during
        a build process.'''
        for m in self.metas:
            print('{0}'.format(m.name), file=fh)

    def print_culled(self, fh=sys.stdout):
        '''Prints the list of package names for which the canonical name does
        not exist in the specified archive channel. List is presented in the
        order in which entries appear in self.metas.'''
        for m in [m for m in self.metas if m.active and not m.archived]:
            print('{0}'.format(m.name), file=fh)

    def print_canonical(self, fh=sys.stdout):
        '''Prints list of canonical package names.'''
        for meta in self.metas:
            print('{0:>50}'.format(meta.canonical_name), file=fh)

    def print_status_in_channel(self, fh=sys.stdout):
        '''Prints list of canonical package names and whether or not each
        has already been built and archived in the specified channel.'''
        statstr = {True: '', False: 'Not in channel archive'}
        for meta in self.metas:
            print('{0:>50} {1}'.format(
                meta.canonical_name,
                statstr[meta.archived]), file=fh)
+
+
+# ----
+
+
def main(argv=None):
    '''Command-line entry point: parse arguments, build the metaSet for the
    given recipes directory, optimize the build order, and print the
    requested report.

    Parameters:
    argv - full argument vector (argv[0] is the program name); defaults
        to sys.argv when None.'''
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(prog='rambo')
    parser.add_argument('-p', '--platform', type=str)
    parser.add_argument(
        '--python',
        type=str,
        help='Python version to pass to conda machinery when rendering '
        'recipes. "#.#" format. If not specified, the version of python'
        ' hosting conda_build.api is used.')
    parser.add_argument(
        '-m',
        '--manifest',
        type=str,
        help='Use this file to filter the list of recipes to process.')
    parser.add_argument(
        '-f',
        '--file',
        type=str,
        help='Send package list output to this file instead of stdout.')
    parser.add_argument(
        '-c',
        '--culled',
        action='store_true',
        help='Print the ordered list of package names reduced to the set'
        ' of packages that do not already exist in the channel specified'
        ' in the supplied manifest file.')
    parser.add_argument(
        '-d',
        '--details',
        action='store_true',
        help='Display details used in determining build order and/or '
        'package culling.')
    parser.add_argument(
        '--dirty',
        action='store_true',
        help='Use the most recent pre-existing conda work directory for '
        'each recipe instead of creating a new one. If a work directory '
        'does not already exist, the recipe is processed in the normal '
        'fashion. Used mostly for testing purposes.')
    parser.add_argument(
        '-v',
        '--version',
        action='version',
        version='%(prog)s ' + __version__,
        help='Display version information.')
    parser.add_argument('recipes_dir', type=str)
    # BUG FIX: honor the argv parameter; parse_args() with no argument
    # always read sys.argv, making the parameter useless for callers.
    args = parser.parse_args(argv[1:])

    # NOTE: the original checked 'if args.version:' here and called the
    # nonexistent os.exit(). With action='version', argparse prints the
    # version and exits by itself and stores no 'version' attribute on the
    # namespace, so that check raised AttributeError on every normal run.
    # It has been removed.

    recipes_dir = os.path.normpath(args.recipes_dir)

    fh = None
    if args.file:
        fh = open(args.file, 'w')

    try:
        versions = {'python': '', 'numpy': ''}
        if args.python:
            versions['python'] = args.python

        versions['numpy'] = DEFAULT_MINIMUM_NUMPY_VERSION

        mset = metaSet(
            recipes_dir,
            platform=args.platform,
            versions=versions,
            dirty=args.dirty,
            manfile=args.manifest)

        mset.multipass_optimize()

        if args.details:
            mset.print_details(fh)
            # getattr guards against metaSet instances that never define
            # 'channel' (it is only set when a manifest is read).
            if getattr(mset, 'channel', None):
                mset.print_status_in_channel(fh)
        elif args.culled:
            mset.print_culled(fh)
        else:
            mset.print(fh)
    finally:
        # BUG FIX: the output file handle was never closed.
        if fh:
            fh.close()

if __name__ == "__main__":
    main()