author | Matt Rendina <mrendina@stsci.edu> | 2017-04-13 11:03:24 -0400 |
---|---|---|
committer | Matt Rendina <mrendina@stsci.edu> | 2017-04-13 11:03:24 -0400 |
commit | 5ae2aab46f99bbc849eea3ee1abe198fde5815a5 (patch) | |
tree | 2765b8ee543aa7a8da97c623ac07d21fd99576d6 /rambo.py | |
parent | 139b6076ba21a4da27c6a9aaa7f648baebc736fb (diff) | |
download | rambo-5ae2aab46f99bbc849eea3ee1abe198fde5815a5.tar.gz |
Refactor to use official conda_build API; add culling behavior; add file output option; cleanup and usability improvements
Diffstat (limited to 'rambo.py')
-rwxr-xr-x | rambo.py | 261 |
1 file changed, 166 insertions, 95 deletions
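The main changes are a switch from capturing the stdout of `conda_build.cli.main_render` to calling `conda_build.api` directly, and a new culling step that compares each recipe's canonical package name against a channel's `repodata.json` so already-archived packages can be skipped. As a rough illustration of that culling check (not code from this commit), the sketch below uses the same `urllib`/`codecs`/`json` approach as the new `get_channel_data()` and `flag_archived()` methods; the helper name, channel URL, and package filename are placeholders.

```python
import codecs
import json
import urllib.request

def is_archived(channel_url, canonical_name):
    '''Return True if canonical_name is already listed in the channel's
    repodata.json index -- the same membership test flag_archived() performs.'''
    # repodata.json lives in each platform subdirectory of a conda channel
    # (e.g. .../linux-64/repodata.json).
    jsonbytes = urllib.request.urlopen(channel_url.strip('/') + '/repodata.json')
    # urllib returns a bytes stream, so wrap it in a UTF-8 reader for json.load().
    repodata = json.load(codecs.getreader('utf-8')(jsonbytes))
    return canonical_name in repodata['packages']

# Placeholder channel URL and package filename -- substitute a real channel
# platform directory and a canonical name produced by get_output_file_path().
print(is_archived('https://conda.example.org/channel/linux-64',
                  'astropy-1.3-np111py35_0.tar.bz2'))
```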
```diff
@@ -3,33 +3,31 @@
 '''
 RAMBO - Recipe Analyzer and Multi-package Build Optimizer

-Requires conda to be installed on the path in order to access the recipe
-renderer 'conda_build.cli.main_render'.
-
-  TODO: Correct conda recipe renderer RuntimeError "'numpy x.x' requires
-  external setting" when parsing meta file for astroconda-dev/astropy
+Requires conda to be installed on the PATH in order to access the API
+machinery via 'conda_build.api'.
 '''
 import os
 import sys
-from yaml import safe_load
-from io import StringIO
 from copy import deepcopy
 import argparse
-
-# Class provided by conda-build for performing string interpolation of
-# jinja2-enhanced <recipe>/meta.yaml files to produce legal YAML.
-import conda_build.cli.main_render as cbr
-
+import urllib.request
+import codecs
+import json
+import conda_build.api

 class meta(object):

     '''Holds metadata for a recipe obtained from the recipe's meta.yaml
     file, certain values derived from that data, and methods to calculate
     those derived values.'''

-    def __init__(self, recipe_dir):
-        self.yaml = None
+    def __init__(self, recipe_dir, versions, dirty=False):
         self.recipe_dirname = os.path.basename(recipe_dir)
+        self.versions = versions
+        self.dirty = dirty
+        self.metaobj = None  # renderdata[0] (MetaData)
+        self.mdata = None    # renderdata[0].meta (dict)
+
         self.valid = False
         self.complete = False
         self.name = None
@@ -42,35 +40,38 @@ class meta(object):
         self.import_metadata(recipe_dir)
         self.derive_values()
+        self.canonical_name = ''
+        self.archived = False  # Whether or not the package with this metadata
+                               # already exists in the channel archive
+
+        self.gen_canonical()
+        # self.unite_deps()  # Test if needed.

     def import_metadata(self, rdir):
-        '''Read in the package metadata from the given file then pass it
-        through the conda recipe renderer to perform string interpolation and
-        produce legal YAML text which is then parsed and stored.'''
+        '''Read in the package metadata from the given recipe directory via
+        the conda recipe renderer to perform string interpolation and
+        store the values in a dictionary.'''
         if os.path.isfile(rdir + '/meta.yaml'):
-            # Redirect stdout for each call to cbr.execute since it only
-            # writes to stdout.
-            capture = StringIO()
-            save_stdout = sys.stdout
-            sys.stdout = capture
-            cbr.execute([rdir])
-            # Restore stdout.
-            sys.stdout = save_stdout
-            yaml = safe_load(capture.getvalue())
-            self.yaml = yaml
+            #print(' >>>>>>>> Importing metadata from {0}...'.format(self.recipe_dirname))
+            # render() returns a tuple: (MetaData, bool, bool)
+            self.metaobj = conda_build.api.render(rdir,
+                                                  self.dirty,
+                                                  python=self.versions['python'],
+                                                  numpy=self.versions['numpy'])[0]
+            self.mdata = self.metaobj.meta
             self.valid = self.is_valid()
             self.complete = self.is_complete()
             if self.valid:
-                self.name = self.yaml['package']['name']
+                self.name = self.mdata['package']['name']
         else:
             print('Recipe directory {0} has no meta.yaml file.'.format(
                 self.recipe_dirname))

     def derive_values(self):
         if self.complete:
-            self.num_bdeps = len(self.yaml['requirements']['build'])
-            for req in self.yaml['requirements']['build']:
+            self.num_bdeps = len(self.mdata['requirements']['build'])
+            for req in self.mdata['requirements']['build']:
                 self.deps.append(req.split()[0])

     def unite_deps(self):
@@ -78,7 +79,7 @@ class meta(object):
         build and run dependencies in .deps.'''
         if self.complete:
             for key in ['build', 'run']:
-                for req in self.yaml['requirements'][key]:
+                for req in self.mdata['requirements'][key]:
                     self.deps.append(req.split()[0])
             self.deps = set(self.deps)

@@ -86,7 +87,7 @@ class meta(object):
         '''Return the simplified (no version info, if present) list of
         dependency names of the given type.'''
         lst = []
-        for dep in self.yaml['requirements'][deptype]:
+        for dep in self.mdata['requirements'][deptype]:
             lst.append(dep.split()[0])
         return lst

@@ -94,7 +95,7 @@ class meta(object):
         '''Does the metadata for this recipe contain the minimum information
         necessary to process?'''
         valid = True
-        if 'name' not in self.yaml.get('package', {}):
+        if 'package' not in self.mdata.keys():
             complete = False
         return valid

@@ -102,10 +103,22 @@ class meta(object):
         '''Is the metadata for this recipe complete enough to allow for use
         in build-order optimization?'''
         complete = True
-        if 'build' not in self.yaml.get('requirements', {}):
+        if 'requirements' in self.mdata.keys():
+            if 'build' not in self.mdata['requirements'].keys():
+                complete = False
+        else:
             complete = False
         return complete

+    def gen_canonical(self):
+        '''Generate the package's canonical name using available
+        information.'''
+        self.canonical_name = os.path.basename(
+            conda_build.api.get_output_file_path(
+                self.metaobj,
+                python=self.versions['python'],
+                numpy=self.versions['numpy']))
+
 class metaSet(object):

     '''A collection of mulitple recipe metadata objects from a directory
@@ -114,7 +127,18 @@ class metaSet(object):

     ignore_dirs = ['.git', 'template']

-    def __init__(self, directory):
+    def __init__(self,
+                 directory,
+                 versions,
+                 channel,
+                 dirty=False):
+        '''Parameters:
+        directory - a relative or absolute directory in which Conda
+                    recipe subdirectories may be found.
+        versions - Dictionary containing python, numpy, etc, version
+                    information.'''
+        self.versions = versions
+        self.dirty = dirty
         self.metas = []
         self.incomplete_metas = []
         self.names = []
@@ -122,6 +146,11 @@ class metaSet(object):
         self.derive_values()
         self.sort_by_peer_bdeps()
         self.merge_metas()
+        self.channel = channel
+        if channel:
+            self.channel_URL = channel.strip('/')
+            self.channel_data = self.get_channel_data()
+            self.flag_archived()

     def read_recipes(self, directory):
         '''Process a directory reading in each conda recipe found, creating
@@ -132,7 +161,7 @@ class metaSet(object):
             if rdirname in self.ignore_dirs:
                 continue
             rdir = directory + '/' + rdirname
-            m = meta(rdir)
+            m = meta(rdir, versions=self.versions, dirty=self.dirty)
             if m.complete:
                 self.metas.append(m)
                 self.names.append(m.name)
@@ -221,8 +250,8 @@ class metaSet(object):
         '''Makes multiple passes over the list of metadata, optimizing during
         each pass until either the entire list is ordered correctly for
         building, or the maximum number of allowed passes is reached. The
-        latter condition likely means there is a circular dependency that
-        needs to be manually resolved.'''
+        latter condition suggests there is a circular dependency that needs
+        to be manually resolved.'''
         opass = 0
         num_notOK = 1
         while (num_notOK > 0 and opass < max_passes):
@@ -240,35 +269,30 @@ class metaSet(object):
                 return False
         return True

-    def print_by_tier(self):
-        print(' num num peer')
-        print(' name bdeps peer bdep pos.')
-        print(' bdeps indices OK?')
-        print('----------------------------------------------------------')
-        num_notOK = 0
-        for num_peer_bdeps in range(0, 16):
-            for idx, m in enumerate(self.metas):
-                if (len(m.peer_bdeps) == num_peer_bdeps):
-                    if not self.position_OK(m.name):
-                        num_notOK = num_notOK + 1
-                    print('{0:>28} {1:{wid}} {2:{wid}} idx={3:{wid}}'
-                          ' {4} {5}'.format(
-                              m.name,
-                              m.num_bdeps,
-                              len(m.peer_bdeps),
-                              idx,
-                              self.peer_bdep_indices(m.name),
-                              self.position_OK(m.name),
-                              wid=2))
-        print()
-        print('Num not in order = {0}/{1}'.format(num_notOK, len(self.metas)))
-
-    def print_details(self):
+    def get_channel_data(self):
+        '''Download the channel metadata from all specified conda package
+        channel URLs, parse the JSON data into a dictionary.'''
+        jsonbytes = urllib.request.urlopen(self.channel_URL + '/repodata.json')
+        # urllib only returns 'bytes' objects, so convert to unicode.
+        reader = codecs.getreader('utf-8')
+        return json.load(reader(jsonbytes))
+
+    def flag_archived(self):
+        '''Flag each meta as either being archived or not by generating the
+        package canonical name, fetching the provided conda channel
+        archive data, and searching the archive data for the generated
+        name. Each meta's 'archived' attribute is set to True if found
+        and False if not.'''
+        for meta in self.metas:
+            if meta.canonical_name in self.channel_data['packages'].keys():
+                meta.archived = True
+
+    def print_details(self, fh=sys.stdout):
         num_notOK = 0
-        print(' num num peer')
-        print(' name bdeps peer bdep pos.')
-        print(' bdeps indices OK?')
-        print('----------------------------------------------------------')
+        print(' num num peer', file=fh)
+        print(' name bdeps peer bdep pos.', file=fh)
+        print(' bdeps indices OK?', file=fh)
+        print('----------------------------------------------------------', file=fh)
         for idx, m in enumerate(self.metas):
             if not self.position_OK(m.name):
                 num_notOK = num_notOK + 1
@@ -279,53 +303,100 @@ class metaSet(object):
                       idx,
                       self.peer_bdep_indices(m.name),
                       self.position_OK(m.name),
-                      wid=2))
-        print('Num not in order = {0}/{1}'.format(num_notOK, len(self.metas)))
-
-    def print(self):
-        '''Prints the list of package names in the order they appear in
-        self.metas to stdout, suitable for ingestion by other tools
-        during a build process.'''
+                      wid=2), file=fh)
+        print('Num not in order = {0}/{1}\n'.format(num_notOK,
+                                                    len(self.metas)), file=fh)
+
+    def print(self, fh=sys.stdout):
+        '''Prints the list of package names in the order in which they appear
+        in self.metas to stdout, suitable for ingestion by other tools during
+        a build process.'''
         for m in self.metas:
-            print('{0}'.format(m.name))
+            print('{0}'.format(m.name), file=fh)

-# ----
+    def print_culled(self, fh=sys.stdout):
+        '''Prints the list of package names for which the canonical name does not
+        exist in the specified archive channel. List is presented in the order in
+        which entries appear in self.metas.'''
+        for m in self.metas:
+            if not m.archived:
+                print('{0}'.format(m.name), file=fh)

+    def print_canonical(self, fh=sys.stdout):
+        '''Prints list of canonical package names.'''
+        for meta in self.metas:
+            print('{0:>50}'.format(meta.canonical_name), file=fh)

-def print_ordered(mset):
-    '''Perform a multi-pass build order optimization on the package metadata
-    and print a simple ordered list of package names to stdout, suitable
-    for piping to other programs.'''
-    mset.multipass_optimize()
-    mset.print()
+    def print_status_in_channel(self, fh=sys.stdout):
+        '''Prints list of canonical package names and whether or not each
+        has already been built and archived in the specified channel.'''
+        statstr = {True: '', False: 'Not in channel archive'}
+        for meta in self.metas:
+            print('{0:>50} {1}'.format(meta.canonical_name,
+                                       statstr[meta.archived]), file=fh)

-def print_details(mset):
-    '''Perform a multi-pass build order optimization on the package metadata
-    and print a detailed summary of each package's dependency totals, index,
-    dependency indices, and build position status.'''
-    mset.multipass_optimize()
-    mset.print_details()
+# ----


 def main(argv):
     parser = argparse.ArgumentParser()
-    parser.add_argument('--ordered', action='store_true')
-    parser.add_argument('--details', action='store_true')
-    parser.add_argument('-v', '--verbose', action='store_true')
+    parser.add_argument('-c', '--culled', action='store_true',
+                        help='Print the ordered list of package names reduced to the set'
+                        ' of packages that do not already exist in the specified channel.'
+                        ' Requires --channel')
+    parser.add_argument('-d', '--details', action='store_true',
+                        help='Display details used in determining build order and/or '
+                        'package culling.')
+    parser.add_argument('-f', '--file',
+                        help='Send package list output to this file instead of stdout.')
+    parser.add_argument('--channel', type=str,
+                        help='URL of conda channel repository to search for package list '
+                        'culling purposes.')
+    parser.add_argument('--python', type=str,
+                        help='Python version to pass to conda machinery when rendering '
+                        'recipes. "#.#" format.')
+    parser.add_argument('--numpy', type=str,
+                        help='Numpy version to pass to conda machinery when rendering '
+                        'recipes. "#.#" format.')
+    parser.add_argument('--dirty', type=str,
+                        help='Use the most recent pre-existing conda work directory for '
+                        'each recipe instead of creating a new one. If a work directory '
+                        'does not already exist, the recipe is processed in the normal '
+                        'fashion.')
     parser.add_argument('recipes_dir', type=str)
     args = parser.parse_args()
+
     recipes_dir = os.path.normpath(args.recipes_dir)

-    mset = metaSet(recipes_dir)
+    fh = None
+    if args.file:
+        fh = open(args.file, 'w')

-    if args.ordered:
-        print_ordered(mset)
+    versions = {'python':'',
+                'numpy':''}
+    if args.python:
+        versions['python'] = args.python
+    if args.numpy:
+        versions['numpy'] = args.numpy

-    if args.details:
-        print_details(mset)
+    mset = metaSet(
+        recipes_dir,
+        versions=versions,
+        channel=args.channel,
+        dirty=args.dirty)

+    mset.multipass_optimize()
+
+    if args.details:
+        mset.print_details(fh)
+        if mset.channel:
+            mset.print_status_in_channel(fh)
+    elif args.culled:
+        mset.print_culled(fh)
+    else:
+        mset.print(fh)

 if __name__ == "__main__":
     main(sys.argv)
```
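For reference, a minimal sketch of driving the refactored classes directly, mirroring what `main()` does after this commit. It assumes `rambo.py` is importable, a conda-build release whose `api.render()` returns a `(MetaData, bool, bool)` tuple (as the comment in `import_metadata()` notes), and placeholder recipe paths, versions, and channel URL.

```python
# Sketch only: mirrors main() in the refactored rambo.py; all literals below
# (recipe path, versions, channel URL) are placeholders, not project defaults.
import sys
from rambo import metaSet

versions = {'python': '3.5', 'numpy': '1.11'}        # placeholder versions
mset = metaSet('path/to/recipes',                    # placeholder recipe dir
               versions=versions,
               channel='https://conda.example.org/channel/linux-64',
               dirty=False)
mset.multipass_optimize()          # order recipes by build dependencies
mset.print_culled(sys.stdout)      # only packages not already archived
```

The equivalent command-line invocation would combine the new `--culled`, `--channel`, `--python`, `--numpy`, and `--file` options added in this commit.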