-rw-r--r--   LICENSE.txt          29
-rw-r--r--   README.md             6
-rwxr-xr-x   pipeline_backup.py  212
3 files changed, 247 insertions, 0 deletions
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..3f1b241
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,29 @@
+Copyright (C) 2018 Association of Universities for Research in Astronomy (AURA)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ 3. The name of AURA and its representatives may not be used to
+ endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY AURA ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL AURA BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f46a8f8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+# pipeline_backup
+
+Scans the current directory (or a named directory) for spec files matching a glob pattern. For each valid spec file found, it downloads every listed package into a local path derived from the package's URL.
+
+For example, `http://example.com/channel/main/linux-64/package.tar.bz2` is saved as `destination/main/linux-64/package.tar.bz2` on the local filesystem.
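+
+## Usage
+
+A spec file is a plain-text list of package URLs containing an `@EXPLICIT` marker line; other lines starting with `#` or `@` are ignored. By default, files matching `*-py*.txt` are treated as spec files (see `--extension`). A minimal sketch, with an illustrative file name and URL:
+
+```
+# delivery-py36.txt
+@EXPLICIT
+http://example.com/channel/main/linux-64/package.tar.bz2
+```
+
+To mirror every package referenced by spec files under the current directory into a local `mirror/` tree (directory name illustrative):
+
+```
+python pipeline_backup.py --basedir . --backup mirror
+```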
+
diff --git a/pipeline_backup.py b/pipeline_backup.py
new file mode 100755
index 0000000..b32260f
--- /dev/null
+++ b/pipeline_backup.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+import fnmatch
+import os
+import sys
+from urllib.request import urlopen
+from urllib.error import HTTPError
+
+
+__version__ = '1.0.0'
+
+
+class PipelineSpecError(Exception):
+ pass
+
+
+class PipelineSpec:
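+    """Parse an explicit spec file: a plain-text list of package URLs
+    marked by an '@EXPLICIT' line. Comment ('#') and directive ('@')
+    lines are skipped when reading records.
+    """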
+ def __init__(self, filename):
+ self.filename = filename
+ self.data = list()
+ self._read()
+
+ def replace(self, old, new):
+ for idx, record in enumerate(self.data):
+ if old in record:
+ self.data[idx] = record.replace(old, new)
+
+ def search(self, pattern):
+ for record in self.data:
+ if fnmatch.fnmatch(record, pattern):
+ yield record
+
+    def verify(self):
+        # A valid spec file contains a line consisting of '@EXPLICIT'
+        with open(self.filename, 'r') as fp:
+            return any(line.strip() == '@EXPLICIT' for line in fp)
+
+ def _read(self):
+ if not self.verify():
+ raise PipelineSpecError('Invalid spec file: {}'.format(self.filename))
+
+ with open(self.filename, 'r') as fp:
+ data = list()
+ for line in fp:
+ line = line.strip()
+ if not line or line.startswith('#') or line.startswith('@'):
+ continue
+ data.append(line)
+ self.data = data
+
+
+class Backup:
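+    """Download each URL in *data* into *destination*, preserving the last
+    two URL path components (e.g. 'main/linux-64') as subdirectories and
+    skipping files that already exist locally.
+    """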
+ def __init__(self, data, destination):
+ assert isinstance(data, list)
+ assert isinstance(destination, str)
+        self.block_size = 0xFFFF  # read/write in ~64 KiB chunks
+        self.destination = os.path.normpath(destination)
+        self.data = data
+        self.verbose = False
+ self.stats = dict(
+ read=0,
+ written=0,
+ success=0,
+ skipped=0,
+ fatal=list(),
+ fail=list(),
+ )
+
+ def run(self):
+ for url in self.data:
+ self._download(url)
+
+ def show_stats(self):
+ print("### Statistics ###")
+ for key, value in self.stats.items():
+ fmt = '{:<10s}: {:<20d}'
+ if isinstance(value, list):
+ if len(value) != 0:
+ fmt += '\n=>'
+ print(fmt.format(key, len(value)))
+ for url, reason in value:
+ print(' [{}] {}'.format(reason, url))
+ continue
+
+ if key == 'read' or key == 'written':
+ value = value / (1024 ** 2)
+ fmt = '{:<10s}: {:<.02f}MB'
+
+ print(fmt.format(key, value))
+
+ def _download(self, url):
+ path, filename = self._determine_local_path(url)
+ dirpath = os.path.join(self.destination, path)
+ fullpath = os.path.join(dirpath, filename)
+ block_size = self.block_size
+
+ if not os.path.exists(dirpath):
+ os.makedirs(dirpath)
+ elif os.path.exists(fullpath):
+ self.stats['skipped'] += 1
+ return
+
+ try:
+ with urlopen(url) as data:
+ with open(fullpath, 'w+b') as fp:
+ if self.verbose:
+ print("Writing: {}".format(fullpath))
+
+ chunk = data.read(block_size)
+ self.stats['read'] += len(chunk)
+ while chunk:
+ fp.write(chunk)
+ self.stats['written'] += len(chunk)
+ chunk = data.read(block_size)
+ self.stats['read'] += len(chunk)
+ except HTTPError as reason:
+ self.stats['fail'].append([url, reason])
+ return
+ except Exception as reason:
+            self.stats['fatal'].append([url, reason])
+            return
+
+ self.stats['success'] += 1
+
+ def _determine_local_path(self, record):
+ assert isinstance(record, str)
+ filename = os.path.basename(record)
+ markers = []
+
+ for i, ch in enumerate(record):
+ if ch == '/':
+ markers.append(i)
+
+ markers_len = len(markers)
+ if markers_len < 3:
+ raise ValueError('Invalid URL part length')
+
+        # Keep the last two path components before the filename,
+        # e.g. '.../channel/main/linux-64/pkg.tar.bz2' -> 'main/linux-64'
+        begin = markers[markers_len - 3] + 1
+        end = markers[markers_len - 1]
+
+        # Returned path is relative; _download() joins it onto self.destination
+        local_path = os.path.normpath(record[begin:end])
+        return local_path, filename
+
+
+def find_specs(search_path, pattern):
+ """ Compile list of spec file paths
+ """
+ for root, dirs, files in os.walk(search_path):
+ for filename in files:
+ filename = os.path.join(root, filename)
+ if fnmatch.fnmatch(filename, pattern):
+ yield filename
+
+
+if __name__ == '__main__':
+ import argparse
+
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--basedir', '-d', default='.',
+ help='Search for spec files under this path')
+
+    parser.add_argument('--extension', '-e', default='*-py*.txt',
+                        help='Glob pattern used to identify spec files')
+
+ parser.add_argument('--search-pattern', '-s', default='*',
+ help='Return packages from spec files matching glob pattern')
+
+    parser.add_argument('--replace-pattern', '-r', action='append', nargs=2,
+        default=list(), metavar=('OLD', 'NEW'),
+        help='Replace OLD with NEW in package records (may be given multiple times)')
+
+ parser.add_argument(
+ '--backup', '-b', help='Backup packages to root directory (preserve relative tree)')
+ parser.add_argument('--version', action='store_true')
+
+ args = parser.parse_args()
+
+ if args.version:
+ print(__version__)
+        sys.exit(0)
+
+ info = list()
+ for spec in find_specs(args.basedir, args.extension):
+ pspec = PipelineSpec(spec)
+
+ for pattern in args.replace_pattern:
+ old, new = pattern
+ pspec.replace(old, new)
+
+ if args.search_pattern:
+ info += pspec.search(args.search_pattern)
+ else:
+ info += pspec.data
+
+    if not info:
+        print("No matching packages found (spec file pattern: '{}')".format(args.extension), file=sys.stderr)
+        sys.exit(0)
+
+ info = sorted(set(info))
+
+ if args.backup:
+ if not os.path.exists(args.backup):
+ os.makedirs(args.backup)
+
+ backup = Backup(info, args.backup)
+ backup.verbose = True
+ backup.run()
+ backup.show_stats()
+
+ else:
+ for x in info:
+ print(x)