-rw-r--r--   LICENSE.txt          29
-rw-r--r--   README.md             6
-rwxr-xr-x   pipeline_backup.py  212
3 files changed, 247 insertions, 0 deletions
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..3f1b241
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,29 @@
+Copyright (C) 2018 Association of Universities for Research in Astronomy (AURA)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ 3. The name of AURA and its representatives may not be used to
+ endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY AURA ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL AURA BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f46a8f8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+# pipeline_backup
+
+Scans the current directory (or a named directory) for spec files matching a glob pattern. For each valid spec file found, it downloads every listed package into a local path derived from the package's URL.
+
+For example, `http://example.com/channel/main/linux-64/package.tar.bz2` is saved as `destination/main/linux-64/package.tar.bz2` on the local filesystem.
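+
+## Usage
+
+A spec file is a plain-text list of package URLs containing an `@EXPLICIT` marker line; other lines starting with `#` or `@` are ignored. By default, files matching `*-py*.txt` are treated as spec files (see `--extension`). A minimal sketch, with an illustrative file name and URL:
+
+```
+# delivery-py36.txt
+@EXPLICIT
+http://example.com/channel/main/linux-64/package.tar.bz2
+```
+
+To mirror every package referenced by spec files under the current directory into a local `mirror/` tree (directory name illustrative):
+
+```
+python pipeline_backup.py --basedir . --backup mirror
+```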
+
diff --git a/pipeline_backup.py b/pipeline_backup.py
new file mode 100755
index 0000000..b32260f
--- /dev/null
+++ b/pipeline_backup.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+import fnmatch
+import os
+import sys
+from urllib.request import urlopen
+from urllib.error import HTTPError
+
+
+__version__ = '1.0.0'
+
+
+class PipelineSpecError(Exception):
+ pass
+
+
+class PipelineSpec:
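+    """Parse an explicit spec file: a plain-text list of package URLs
+    marked by an '@EXPLICIT' line. Comment ('#') and directive ('@')
+    lines are skipped when reading records.
+    """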
+ def __init__(self, filename):
+ self.filename = filename
+ self.data = list()
+ self._read()
+
+ def replace(self, old, new):
+ for idx, record in enumerate(self.data):
+ if old in record:
+ self.data[idx] = record.replace(old, new)
+
+ def search(self, pattern):
+ for record in self.data:
+ if fnmatch.fnmatch(record, pattern):
+ yield record
+
+    def verify(self):
+        # A valid spec file contains a line consisting of '@EXPLICIT'
+        with open(self.filename, 'r') as fp:
+            return any(line.strip() == '@EXPLICIT' for line in fp)
+
+ def _read(self):
+ if not self.verify():
+ raise PipelineSpecError('Invalid spec file: {}'.format(self.filename))
+
+ with open(self.filename, 'r') as fp:
+ data = list()
+ for line in fp:
+ line = line.strip()
+ if not line or line.startswith('#') or line.startswith('@'):
+ continue
+ data.append(line)
+ self.data = data
+
+
+class Backup:
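+    """Download each URL in *data* into *destination*, preserving the last
+    two URL path components (e.g. 'main/linux-64') as subdirectories and
+    skipping files that already exist locally.
+    """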
+ def __init__(self, data, destination):
+ assert isinstance(data, list)
+ assert isinstance(destination, str)
+        self.block_size = 0xFFFF  # read/write in ~64 KiB chunks
+        self.destination = os.path.normpath(destination)
+        self.data = data
+        self.verbose = False
+ self.stats = dict(
+ read=0,
+ written=0,
+ success=0,
+ skipped=0,
+ fatal=list(),
+ fail=list(),
+ )
+
+ def run(self):
+ for url in self.data:
+ self._download(url)
+
+ def show_stats(self):
+ print("### Statistics ###")
+ for key, value in self.stats.items():
+ fmt = '{:<10s}: {:<20d}'
+ if isinstance(value, list):
+ if len(value) != 0:
+ fmt += '\n=>'
+ print(fmt.format(key, len(value)))
+ for url, reason in value:
+ print(' [{}] {}'.format(reason, url))
+ continue
+
+ if key == 'read' or key == 'written':
+ value = value / (1024 ** 2)
+ fmt = '{:<10s}: {:<.02f}MB'
+
+ print(fmt.format(key, value))
+
+ def _download(self, url):
+ path, filename = self._determine_local_path(url)
+ dirpath = os.path.join(self.destination, path)
+ fullpath = os.path.join(dirpath, filename)
+ block_size = self.block_size
+
+ if not os.path.exists(dirpath):
+ os.makedirs(dirpath)
+ elif os.path.exists(fullpath):
+ self.stats['skipped'] += 1
+ return
+
+ try:
+ with urlopen(url) as data:
+ with open(fullpath, 'w+b') as fp:
+ if self.verbose:
+ print("Writing: {}".format(fullpath))
+
+ chunk = data.read(block_size)
+ self.stats['read'] += len(chunk)
+ while chunk:
+ fp.write(chunk)
+ self.stats['written'] += len(chunk)
+ chunk = data.read(block_size)
+ self.stats['read'] += len(chunk)
+ except HTTPError as reason:
+ self.stats['fail'].append([url, reason])
+ return
+ except Exception as reason:
+            self.stats['fatal'].append([url, reason])
+            return
+
+ self.stats['success'] += 1
+
+ def _determine_local_path(self, record):
+ assert isinstance(record, str)
+ filename = os.path.basename(record)
+ markers = []
+
+ for i, ch in enumerate(record):
+ if ch == '/':
+ markers.append(i)
+
+ markers_len = len(markers)
+ if markers_len < 3:
+ raise ValueError('Invalid URL part length')
+
+        # Keep the last two path components before the filename,
+        # e.g. '.../channel/main/linux-64/pkg.tar.bz2' -> 'main/linux-64'
+        begin = markers[markers_len - 3] + 1
+        end = markers[markers_len - 1]
+
+        # Returned path is relative; _download() joins it onto self.destination
+        local_path = os.path.normpath(record[begin:end])
+        return local_path, filename
+
+
+def find_specs(search_path, pattern):
+ """ Compile list of spec file paths
+ """
+ for root, dirs, files in os.walk(search_path):
+ for filename in files:
+ filename = os.path.join(root, filename)
+ if fnmatch.fnmatch(filename, pattern):
+ yield filename
+
+
+if __name__ == '__main__':
+ import argparse
+
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--basedir', '-d', default='.',
+ help='Search for spec files under this path')
+
+    parser.add_argument('--extension', '-e', default='*-py*.txt',
+                        help='Glob pattern used to identify spec files')
+
+ parser.add_argument('--search-pattern', '-s', default='*',
+ help='Return packages from spec files matching glob pattern')
+
+    parser.add_argument('--replace-pattern', '-r', action='append', nargs=2,
+        default=list(), metavar=('OLD', 'NEW'),
+        help='Replace OLD with NEW in package records (may be given multiple times)')
+
+ parser.add_argument(
+ '--backup', '-b', help='Backup packages to root directory (preserve relative tree)')
+ parser.add_argument('--version', action='store_true')
+
+ args = parser.parse_args()
+
+ if args.version:
+ print(__version__)
+        sys.exit(0)
+
+ info = list()
+ for spec in find_specs(args.basedir, args.extension):
+ pspec = PipelineSpec(spec)
+
+ for pattern in args.replace_pattern:
+ old, new = pattern
+ pspec.replace(old, new)
+
+ if args.search_pattern:
+ info += pspec.search(args.search_pattern)
+ else:
+ info += pspec.data
+
+    if not info:
+        print("No matching packages found (spec file pattern: '{}')".format(args.extension), file=sys.stderr)
+        sys.exit(0)
+
+ info = sorted(set(info))
+
+ if args.backup:
+ if not os.path.exists(args.backup):
+ os.makedirs(args.backup)
+
+ backup = Backup(info, args.backup)
+ backup.verbose = True
+ backup.run()
+ backup.show_stats()
+
+ else:
+ for x in info:
+ print(x)