summary | refs | log | tree | commit | diff
path: root/entomb.py
diff options
context:
space:
mode:
author: Joseph Hunkeler <jhunkeler@gmail.com> 2019-09-20 23:25:24 -0400
committer: Joseph Hunkeler <jhunkeler@gmail.com> 2019-09-20 23:25:24 -0400
commit: bf6adac4d192164cded9f0b71fe03461417a5162 (patch)
tree: a119dfc3b65160faa566f429a58a337f91ddd8a2 /entomb.py
parent: 8c9a82e2d80586e6464b1d0a09bb12c769307fc7 (diff)
download: entomb-master.tar.gz
Minor improvements: [HEAD, master]
* Added statistics
* Only create directories when they don't exist
* Added verbose mode
Diffstat (limited to 'entomb.py')
-rwxr-xr-x entomb.py 52
1 files changed, 48 insertions, 4 deletions
diff --git a/entomb.py b/entomb.py
index da26357..171b216 100755
--- a/entomb.py
+++ b/entomb.py
@@ -4,6 +4,31 @@ import os
import fnmatch
import requests
import shutil
+import sys
+import time
+
+
+VERBOSE = False
+ERRORS = []
+STATS = {
+ 'files': 0,
+ 'urls': 0,
+ 'processed': 0,
+ 'skipped': 0,
+ 'failed': 0,
+ 'time_sec': ''
+}
+
+
+def post_info():
+ if ERRORS:
+ print("# Errors:")
+ for msg in ERRORS:
+ print(msg)
+
+ print("# Statistics:")
+ for k, v in STATS.items():
+ print("{:<20s}: {:>10}".format(k, v))
def channel_dir(d):
@@ -15,12 +40,16 @@ def channel_dir(d):
def download(url, destdir='.', clobber=True, in_memory=False):
filename = url.split('/')[-1]
- if destdir != '.':
+ if destdir != '.' and not os.path.exists(destdir):
+ if VERBOSE:
+ print("Creating directory: {}".format(destdir))
os.makedirs(destdir, mode=0o775, exist_ok=True)
outfile = os.path.join(destdir, filename);
+
if not clobber and os.path.exists(outfile):
print("Skipping: {}".format(outfile))
+ STATS['skipped'] += 1
return outfile
if not url.startswith('http'):
@@ -35,16 +64,24 @@ def download(url, destdir='.', clobber=True, in_memory=False):
r = requests.get(url, stream = True)
if r.status_code != 200:
- print("HTTP ERROR[{}]: Could not download: {}".format(r.status_code, url))
+ msg = "HTTP ERROR[{}]: Could not download: {}".format(r.status_code, url)
+ print(msg, file=sys.stderr)
+ STATS['failed'] += 1
+ ERRORS.append(msg)
return ""
if in_memory:
return r.contents
- with open(outfile,"w+b") as fp:
+ if VERBOSE:
+ print("Writing to: {}".format(outfile))
+
+ with open(outfile, "w+b") as fp:
for chunk in r.iter_content(chunk_size=0xFFFF):
if chunk:
fp.write(chunk)
+
+ STATS['processed'] += 1
return outfile
@@ -77,15 +114,18 @@ if __name__ == '__main__':
parser.add_argument('-o', '--output-dir', required=True, help='Path to output directory')
parser.add_argument('-c', '--clobber', action='store_true', help='Overwrite existing packages')
parser.add_argument('-p', '--pattern', action='append', help='Search tree for directories and filenames matching patterns (e.g. \'*/latest-*\')')
+ parser.add_argument('-v', '--verbose', action='store_true', help='Be verbose')
args = parser.parse_args()
input_dir = args.input_dir
output_dir = args.output_dir
+ VERBOSE = args.verbose
pattern = ['*']
if args.pattern:
pattern = args.pattern
+ start_time = time.time()
for spec in spec_search(input_dir, pattern):
urls = spec_read(spec)
channel_parent = channel_dir(spec)
@@ -93,5 +133,9 @@ if __name__ == '__main__':
channel_sibling = channel_dir(url)
new_channel = os.path.join(output_dir, channel_parent, channel_sibling)
download(url, destdir=new_channel, clobber=args.clobber);
+ STATS['urls'] += len(urls)
+ STATS['files'] += 1
-
+ stop_time = (time.time() - start_time)
+ STATS['time_sec'] = '{:0.3f}'.format(stop_time)
+ post_info()