2 files changed, 239 insertions, 0 deletions
diff --git a/firewatch/__init__.py b/firewatch/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/firewatch/__init__.py
diff --git a/firewatch/firewatch.py b/firewatch/firewatch.py
new file mode 100644
index 0000000..df17e8a
--- /dev/null
+++ b/firewatch/firewatch.py
@@ -0,0 +1,239 @@
+import errno
+import json
+import platform as PLATFORM
+import requests
+import sys
+import time
+from datetime import datetime, timedelta
+
+
+# Channel URLs used as the default value of the --channel option.
+conda_channel_pool = [
+    'https://repo.continuum.io/pkgs/main',
+    # 'https://repo.continuum.io/pkgs/free',  # deprecated: conda<4.3.30
+]
+
+# Number of seconds represented by each single-letter timespan suffix.
+time_units = {
+    's': 1,  # second
+    'm': 60,  # minute
+    'h': 3600,  # hour
+    'd': 86400,  # day
+    'w': 604800,  # week
+    'M': 2.628e+6,  # month
+    'y': 3.154e+7,  # year
+    'D': 3.154e+8,  # decade
+    'c': 3.154e+9,  # century
+}
+
+# platform.system() value -> operating-system part of the platform string.
+system_map = {
+    'Linux': 'linux',
+    'Darwin': 'osx',
+    'Windows': 'win',
+}
+
+# platform.machine() value -> architecture part of the platform string.
+machine_map = {
+    'i386': '32',
+    'x86_64': '64',
+}
+
+
+def extract_channel_platform(url):
+    """Return the last two '/'-separated components of ``url``.
+
+    For a channel URL this is its ``channel/platform-arch`` suffix. A string
+    containing no slash is returned unchanged.
+    """
+    return '/'.join(url.rsplit('/', 2)[-2:])
+
+
+def convert_human_timespan(t):
+    """Convert a human-readable timespan such as ``"12h"`` to seconds.
+
+    The last character of ``t`` selects a multiplier from ``time_units``;
+    everything before it is parsed as an integer count.
+
+    :param t: timespan string of the form ``<integer><unit>``
+    :returns: the span in seconds (a float when the unit's multiplier is one)
+    :raises ValueError: if ``t`` is empty, the unit is unknown, or the count
+        is not an integer
+    """
+    if not t:
+        raise ValueError('Empty timespan (expected: <integer><unit>)')
+
+    count, unit = t[:-1], t[-1]
+    if unit not in time_units:
+        raise ValueError(f'Invalid time unit: "{unit}" (expected: ['
+                         f'{"|".join(time_units)}])')
+
+    try:
+        value = int(count)
+    except ValueError:
+        # Replace int()'s generic message with one naming the bad input.
+        raise ValueError(f'Invalid time value: "{count}" (expected an '
+                         f'integer before "{unit}")') from None
+
+    return value * time_units[unit]
+
+
+def get_packages(channels):
+    """Download and parse ``repodata.json`` for each channel URL.
+
+    Channels that cannot be fetched or parsed are reported on stderr and
+    skipped, so the result may hold fewer entries than ``channels``.
+
+    :param channels: iterable of channel URLs (without ``/repodata.json``)
+    :returns: list of dicts with the keys ``packages`` (the ``packages``
+        value of the channel's repodata) and ``channel`` (the last two path
+        components of the channel URL)
+    """
+    packages = []
+    for channel in channels:
+        repodata = f'{channel}/repodata.json'
+
+        try:
+            # The timeout keeps one unresponsive server from hanging the run.
+            with requests.get(repodata, timeout=30) as r:
+                r.raise_for_status()
+                contents = json.loads(r.text)['packages']
+        except requests.exceptions.RequestException as e:
+            # Only HTTP errors carry a response object; connection failures,
+            # timeouts and malformed URLs leave e.response as None. Compare
+            # against None explicitly: an error Response is itself falsy.
+            if e.response is not None:
+                reason = f'{e.response.status_code}/{e.response.reason}'
+            else:
+                reason = str(e)
+            print(f'Error {reason}: {channel}', file=sys.stderr)
+            continue
+        except Exception as e:
+            # Best effort: e.g. invalid JSON or a missing "packages" key.
+            print(f'Error {e}: {channel}', file=sys.stderr)
+            continue
+
+        packages.append(dict(
+            packages=contents,
+            channel=extract_channel_platform(channel),
+        ))
+
+    return packages
+
+
+def get_timestamps(data, brute_force=False):
+    """Yield one record per package with its timestamp as a ``datetime``.
+
+    :param data: iterable of dicts with the keys ``channel`` and ``packages``
+        (a mapping of package name to that package's metadata dict)
+    :param brute_force: when a package has neither ``date`` nor
+        ``timestamp``, issue an HTTP HEAD request and use the
+        ``last-modified`` response header instead
+    :returns: generator of dicts with the keys ``name``, ``channel`` and
+        ``timestamp``. Packages without usable date information get
+        1970-01-01; packages whose brute-force lookup fails are reported
+        and skipped.
+    """
+    rt_fmt = '%a, %d %b %Y %H:%M:%S %Z'
+
+    for base in data:
+        for pkg_name, pkg_info in base['packages'].items():
+            result = dict()
+            result['name'] = pkg_name
+            result['channel'] = base['channel']
+
+            # Fallback used when no date information is available
+            timestamp = datetime(1970, 1, 1)
+
+            # Continuum used 'date' for tracking some time ago
+            if 'date' in pkg_info:
+                date_parts = [int(x) for x in pkg_info['date'].split('-')]
+                timestamp = datetime(*date_parts)
+
+            # Newer packages use 'timestamp', but the unix epoch may be
+            # stored in milliseconds rather than seconds. Try milliseconds
+            # first; an implausibly old result means it was seconds already.
+            elif 'timestamp' in pkg_info:
+                epoch = pkg_info['timestamp']
+                timestamp = datetime.fromtimestamp(epoch // 1000)
+                if timestamp < datetime(2000, 1, 1):
+                    timestamp = datetime.fromtimestamp(epoch)
+
+            # Scan remote server for 'last-modified' timestamp
+            # Don't do this unless you own the server you're spamming.
+            elif brute_force:
+                # NOTE(review): get_packages() stores only the last two path
+                # components of the channel URL in 'channel', so this URL
+                # appears to lack a scheme and host -- confirm.
+                url = f'{result["channel"]}/{pkg_name}'
+                try:
+                    response = requests.head(url, timeout=30)
+                    modified = response.headers['last-modified']
+                    timestamp = datetime.strptime(modified, rt_fmt)
+                except requests.exceptions.RequestException as e:
+                    # e.response is None unless the server actually replied;
+                    # compare against None since error responses are falsy.
+                    if e.response is not None:
+                        reason = (f'{e.response.status_code}/'
+                                  f'{e.response.reason}')
+                    else:
+                        reason = str(e)
+                    print(f'Error {reason}: {url}', file=sys.stderr)
+                    continue
+                except Exception as e:
+                    # Missing or unparsable 'last-modified' header
+                    print(e)
+                    continue
+
+            result['timestamp'] = timestamp
+            yield result
+
+
+def noarch_channel(channel, platform):
+    """Return ``channel`` with its platform component replaced by ``noarch``.
+
+    When ``channel`` ends in ``/<platform>`` only that trailing component is
+    swapped, so a host name or earlier path component that happens to
+    contain the platform string is left alone. Otherwise every occurrence
+    of ``platform`` is replaced.
+
+    :param channel: channel URL
+    :param platform: platform string to replace, e.g. ``linux-64``
+    :returns: the rewritten channel URL
+    """
+    suffix = f'/{platform}'
+    if channel.endswith(suffix):
+        return f'{channel[:-len(suffix)]}/noarch'
+
+    return channel.replace(f'{platform}', 'noarch')
+
+
+def convert_channel(channel, platform, noarch=False):
+    """Build the URL of a channel's platform (or noarch) subdirectory.
+
+    :param channel: channel URL, or a bare channel name which is resolved
+        against ``https://conda.anaconda.org``
+    :param platform: platform string appended to the channel
+    :param noarch: when true, pass the result through ``noarch_channel``
+    :returns: the channel URL ending in the requested subdirectory
+    """
+    # Drop a single trailing slash
+    base = channel[:-1] if channel.endswith('/') else channel
+
+    # Cut off an existing platform component and anything following it
+    cut = base.find(f'/{platform}')
+    if cut != -1:
+        base = base[:cut]
+
+    # Anything without a scheme is treated as a bare channel name
+    prefix = '' if '://' in base else 'https://conda.anaconda.org/'
+    url = f'{prefix}{base}/{platform}'
+
+    return noarch_channel(url, platform) if noarch else url
+
+
+def get_platform():
+    """Return the conda-style ``<os>-<arch>`` string for the running host.
+
+    The operating system and machine type reported by the ``platform``
+    module are translated through ``system_map`` and ``machine_map``. If
+    either one is not listed, a message is written to stderr and ``None``
+    is returned.
+    """
+    system = PLATFORM.system()
+    machine = PLATFORM.machine()
+
+    if system not in system_map or machine not in machine_map:
+        print(f'Unknown platform/arch combination: {system}/{machine}',
+              file=sys.stderr)
+        return None
+
+    return f'{system_map[system]}-{machine_map[machine]}'
+
+
+def main():
+    """Command-line entry point.
+
+    Parses the options, downloads the repodata of every channel/platform
+    combination (plus the matching ``noarch`` subdirectory) and prints the
+    packages whose timestamp falls within the requested time span, sorted
+    by timestamp.
+    """
+    import os
+    from argparse import ArgumentParser
+
+    parser = ArgumentParser()
+    parser.add_argument('--benchmark', action='store_true',
+                        help='Display total time to parse and sort channel data')
+
+    parser.add_argument('--brute-force', action='store_true',
+                        help='Derive timestamps from HTTP header: "last-modified"')
+
+    # NOTE(review): with action='append', values given on the command line
+    # are added to the default list rather than replacing it (here and for
+    # --platform) -- confirm that this is intended.
+    parser.add_argument('--channel', '-c', default=conda_channel_pool,
+                        action='append', dest='channels', help='Conda channel')
+
+    parser.add_argument('--order', '-o', default='asc', help='[asc|dsc]')
+
+    parser.add_argument('--platform', '-p', default=[get_platform()],
+                        action='append', dest='platforms',
+                        help=f'[{"|".join(system_map.values())}]'
+                             f'-[{"|".join(machine_map.values())}]')
+
+    # The month suffix is an upper-case "M"; a lower-case "m" means minutes.
+    parser.add_argument('--time-span', '-t', default='1c',
+                        help=f'i[{"|".join(time_units)}]'
+                        ' (120s, 12h, 1d, 2w, 3M, 4y)')
+
+    args = parser.parse_args()
+
+    # Anything other than "asc" sorts newest first.
+    descending = args.order != 'asc'
+
+    # get_platform() returns None for an unrecognised host; drop it rather
+    # than querying ".../None" URLs.
+    platforms = {p for p in args.platforms if p is not None}
+    if not platforms:
+        parser.error('unable to determine platform; specify --platform')
+
+    # Report a malformed time span as a usage error, not a traceback.
+    try:
+        span_seconds = convert_human_timespan(args.time_span)
+    except (ValueError, IndexError) as e:
+        parser.error(f'--time-span: {e}')
+
+    if args.benchmark:
+        timer_start = time.time()
+
+    # Every channel is queried for each platform and for noarch.
+    channels = set()
+    for platform in platforms:
+        for channel in args.channels:
+            channels.add(convert_channel(channel, platform))
+            channels.add(convert_channel(channel, platform, noarch=True))
+    channels = sorted(channels)
+
+    # Packages older than this moment are not listed.
+    cutoff = datetime.now() - timedelta(seconds=span_seconds)
+    packages = get_packages(channels)
+    timestamps = sorted(get_timestamps(packages, args.brute_force),
+                        reverse=descending, key=lambda x: x['timestamp'])
+
+    if args.benchmark:
+        timer_stop = time.time()
+        print('#benchmark: {:.02f}s'.format(timer_stop - timer_start))
+
+    channel_width = max(len(extract_channel_platform(x)) for x in channels) + 1
+
+    try:
+        print('#{:<20s} {:<{channel_width}s}  {:<40s}'.format(
+              'date', 'channel', 'package', channel_width=channel_width))
+
+        for info in timestamps:
+            name = info['name']
+            ts = info['timestamp']
+            chn = info['channel']
+
+            tstr = ts.isoformat()
+            if cutoff < ts:
+                print(f'{tstr:<20s}: {chn:<{channel_width}s}: {name:<40s}')
+
+        # Flush inside the try block so a closed pipe is detected here
+        # instead of during interpreter shutdown.
+        sys.stdout.flush()
+    except IOError as e:
+        if e.errno != errno.EPIPE:
+            raise
+        # The reader (e.g. '| head') closed the pipe before all output was
+        # written. Point stdout at devnull so the flush performed at exit
+        # does not raise a second time.
+        devnull = os.open(os.devnull, os.O_WRONLY)
+        os.dup2(devnull, sys.stdout.fileno())
+
+# Run the command-line interface when this file is executed as a script.
+if __name__ == '__main__':
+    main()