diff options
Diffstat (limited to 'firewatch')
| -rw-r--r-- | firewatch/__init__.py | 0 | ||||
| -rw-r--r-- | firewatch/firewatch.py | 239 | 
2 files changed, 239 insertions, 0 deletions
"""firewatch: list the packages published in conda channels, ordered by date.

Contents of ``firewatch/firewatch.py``.  The module downloads
``repodata.json`` from every requested channel/platform combination, derives
a timestamp for each package entry and prints the entries that fall inside
the requested time span.
"""
import errno
import json
import platform as PLATFORM
import requests
import sys
import time
from datetime import datetime, timedelta


# Channels that are scanned when no --channel option is given.
conda_channel_pool = [
    'https://repo.continuum.io/pkgs/main',
    # 'https://repo.continuum.io/pkgs/free',  # deprecated: conda<4.3.30
]

# Suffix letter -> number of seconds represented by one unit.
time_units = dict(
    s=1,  # second
    m=60,  # minute
    h=3600,  # hour
    d=86400,  # day
    w=604800,  # week
    M=2.628e+6,  # month
    y=3.154e+7,  # year
    D=3.154e+8,  # decade
    c=3.154e+9,  # century
)

# platform.system() -> conda platform name
system_map = dict(
    Linux='linux',
    Darwin='osx',
    Windows='win'
)

# platform.machine() -> conda architecture suffix
machine_map = dict(
    i386='32',
    x86_64='64',
    AMD64='64',  # what 64-bit Windows reports instead of x86_64
)


def extract_channel_platform(url):
    """Return the last two path elements of ``url`` ("channel/platform-arch").
    """
    return '/'.join(url.split('/')[-2:])


def convert_human_timespan(t):
    """Convert a time span such as "12h" or "2w" into a number of seconds.

    :param t: string made of an integer followed by one unit letter
              (a key of ``time_units``)
    :returns: ``value * time_units[unit]`` (a float for the M/y/D/c units)
    :raises ValueError: if the value is not an integer, the unit is unknown,
                        or the string is too short to contain both
    """
    # Guard the indexing below: '' used to raise IndexError instead of the
    # ValueError that callers are expected to handle.
    if len(t) < 2:
        raise ValueError(f'Invalid time span: "{t}" (expected: '
                         '<integer><unit>)')

    value, unit = int(t[:-1]), t[-1]
    if unit not in time_units:
        raise ValueError(f'Invalid time unit: "{unit}" (expected: ['
                         f'{"|".join(time_units)}])')
    return value * time_units[unit]


def _report_request_error(err, target):
    """Print a one-line description of a ``requests`` failure to stderr.

    ``err.response`` is only populated for HTTP-level failures; connection
    errors, timeouts and malformed URLs carry no response object, so fall
    back to the text of the exception itself.
    """
    response = getattr(err, 'response', None)
    if response is not None:
        detail = f'{response.status_code}/{response.reason}'
    else:
        detail = str(err)
    print(f'Error {detail}: {target}', file=sys.stderr)


def get_packages(channels):
    """Download and parse ``repodata.json`` for every channel URL.

    :param channels: iterable of full channel URLs (including the platform)
    :returns: list of dicts with the keys ``packages`` (the "packages"
              mapping of the repodata), ``channel`` (short
              "channel/platform" label) and ``url`` (the full channel URL).
              Channels that cannot be fetched or parsed are reported on
              stderr and left out of the result.
    """
    packages = list()
    for channel in channels:
        repodata = f'{channel}/repodata.json'
        data = dict(
            packages=list(),
            channel=extract_channel_platform(channel),
            url=channel,
        )

        try:
            with requests.get(repodata) as r:
                r.raise_for_status()
                data['packages'] = json.loads(r.text)['packages']
        except requests.exceptions.RequestException as e:
            _report_request_error(e, channel)
        except (ValueError, KeyError, TypeError) as e:
            # Body was not JSON, or had no "packages" mapping.
            print(f'Error {e!r}: {channel}', file=sys.stderr)
        else:
            packages.append(data)

    return packages


def _remote_last_modified(url, fmt):
    """Return the 'last-modified' time of ``url`` as a datetime, or None.

    Issues an HTTP HEAD request.  Any failure (request error, missing or
    unparsable header) is reported on stderr and yields None.
    """
    try:
        modified = requests.head(url).headers['last-modified']
        return datetime.strptime(modified, fmt)
    except requests.exceptions.RequestException as e:
        _report_request_error(e, url)
    except (KeyError, ValueError) as e:
        print(f'Error no usable "last-modified" header ({e}): {url}',
              file=sys.stderr)
    return None


def get_timestamps(data, brute_force=False):
    """Extract and convert package timestamps to datetime objects.

    :param data: list of dicts as returned by ``get_packages``
    :param brute_force: when True, packages that carry neither a 'date' nor
                        a 'timestamp' field are looked up on the remote
                        server (one HEAD request per package)
    :returns: generator of dicts with the keys ``name``, ``channel`` and
              ``timestamp``.  Packages without any usable time information
              get 1970-01-01; packages whose brute-force lookup fails are
              skipped.
    """
    rt_fmt = '%a, %d %b %Y %H:%M:%S %Z'

    for base in data:
        # The short label is not a fetchable address; use the full channel
        # URL when the entry provides one.
        channel_url = base.get('url', base['channel'])

        for pkg_name, pkg_info in base['packages'].items():
            result = dict(name=pkg_name, channel=base['channel'])

            timestamp = datetime(1970, 1, 1)
            # Continuum used 'date' for tracking some time ago
            if 'date' in pkg_info:
                date_parts = [int(x) for x in pkg_info['date'].split('-')]
                timestamp = datetime(*date_parts)

            # Newer packages use 'timestamp', but the unix epoch is stored
            # either in milliseconds or in seconds.  Try milliseconds first;
            # an implausibly old result means the value was in seconds.
            elif 'timestamp' in pkg_info:
                timestamp = datetime.fromtimestamp(
                    pkg_info['timestamp'] // 1000)
                if timestamp < datetime(2000, 1, 1):
                    timestamp = datetime.fromtimestamp(pkg_info['timestamp'])

            # Scan remote server for 'last-modified' timestamp
            # Don't do this unless you own the server you're spamming.
            elif brute_force:
                timestamp = _remote_last_modified(
                    f'{channel_url}/{pkg_name}', rt_fmt)
                if timestamp is None:
                    continue

            result['timestamp'] = timestamp
            yield result


def noarch_channel(channel, platform):
    """Return ``channel`` with its platform component swapped for 'noarch'.
    """
    # Prefer replacing only the trailing path component so that a platform
    # string that also appears in the host or channel name is left alone.
    head, sep, tail = channel.rpartition('/')
    if sep and tail == platform:
        return f'{head}/noarch'
    return channel.replace(f'{platform}', 'noarch')


def _strip_platform(channel, platform):
    """Cut ``channel`` at the first path component equal to ``platform``.

    The first component (host name or bare channel name) is never removed,
    and only whole components are compared.
    """
    scheme, sep, rest = channel.partition('://')
    if not sep:
        scheme, rest = '', channel

    parts = rest.split('/')
    for index, part in enumerate(parts[1:], start=1):
        if part == platform:
            parts = parts[:index]
            break
    return f'{scheme}{sep}{"/".join(parts)}'


def convert_channel(channel, platform, noarch=False):
    """Build the full URL of ``channel`` for ``platform``.

    :param channel: channel URL, or a bare channel name which is resolved
                    against https://conda.anaconda.org
    :param platform: conda platform string, e.g. 'linux-64'
    :param noarch: return the URL of the 'noarch' sub-directory instead
    """
    # Strip trailing slash
    if channel.endswith('/'):
        channel = channel[:-1]

    # Sanitize URL by stripping out part we will adjust dynamically
    channel = _strip_platform(channel, platform)

    if '://' not in channel:
        channel = f'https://conda.anaconda.org/{channel}/{platform}'
    else:
        channel = f'{channel}/{platform}'

    if noarch:
        channel = noarch_channel(channel, platform)

    return channel


def get_platform():
    """Generate a conda compatible platform-arch string.

    :returns: e.g. 'linux-64', or None (after printing a message to stderr)
              when the system/machine combination is not in ``system_map`` /
              ``machine_map``
    """
    system = PLATFORM.system()
    machine = PLATFORM.machine()

    try:
        return '-'.join([system_map[system], machine_map[machine]])
    except KeyError:
        print(f'Unknown platform/arch combination: {system}/{machine}',
              file=sys.stderr)
    return None


def main():
    """Command line entry point: fetch, sort and print package timestamps."""
    import os
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('--benchmark', action='store_true',
                        help='Display total time to parse and sort channel data')

    parser.add_argument('--brute-force', action='store_true',
                        help='Derive timestamps from HTTP header: "last-modified"')

    # NOTE: no list default here.  With action='append' argparse appends to
    # the default object itself, which mutated conda_channel_pool and made
    # it impossible to replace the defaults from the command line.
    parser.add_argument('--channel', '-c', default=None,
                        action='append', dest='channels', help='Conda channel')

    parser.add_argument('--order', '-o', default='asc', help='[asc|dsc]')

    parser.add_argument('--platform', '-p', default=None,
                        action='append', dest='platforms',
                        help=f'[{"|".join(system_map.values())}]'
                             f'-[{"|".join(sorted(set(machine_map.values())))}]')

    parser.add_argument('--time-span', '-t', default='1c',
                        help=f'i[{"|".join(time_units)}]'
                        ' (120s, 12h, 1d, 2w, 3m, 4y)')

    args = parser.parse_args()

    # Anything other than 'asc' selects descending order.
    descending = args.order != 'asc'

    channel_names = args.channels or list(conda_channel_pool)

    platforms = args.platforms
    if not platforms:
        detected = get_platform()
        if detected is None:
            parser.error('unable to detect the host platform;'
                         ' use --platform')
        platforms = [detected]

    # Validate the time span before doing any network traffic.
    today = datetime.now()
    try:
        span_delta = today - timedelta(
            seconds=convert_human_timespan(args.time_span))
    except (ValueError, OverflowError) as e:
        parser.error(f'--time-span: {e}')

    if args.benchmark:
        timer_start = time.time()

    channels = set()
    for platform in set(platforms):
        for name in channel_names:
            channels.add(convert_channel(name, platform))
            channels.add(convert_channel(name, platform, noarch=True))
    channels = sorted(channels)

    packages = get_packages(channels)
    timestamps = sorted(get_timestamps(packages, args.brute_force),
                        reverse=descending, key=lambda x: x['timestamp'])

    if args.benchmark:
        timer_stop = time.time()
        print('#benchmark: {:.02f}s'.format(timer_stop - timer_start))

    channel_width = max((len(extract_channel_platform(x)) for x in channels),
                        default=0) + 1

    try:
        print('#{:<20s} {:<{channel_width}s}  {:<40s}'.format(
              'date', 'channel', 'package', channel_width=channel_width))

        for info in timestamps:
            name = info['name']
            ts = info['timestamp']
            chn = info['channel']

            tstr = ts.isoformat()
            if span_delta < ts:
                print(f'{tstr:<20s}: {chn:<{channel_width}s}: {name:<40s}')

        sys.stdout.flush()
    except IOError as e:
        if e.errno != errno.EPIPE:
            raise
        # The reader went away (e.g. '| head').  Python flushes stdout again
        # at exit, which would raise a second BrokenPipeError; point stdout
        # at devnull so that final flush has nowhere to fail.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())


if __name__ == '__main__':
    main()
