#!/usr/bin/env python
"""Mirror conda packages referenced by astroconda-releases spec files.

Walks a release tree for spec files, reads the package URLs they
contain, and downloads (or copies) each package into a mirrored
channel layout under the output directory.
"""
import argparse
import fnmatch
import os
import shutil
import sys
import time

import requests

VERBOSE = False
ERRORS = []
STATS = {
    'files': 0,
    'urls': 0,
    'processed': 0,
    'skipped': 0,
    'failed': 0,
    'time_sec': '',
}


def post_info():
    """Print accumulated errors and run statistics."""
    if ERRORS:
        print("# Errors:")
        for msg in ERRORS:
            print(msg)

    print("# Statistics:")
    for k, v in STATS.items():
        print("{:<20s}: {:>10}".format(k, v))


def channel_dir(d):
    """Return the last two path components of d's parent directory
    (i.e. the channel/platform portion of a spec or package path)."""
    dname = os.path.dirname(d)
    channel = '/'.join(dname.split('/')[-2:])
    return channel


def download(url, destdir='.', clobber=True, in_memory=False):
    """Fetch url into destdir, honoring the clobber flag.

    Plain local paths and file:// URLs are copied instead of
    downloaded. Returns the output path, the raw response body when
    in_memory=True, or an empty string on HTTP failure.
    """
    filename = url.split('/')[-1]

    if destdir != '.' and not os.path.exists(destdir):
        if VERBOSE:
            print("Creating directory: {}".format(destdir))
        os.makedirs(destdir, mode=0o775, exist_ok=True)

    outfile = os.path.join(destdir, filename)

    if not clobber and os.path.exists(outfile):
        print("Skipping: {}".format(outfile))
        STATS['skipped'] += 1
        return outfile

    # Local sources (plain paths or file:// URLs) are copied, not downloaded.
    if not url.startswith('http'):
        if url.startswith('file://'):
            url = url.replace('file://', '')
        print("Copying: {} -> {}".format(url, destdir))
        shutil.copy2(url, outfile)
        STATS['processed'] += 1
        return outfile

    print("Downloading: {} -> {}".format(url, destdir))
    r = requests.get(url, stream=True)
    if r.status_code != 200:
        msg = "HTTP ERROR[{}]: Could not download: {}".format(r.status_code, url)
        print(msg, file=sys.stderr)
        STATS['failed'] += 1
        ERRORS.append(msg)
        return ""

    if in_memory:
        # requests exposes the body as Response.content (not .contents)
        return r.content

    if VERBOSE:
        print("Writing to: {}".format(outfile))

    # Stream the response to disk in 64 KiB chunks.
    with open(outfile, "w+b") as fp:
        for chunk in r.iter_content(chunk_size=0xFFFF):
            if chunk:
                fp.write(chunk)

    STATS['processed'] += 1
    return outfile


def spec_read(filename):
    """Return the package URLs listed in a spec file, skipping blank
    lines and lines beginning with '#' or '@'."""
    urls = []
    with open(filename, 'r') as fp:
        for line in fp:
            line = line.strip()
            if not line or line.startswith('#') or line.startswith('@'):
                continue
            urls.append(line)
    return urls


def spec_search(input_dir, patterns):
    """Walk input_dir and return every file path matching any of the
    given glob patterns."""
    results = []
    for root, _, files in os.walk(input_dir):
        for fname in files:
            path = os.path.join(root, fname)
            for pattern in patterns:
                if fnmatch.fnmatch(path, pattern):
                    results.append(path)
    return results


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input-dir', required=True,
                        help='Path to astroconda-releases directory')
    parser.add_argument('-o', '--output-dir', required=True,
                        help='Path to output directory')
    parser.add_argument('-c', '--clobber', action='store_true',
                        help='Overwrite existing packages')
    parser.add_argument('-p', '--pattern', action='append',
                        help='Search tree for directories and filenames '
                             'matching patterns (e.g. \'*/latest-*\')')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Be verbose')
    args = parser.parse_args()

    input_dir = args.input_dir
    output_dir = args.output_dir
    VERBOSE = args.verbose

    # Default to matching every file under the input tree.
    pattern = ['*']
    if args.pattern:
        pattern = args.pattern

    start_time = time.time()

    for spec in spec_search(input_dir, pattern):
        urls = spec_read(spec)
        channel_parent = channel_dir(spec)
        for url in urls:
            channel_sibling = channel_dir(url)
            new_channel = os.path.join(output_dir, channel_parent, channel_sibling)
            download(url, destdir=new_channel, clobber=args.clobber)
        STATS['urls'] += len(urls)
        STATS['files'] += 1

    stop_time = time.time() - start_time
    STATS['time_sec'] = '{:0.3f}'.format(stop_time)

    post_info()
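
# Example invocation (a sketch: the script name and paths below are
# illustrative placeholders, not taken from the source):
#
#   python mirror_specs.py -i /path/to/astroconda-releases \
#                          -o /path/to/mirror \
#                          -p '*/latest-*' -v
#
# With -p omitted, the default pattern '*' treats every file under the
# input tree as a spec file.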