#!/usr/bin/env python3 import os import sys import re from glob import glob import pickle from math import ceil import hashlib import gzip import socket import pandas as pd import datetime as dt import matplotlib.pyplot as plt import matplotlib.dates as mdates from dateutil import parser as dpar from collections import OrderedDict def md5(fname): hash_md5 = hashlib.md5() with open(fname, "rb") as f: for chunk in iter(lambda: f.read(4096), b""): hash_md5.update(chunk) return hash_md5.hexdigest() # regex pattern to extract key values from each line of an apache/nginx access log # Accommodate PUTs as well as second URLs (normally "-") patt = '(?P\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) .* .* \\[(?P\\d{2}\\/[a-zA-Z]{3}\\/\\d{4}):(?P