#!/usr/bin/env python3 import re import gzip import socket import pandas as pd import datetime as dt import matplotlib.pyplot as plt import matplotlib.dates as mdates # Notes # df.to_pickle(filename) for serializing a pandas data frame to disk. # df.read_pickle(filename) to get it back. # regex pattern to extract key values from each line of an apache/nginx access log # Accommodate PUTs as well as second URLs (normally "-") patt = '(?P\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) - - \\[(?P\\d{2}\\/[a-zA-Z]{3}\\/\\d{4}):(?P