From a8c187c28b335b648437b8e19edd150c3bf32132 Mon Sep 17 00:00:00 2001 From: Matt Rendina Date: Tue, 6 Aug 2019 11:31:59 -0400 Subject: Generate primary report from log files. --- logparse.py | 367 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 194 insertions(+), 173 deletions(-) diff --git a/logparse.py b/logparse.py index 4a21054..4b8edb0 100755 --- a/logparse.py +++ b/logparse.py @@ -1,58 +1,32 @@ #!/usr/bin/env python3 +import os +import sys import re +from glob import glob +import argparse +from math import ceil import gzip import socket import pandas as pd import datetime as dt import matplotlib.pyplot as plt import matplotlib.dates as mdates - -# Notes -# df.to_pickle(filename) for serializing a pandas data frame to disk. -# df.read_pickle(filename) to get it back. +from dateutil import parser as dpar +from collections import OrderedDict # regex pattern to extract key values from each line of an apache/nginx access log # Accommodate PUTs as well as second URLs (normally "-") -patt = '(?P\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}) - - \\[(?P\\d{2}\\/[a-zA-Z]{3}\\/\\d{4}):(?P