9 files changed, 497 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b5be78d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.idea
+__pycache__
+venv
+*.egg-info
+benchstrace/_version.py
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..2086dd0
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2023, Association of Universities for Research in Astronomy (AURA)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/README.md
diff --git a/benchstrace/__init__.py b/benchstrace/__init__.py
new file mode 100644
index 0000000..a85946d
--- /dev/null
+++ b/benchstrace/__init__.py
@@ -0,0 +1,3 @@
+from . import benchmark
+from . import strace
+from ._version import __version__, __version_tuple__
diff --git a/benchstrace/benchmark.py b/benchstrace/benchmark.py
new file mode 100644
index 0000000..8094bd5
--- /dev/null
+++ b/benchstrace/benchmark.py
@@ -0,0 +1,284 @@
+import os
+import sys
+from .strace import STrace, STraceRecord
+from .strace import parse_record
+
+
+# Bit flags for the `mode` argument of Benchmark.diff; one distinct bit each
+SHOW_COLLECTION = 1 << 0
+SHOW_AVERAGE = 1 << 1
+SHOW_TOTAL = 1 << 2
+SHOW_ALL = SHOW_COLLECTION | SHOW_AVERAGE | SHOW_TOTAL
+
+
+class Benchmark:
+    """Collect, save, load, and compare strace records of a command"""
+    # Number of header lines in a saved file; collections begin after them
+    MAX_HEADER_LINES = 3
+
+    def __init__(self, command="", passes=2, setup="", teardown=""):
+        """
+        A benchmark record
+        :param command: to execute
+        :param passes: times to execute command (at least once)
+        :param setup: to execute before command
+        :param teardown: to execute after command
+        """
+        self.result = []
+        self.command = command
+        self.passes = max(passes, 1)
+        self.count = 0
+
+        if not self.command:
+            # Nothing to run; records may be filled in later with load()
+            return
+
+        for i in range(self.passes):
+            if self.passes > 1:
+                print(f"Pass [{i+1}/{self.passes}]: ", end="")
+            print(f"Running `{self.command}`")
+            data = STrace(command, setup=setup, teardown=teardown).records
+            self.count += len(data)
+            self.result.append(data)
+
+    def save(self, filename, clobber=False):
+        """
+        Write benchmark data to a file
+        :param filename: output file path
+        :param clobber: overwrite the output file when it already exists
+        :raises FileExistsError: if the file exists and clobber is not set
+        """
+        if not clobber and os.path.exists(filename):
+            raise FileExistsError(filename)
+
+        filename = os.path.abspath(filename)
+        print(f"Writing {filename}")
+        with open(filename, "w") as fp:
+            # Header: collection count, command, blank line
+            print(f"# {len(self.result)}", file=fp)
+            print(f"# {self.command}", file=fp)
+            print("", file=fp)
+            for t, data in enumerate(self.result):
+                # Each collection starts with a "# <index>" marker line
+                print(f"# {t}", file=fp)
+                for rec in data:
+                    rec_fmt = f"{rec.name} {rec.calls} {rec.seconds:.6f} {rec.min:.6f} {rec.max:.6f} {rec.usecs_call}"
+                    print(rec_fmt, file=fp)
+
+    @staticmethod
+    def _header_value(lines, index):
+        """
+        Text following the first "#" of a header line
+        :param lines: all lines of the file
+        :param index: header line number
+        :return: stripped text, or an empty string if the line or "#" is missing
+        """
+        if index >= len(lines):
+            return ""
+        _, marker, value = lines[index].partition("#")
+        return value.strip() if marker else ""
+
+    def load(self, filename):
+        """
+        Read benchmark data from a file
+        :param filename: input file path
+        :raises ValueError: if the file reports no collections
+        """
+        filename_s = os.path.basename(filename)
+
+        with open(filename, "r") as fp:
+            lines = fp.read().splitlines()
+
+        try:
+            collection_max = int(self._header_value(lines, 0))
+        except ValueError:
+            collection_max = 0
+        command = self._header_value(lines, 1) or "UNKNOWN"
+
+        if not collection_max:
+            raise ValueError(f"{filename_s} has no collections!")
+
+        if command == "UNKNOWN":
+            print(f"{filename_s}: no command stored", file=sys.stderr)
+
+        print(f"{filename_s}: {collection_max} collection(s)")
+        print(f"{filename_s}: command: {command}")
+        result = []
+        data = []
+        data_count = 0
+        started = False
+
+        for line in lines[self.MAX_HEADER_LINES:]:
+            line = line.strip()
+            if not line:
+                continue
+
+            if line.startswith("#"):
+                # A marker closes the collection gathered so far, if any
+                if started:
+                    result.append(data)
+                    data = []
+                started = True
+                continue
+
+            # Store record
+            data.append(parse_record(line))
+            data_count += 1
+
+        if started or data:
+            result.append(data)
+        if command != "UNKNOWN":
+            self.command = command
+        self.count = data_count
+        self.result = result
+        print(f"{filename_s}: {data_count} records")
+
+    def diff_record(self, a, b):
+        """
+        Calculate the difference between records a and b
+        :param a: baseline StraceRecord
+        :param b: comparison StraceRecord
+        :return: StraceRecord named after a, holding b minus a for each numeric field
+        """
+        return STraceRecord(
+            name=a.name,
+            calls=b.calls - a.calls,
+            seconds=b.seconds - a.seconds,
+            min=b.min - a.min,
+            max=b.max - a.max,
+            usecs_call=b.usecs_call - a.usecs_call,
+        )
+
+    @property
+    def total(self):
+        """Sum of all records"""
+        result = dict(name="Total", calls=0, usecs_call=0, min=0, max=0, seconds=0)
+        for trace in self.result:
+            for rec in trace:
+                for key in ("calls", "usecs_call", "min", "max", "seconds"):
+                    result[key] += getattr(rec, key)
+
+        return STraceRecord(**result)
+
+    @property
+    def average(self):
+        """
+        Average of all records
+        :return: StraceRecord of per-record means; calls holds the total call count
+        """
+        total = self.total
+        # With no records, divide by 1 rather than 0
+        count = self.count or 1
+        return STraceRecord(
+            name="Average",
+            calls=total.calls,
+            seconds=total.seconds / count,
+            min=total.min / count,
+            max=total.max / count,
+            # usecs_call stays an integer because it is displayed with "d"
+            usecs_call=int(total.usecs_call / count),
+        )
+
+    @staticmethod
+    def get_winner(data):
+        """
+        Describe the speed change expressed by a difference record
+        :param data: StraceRecord of differences (comparison minus baseline)
+        :return: "same", "faster", or "slower", judged by usecs_call
+        """
+        if data.usecs_call == 0:
+            return "same"
+        if data.usecs_call < 0:
+            return "faster"
+        return "slower"
+
+    @staticmethod
+    def get_percent(a, b):
+        """
+        Percentage change in usecs_call between records a and b
+        :param a: baseline StraceRecord
+        :param b: comparison StraceRecord
+        :return: difference as a percentage of the larger value; 0.0 if that value is zero
+        """
+        delta = b.usecs_call - a.usecs_call
+        # Divide by whichever side is larger
+        base = a.usecs_call if delta < 0 else b.usecs_call
+        if not base:
+            return 0.0
+        return abs(delta) / base * 100
+
+    def diff_show_record(self, title, a, b):
+        """
+        Display calls and usecs_call of a and b side by side with their difference
+        :param title: heading to print above the record
+        :param a: baseline StraceRecord
+        :param b: comparison StraceRecord
+        """
+        abdiff = self.diff_record(a, b)
+        if a.calls and b.calls:
+            fastest = self.get_winner(abdiff)
+            percent = self.get_percent(a, b)
+        else:
+            # One side never made the call, so there is no speed to compare
+            percent = 0
+            fastest = ""
+
+        print(f"{title}:")
+        print(f"\tcalls: {a.calls:10d} {b.calls:10d} {abdiff.calls:+10d}")
+        print(f"\t\u00B5s/call: {a.usecs_call:8d} {b.usecs_call:10d} {abdiff.usecs_call:+10d} {percent:10.2f}% {fastest}")
+
+    def diff_total(self, b):
+        """
+        Display the difference between total and b.total
+        :param b: Benchmark to compare against
+        """
+        self.diff_show_record("Total", self.total, b.total)
+
+    def diff_average(self, b):
+        """
+        Display the difference between average and b.average
+        :param b: Benchmark to compare against
+        """
+        self.diff_show_record("Average", self.average, b.average)
+
+    @staticmethod
+    def normalize_results(a, b):
+        """
+        Align the collections of a and b so both sides list the same record
+        names in the same (sorted) order
+        :param a: baseline Benchmark
+        :param b: comparison Benchmark
+        :return: iterator of (a_records, b_records) pairs, one per collection;
+                 a name missing on one side is filled in with an all-zero record
+        """
+        def by_name(records):
+            table = {}
+            for rec in records:
+                # Keep the first record if a name is repeated
+                table.setdefault(rec.name, rec)
+            return table
+
+        def nop(name):
+            return STraceRecord(name=name, calls=0, seconds=0, min=0, max=0, usecs_call=0)
+
+        x1_result = []
+        x2_result = []
+        # Collections are paired by position; extras on the longer side are dropped
+        for left, right in zip(a.result, b.result):
+            x1 = by_name(left)
+            x2 = by_name(right)
+            names = sorted(set(x1) | set(x2))
+            x1_result.append([x1.get(name) or nop(name) for name in names])
+            x2_result.append([x2.get(name) or nop(name) for name in names])
+        return zip(x1_result, x2_result)
+
+    def diff(self, b, mode=SHOW_ALL):
+        """
+        Display the difference between stored result and b
+        :param b: Benchmark to compare against
+        :param mode: flag to handle various output modes (not implemented)
+        """
+        for i, (left, right) in enumerate(self.normalize_results(self, b)):
+            print(f"\nCOLLECTION {i+1}\n")
+            for x1, x2 in zip(left, right):
+                self.diff_show_record(x1.name, x1, x2)
+\ No newline at end of file
diff --git a/benchstrace/cli.py b/benchstrace/cli.py
new file mode 100644
index 0000000..caa4c19
--- /dev/null
+++ b/benchstrace/cli.py
@@ -0,0 +1,75 @@
+from argparse import ArgumentParser
+from .benchmark import Benchmark
+import sys
+
+
+def prof_mode(args):
+    """
+    Benchmark a command and optionally save the result
+    :param args: parsed arguments of the "prof" sub-command
+    :return: exit code
+    """
+    result = Benchmark(args.COMMAND, passes=args.passes, setup=args.setup, teardown=args.teardown)
+
+    print(f"Records: {result.count}")
+    if args.output_file:
+        try:
+            result.save(args.output_file, clobber=args.clobber)
+        except FileExistsError as e:
+            print(f"Error: output file exists: {e}", file=sys.stderr)
+            return 1
+    return 0
+
+
+def diff_mode(args):
+    """
+    Compare two saved benchmarks and display the differences
+    :param args: parsed arguments of the "diff" sub-command
+    :return: exit code
+    """
+    left = Benchmark()
+    left.load(args.left)
+    right = Benchmark()
+    right.load(args.right)
+    left.diff(right)
+
+    print("\nSUMMARY\n")
+    left.diff_average(right)
+    left.diff_total(right)
+    return 0
+
+
+def main(argv=None):
+    """
+    Command-line entry point
+    :param argv: arguments to parse; defaults to sys.argv[1:]
+    :return: exit code of the selected sub-command
+    """
+    parser = ArgumentParser()
+    subparsers = parser.add_subparsers()
+
+    parser_prof = subparsers.add_parser("prof")
+    parser_prof.add_argument("-o", "--output_file", type=str)
+    parser_prof.add_argument("-c", "--clobber", action='store_true')
+    parser_prof.add_argument("-p", "--passes", type=int, default=1)
+    parser_prof.add_argument("-s", "--setup", type=str)
+    parser_prof.add_argument("-t", "--teardown", type=str)
+    parser_prof.add_argument("COMMAND")
+    parser_prof.set_defaults(func=prof_mode)
+
+    parser_diff = subparsers.add_parser("diff")
+    parser_diff.add_argument("left")
+    parser_diff.add_argument("right")
+    parser_diff.set_defaults(func=diff_mode)
+
+    args = parser.parse_args(argv)
+    # Without a sub-command there is no handler to run
+    if not hasattr(args, "func"):
+        parser.print_help()
+        return 0
+
+    return args.func(args)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/benchstrace/strace.py b/benchstrace/strace.py
new file mode 100644
index 0000000..efb35d4
--- /dev/null
+++ b/benchstrace/strace.py
@@ -0,0 +1,141 @@
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+import tempfile
+from collections import namedtuple
+
+# Field order of one record line; also the STraceRecord field names
+KEYS = ["name", "calls", "seconds", "min", "max", "usecs_call"]
+STraceRecord = namedtuple("STraceRecord", KEYS)
+# Fields parsed as integers; every other field except "name" is a float
+_INT_KEYS = ("calls", "usecs_call")
+
+
+def find_program(name):
+    """
+    Locate an executable on the system PATH
+    :param name: program name
+    :return: absolute path to the program, or an empty string if not found
+    """
+    target = shutil.which(name)
+    if not target:
+        return ""
+    return os.path.abspath(target)
+
+
+def parse_record(line):
+    """
+    Parse a single line from: strace -w -c
+    :param line: whitespace-separated fields in KEYS order
+    :return: an STraceRecord
+    :raises ValueError: if a field is missing or is not a valid number
+    """
+    fields = line.split()
+    if len(fields) < len(KEYS):
+        raise ValueError(f"malformed strace record: {line!r}")
+
+    result = dict()
+    for key, value in zip(KEYS, fields):
+        if key == "name":
+            result[key] = value
+        elif key in _INT_KEYS:
+            # Tolerate a fractional part by keeping only the integer portion
+            result[key] = int(value.partition(".")[0])
+        else:
+            result[key] = float(value)
+    return STraceRecord(**result)
+
+
+def parse_output(lines):
+    """
+    Parse all lines from: strace -w -c
+    :param lines: list of output lines
+    :return: a list of STraceRecords
+    """
+    # The first two and the last two lines are never records (presumably the
+    # column header and the totals footer of the summary table); blank lines
+    # are ignored as well
+    return [parse_record(line) for line in lines[2:-2] if line.strip()]
+
+
+class STrace:
+    """Run strace command and parse statistical output
+    """
+    VERSION_RE = re.compile(r"strace.*version\s(?P<major>\d+)\.(?P<minor>\d+)\.?(?P<patch>\d+)?")
+    NEED_VERSION = (6, 0, 0)
+
+    def __init__(self, command="", setup="", teardown="", output_file=""):
+        """
+        Check the installed strace, then trace `command`
+        :param command: command line to trace
+        :param setup: command to execute before `command`
+        :param teardown: command to execute after `command`
+        :param output_file: unused
+        :raises FileNotFoundError: if strace is not found on PATH
+        :raises RuntimeError: if the installed strace is older than NEED_VERSION
+        """
+        self.program = find_program("strace")
+        if not self.program:
+            raise FileNotFoundError("strace: program not found on PATH")
+        self.command = command
+        version = self.version
+        if version < self.NEED_VERSION:
+            found = ".".join(str(x) for x in version[:2])
+            need = ".".join(str(x) for x in self.NEED_VERSION)
+            raise RuntimeError(f"strace {found} is too old. Install {need}, or greater.")
+        self.records = self.run(setup, teardown) or []
+
+    def run(self, setup="", teardown=""):
+        """
+        Execute strace
+        :param setup: command to execute before `self.command`
+        :param teardown: command to execute after `self.command`
+        :return: a list of STraceRecords
+        """
+        handle, tmpfile = tempfile.mkstemp()
+        os.close(handle)
+        try:
+            command = [
+                self.program, "-o", tmpfile, "-w", "-c", "-S", "name",
+                "-U", "name,calls,time-total,time-min,time-max,time-avg",
+            ] + shlex.split(self.command)
+
+            if setup:
+                proc_setup = subprocess.run(shlex.split(setup))
+                if proc_setup.returncode:
+                    print(f"Warning: setup command failed ({proc_setup.returncode})", file=sys.stderr)
+
+            proc = subprocess.run(command, stderr=subprocess.DEVNULL)
+            if proc.returncode:
+                print(f"Warning: non-zero exit ({proc.returncode})", file=sys.stderr)
+
+            if teardown:
+                proc_teardown = subprocess.run(shlex.split(teardown))
+                if proc_teardown.returncode:
+                    print(f"Warning: teardown command failed ({proc_teardown.returncode})", file=sys.stderr)
+
+            with open(tmpfile, "r") as fp:
+                data = fp.read().splitlines()
+        finally:
+            # Never leave the temporary summary file behind
+            os.remove(tmpfile)
+        return parse_output(data)
+
+    @property
+    def version(self):
+        """Retrieve version number from strace
+        :return: tuple containing major, minor, and patch version;
+                 (0, 0, 0) when the version cannot be determined
+        """
+        command = [self.program, "--version"]
+        proc = subprocess.run(command, capture_output=True)
+        data = proc.stdout.decode().splitlines()
+        match = self.VERSION_RE.match(data[0]) if data else None
+        if not match:
+            return 0, 0, 0
+
+        # The patch group is optional and is None when absent
+        return tuple(int(match.group(k) or 0) for k in ("major", "minor", "patch"))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f53ca6b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,26 @@
+[build-system]
+requires = ["setuptools>=61", "setuptools-scm[toml]>=6.2"]  # the [project] table below needs setuptools>=61
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools_scm]
+write_to = "benchstrace/_version.py"
+
+[project]
+name = "benchstrace"
+authors = [
+    {name = "Joseph Hunkeler", email = "jhunkeler@gmail.com"},
+]
+description = "Benchmark and compare strace data"
+readme = "README.md"
+requires-python = ">=3.7"
+keywords = ["benchmark", "profile"]
+license = {text = "BSD-3-Clause"}
+classifiers = [
+    "Programming Language :: Python :: 3",
+]
+dynamic = ["version"]
+
+[project.optional-dependencies]
+
+[project.scripts]
+benchstrace = "benchstrace.cli:main"
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..6068493
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,3 @@
+from setuptools import setup
+
+setup()  # no arguments: project metadata is declared in pyproject.toml