From 34aa5ae2f89e84dd622b76ec817d3f1638dde1ee Mon Sep 17 00:00:00 2001 From: Alvin Wan Date: Sat, 4 Apr 2026 23:56:33 -0700 Subject: [PATCH 1/2] add benchmark harness and docs --- README.md | 3 + benchmarks/README.md | 22 +++ benchmarks/benchmark_speed.py | 273 ++++++++++++++++++++++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 benchmarks/README.md create mode 100644 benchmarks/benchmark_speed.py diff --git a/README.md b/README.md index c6b9d76..b911a5f 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,9 @@ are regenerated by `scripts/regenerate_examples.py`. | `TexSoup/` raw Python source (`*.py`) | `98,181` bytes | `33,107` bytes, `66.3%` | `—` | `—` | | `TexSoup/` compressed source (`.tar.gz`) | `70,532` bytes | `11,850` bytes, `83.2%` | `—` | `—` | +Benchmark notes and the current speed comparison table live in +[benchmarks/README.md](./benchmarks/README.md). + ## TexSoup Validation `pymini` has been validated against the upstream `TexSoup` test suite in package mode. diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..a76e4f5 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,22 @@ +# Benchmarks + +Latency is machine-dependent. Recompute these with +`PYTHONPATH=. .venv/bin/python benchmarks/benchmark_speed.py`. + +The single-file comparison uses `pymini`, `pyminifier`, and +`python-minifier`. The package benchmark is `pymini`-only. + +| Input | `pymini` | `pyminifier` | `python-minifier` | +| --- | ---: | ---: | ---: | +| `tests/examples/pyminifier.py` | `14.1 ms` | `0.4 ms` | `1.5 ms` | +| `tests/examples/pyminify.py` | `1227.6 ms` | `1.1 ms` | `4.0 ms` | +| `TexSoup/` package API | `4928.8 ms` | `—` | `—` | +| `TexSoup/` package CLI | `5062.0 ms` | `—` | `—` | + +To reproduce those numbers: + +```bash +python3 -m pip install -e ".[dev]" python-minifier +git clone https://github.com/liftoff/pyminifier /tmp/pyminifier +PYTHONPATH=. 
.venv/bin/python benchmarks/benchmark_speed.py --pyminifier-root /tmp/pyminifier +``` diff --git a/benchmarks/benchmark_speed.py b/benchmarks/benchmark_speed.py new file mode 100644 index 0000000..3c46697 --- /dev/null +++ b/benchmarks/benchmark_speed.py @@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+"""Benchmark pymini's minification speed on the repo fixtures.
+
+Single-file examples are timed for pymini and, when they can be imported, for
+python-minifier and pyminifier. The package benchmark is pymini-only and is
+timed both through the in-memory API and through the CLI entry point.
+"""
+from __future__ import annotations
+
+import argparse
+import importlib
+import shutil
+import sys
+import tempfile
+from pathlib import Path
+from statistics import mean
+from time import perf_counter
+from types import SimpleNamespace
+
+from pymini import minify
+from pymini.cli import load_sources, main as cli_main, resolve_python_files
+
+
+ROOT = Path(__file__).resolve().parents[1]
+EXAMPLE_DIR = ROOT / "tests" / "examples"
+DEFAULT_TEXSOUP_ROOT = Path("/tmp/pymini-texsoup-repo/TexSoup")
+DEFAULT_PYMINIFIER_ROOT = Path("/tmp/pymini-pyminifier-src/pyminifier-2.1")
+
+
+def _int_at_least(minimum: int):
+    """Build an argparse ``type`` callable that rejects ints below ``minimum``."""
+
+    def parse(text: str) -> int:
+        try:
+            value = int(text)
+        except ValueError:
+            raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
+        if value < minimum:
+            raise argparse.ArgumentTypeError(f"must be >= {minimum}, got {value}")
+        return value
+
+    return parse
+
+
+def benchmark_transform(
+    transform,
+    source: str,
+    *,
+    iterations: int,
+    warmup: int,
+) -> dict[str, float]:
+    """Time ``transform(source)`` and summarize the timed calls.
+
+    Args:
+        transform: Callable that takes the source text and returns the
+            minified text.
+        source: Text passed to every call.
+        iterations: Number of timed calls; must be at least 1.
+        warmup: Number of untimed calls made before timing starts.
+
+    Returns:
+        ``output_bytes`` (encoded size of the last result), ``avg_ms`` (mean
+        wall-clock milliseconds per timed call) and ``throughput_kb_s``
+        (input KiB processed per second at that mean).
+
+    Raises:
+        statistics.StatisticsError: If ``iterations`` is less than 1.
+    """
+    result = None
+    for _ in range(warmup):
+        result = transform(source)
+    samples = []
+    for _ in range(iterations):
+        start = perf_counter()
+        result = transform(source)
+        samples.append(perf_counter() - start)
+    avg = mean(samples)
+    # A transform that returns None or "" counts as zero output bytes.
+    output_bytes = len((result or "").encode())
+    return {
+        "output_bytes": float(output_bytes),
+        "avg_ms": avg * 1000,
+        "throughput_kb_s": (len(source.encode()) / 1024) / avg,
+    }
+
+
+def pymini_single_file_transform(path: Path):
+    """Return a transform that minifies one source string with pymini.
+
+    ``path.stem`` is passed to ``minify`` as its second argument.
+    """
+
+    def transform(source: str) -> str:
+        outputs, _ = minify(source, path.stem, keep_global_variables=True)
+        return outputs[0]
+
+    return transform
+
+
+def load_python_minifier():
+    """Return a transform factory for python-minifier, or None if not installed."""
+    try:
+        python_minifier = importlib.import_module("python_minifier")
+    except ImportError:
+        return None
+
+    def factory(path: Path):
+        return lambda source: python_minifier.minify(source, filename=path.name)
+
+    return factory
+
+
+def load_pyminifier(pyminifier_root: Path):
+    """Return a transform factory for pyminifier, or None if it is unavailable.
+
+    ``pyminifier_root`` is put at the front of ``sys.path`` so the checkout can
+    be imported. The entry is kept only when the import succeeds.
+    """
+    if not pyminifier_root.exists():
+        return None
+    root_entry = str(pyminifier_root)
+    sys.path.insert(0, root_entry)
+    try:
+        minification = importlib.import_module("pyminifier.minification")
+        token_utils = importlib.import_module("pyminifier.token_utils")
+    except ImportError:
+        # Drop the entry again: left at the front of sys.path it could shadow
+        # modules that the remaining benchmarks import later.
+        sys.path.remove(root_entry)
+        return None
+    options = SimpleNamespace(tabs=False)
+
+    def factory(path: Path):
+        def transform(source: str) -> str:
+            tokens = token_utils.listified_tokenizer(source)
+            return minification.minify(tokens, options)
+
+        return transform
+
+    return factory
+
+
+def benchmark_package_api(
+    package_root: Path,
+    *,
+    iterations: int,
+    warmup: int,
+) -> dict[str, float]:
+    """Time in-memory package minification of ``package_root``.
+
+    Sources are loaded once up front, so only the ``minify`` call is timed.
+
+    Returns:
+        ``files``, ``bytes`` (total encoded input size), ``output_bytes``
+        (total encoded size of the last run's outputs), ``avg_ms`` and
+        ``throughput_kb_s``.
+
+    Raises:
+        statistics.StatisticsError: If ``iterations`` is less than 1.
+    """
+    paths, module_root = resolve_python_files(str(package_root))
+    sources, modules, _ = load_sources(paths, module_root=module_root)
+    for _ in range(warmup):
+        minify(
+            sources,
+            modules,
+            keep_global_variables=True,
+            keep_module_names=True,
+        )
+    samples = []
+    outputs = None
+    for _ in range(iterations):
+        start = perf_counter()
+        outputs, _ = minify(
+            sources,
+            modules,
+            keep_global_variables=True,
+            keep_module_names=True,
+        )
+        samples.append(perf_counter() - start)
+    raw_bytes = sum(len(source.encode()) for source in sources)
+    output_bytes = sum(len(output.encode()) for output in (outputs or []))
+    avg = mean(samples)
+    return {
+        "files": float(len(paths)),
+        "bytes": float(raw_bytes),
+        "output_bytes": float(output_bytes),
+        "avg_ms": avg * 1000,
+        "throughput_kb_s": (raw_bytes / 1024) / avg,
+    }
+
+
+def benchmark_package_cli(
+    package_root: Path,
+    *,
+    iterations: int,
+    warmup: int = 0,
+) -> dict[str, float]:
+    """Time ``pymini package`` end to end, writing into a fresh temp directory.
+
+    Args:
+        package_root: Package directory passed to the CLI.
+        iterations: Number of timed runs; must be at least 1.
+        warmup: Number of untimed runs made before timing starts.
+
+    Returns:
+        ``avg_ms`` and ``output_bytes`` (total size of the ``*.py`` files
+        written by the last run).
+
+    Raises:
+        RuntimeError: If the CLI entry point returns a value other than 0.
+        statistics.StatisticsError: If ``iterations`` is less than 1.
+    """
+
+    def run_once() -> tuple[float, int]:
+        output_dir = Path(tempfile.mkdtemp(prefix="pymini-bench-"))
+        try:
+            start = perf_counter()
+            rc = cli_main(["package", str(package_root), "-o", str(output_dir)])
+            elapsed = perf_counter() - start
+            if rc != 0:
+                raise RuntimeError(f"pymini CLI returned {rc}")
+            size = sum(len(path.read_bytes()) for path in output_dir.rglob("*.py"))
+        finally:
+            # Remove the output tree even when the CLI fails or raises.
+            shutil.rmtree(output_dir)
+        return elapsed, size
+
+    for _ in range(warmup):
+        run_once()
+    samples = []
+    output_bytes = 0
+    for _ in range(iterations):
+        elapsed, output_bytes = run_once()
+        samples.append(elapsed)
+    avg = mean(samples)
+    return {"avg_ms": avg * 1000, "output_bytes": float(output_bytes)}
+
+
+def print_example_results(
+    *,
+    example_iterations: int,
+    warmup: int,
+    pyminifier_root: Path,
+) -> None:
+    """Print a tab-separated table of single-file timings for each tool.
+
+    pymini is always benchmarked; python-minifier and pyminifier are added
+    only when their loaders return a factory.
+    """
+    tool_factories = [("pymini", pymini_single_file_transform)]
+    python_minifier = load_python_minifier()
+    if python_minifier is not None:
+        tool_factories.append(("python-minifier", python_minifier))
+    pyminifier_factory = load_pyminifier(pyminifier_root)
+    if pyminifier_factory is not None:
+        tool_factories.append(("pyminifier", pyminifier_factory))
+
+    print("Single-file API benchmarks")
+    print("input\ttool\tinput_bytes\toutput_bytes\tavg_ms\tthroughput_kb_s")
+    for path in sorted(EXAMPLE_DIR.glob("*.py")):
+        source = path.read_text(encoding="utf-8")
+        for tool_name, factory in tool_factories:
+            result = benchmark_transform(
+                factory(path),
+                source,
+                iterations=example_iterations,
+                warmup=warmup,
+            )
+            print(
+                f"{path.name}\t"
+                f"{tool_name}\t"
+                f"{len(source.encode())}\t"
+                f"{int(result['output_bytes'])}\t"
+                f"{result['avg_ms']:.3f}\t"
+                f"{result['throughput_kb_s']:.1f}"
+            )
+
+
+def print_package_results(
+    texsoup_root: Path,
+    *,
+    package_api_iterations: int,
+    package_cli_iterations: int,
+    warmup: int,
+) -> None:
+    """Print package-mode timings for TexSoup, or a notice if it is missing."""
+    if not texsoup_root.exists():
+        print(f"TexSoup benchmark skipped: {texsoup_root} does not exist")
+        return
+
+    api_result = benchmark_package_api(
+        texsoup_root,
+        iterations=package_api_iterations,
+        warmup=warmup,
+    )
+    cli_result = benchmark_package_cli(
+        texsoup_root,
+        iterations=package_cli_iterations,
+        warmup=warmup,
+    )
+
+    print()
+    print("Package benchmarks")
+    print("name\tfiles\tinput_bytes\toutput_bytes\tavg_ms\tthroughput_kb_s")
+    print(
+        f"TexSoup-api\t"
+        f"{int(api_result['files'])}\t"
+        f"{int(api_result['bytes'])}\t"
+        f"{int(api_result['output_bytes'])}\t"
+        f"{api_result['avg_ms']:.3f}\t"
+        f"{api_result['throughput_kb_s']:.1f}"
+    )
+    print(
+        f"TexSoup-cli\t-\t-\t{int(cli_result['output_bytes'])}\t"
+        f"{cli_result['avg_ms']:.3f}\t-"
+    )
+
+
+def main() -> int:
+    """Parse the command line, run every benchmark group and return 0."""
+    parser = argparse.ArgumentParser(description="Benchmark pymini speed on repo fixtures.")
+    parser.add_argument(
+        "--texsoup-root",
+        type=Path,
+        default=DEFAULT_TEXSOUP_ROOT,
+        help="Path to a TexSoup package checkout for package-mode benchmarks.",
+    )
+    parser.add_argument(
+        "--pyminifier-root",
+        type=Path,
+        default=DEFAULT_PYMINIFIER_ROOT,
+        help="Path to a pyminifier source checkout for baseline single-file benchmarks.",
+    )
+    # Averages are undefined over zero samples, so every iteration count must
+    # be at least 1.
+    parser.add_argument(
+        "--example-iterations",
+        type=_int_at_least(1),
+        default=10,
+        help="Number of timed runs per single-file example.",
+    )
+    parser.add_argument(
+        "--package-api-iterations",
+        type=_int_at_least(1),
+        default=3,
+        help="Number of timed runs for the in-memory package benchmark.",
+    )
+    parser.add_argument(
+        "--package-cli-iterations",
+        type=_int_at_least(1),
+        default=3,
+        help="Number of timed runs for the end-to-end CLI package benchmark.",
+    )
+    parser.add_argument(
+        "--warmup",
+        type=_int_at_least(0),
+        default=1,
+        help="Warmup runs to perform before each benchmark group.",
+    )
+    args = parser.parse_args()
+
+    print_example_results(
+        example_iterations=args.example_iterations,
+        warmup=args.warmup,
+        pyminifier_root=args.pyminifier_root,
+    )
+    print_package_results(
+        args.texsoup_root,
+        package_api_iterations=args.package_api_iterations,
+        package_cli_iterations=args.package_cli_iterations,
+        warmup=args.warmup,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
From d454edd2dc531008e95da992603dad7ed23c98fd Mon Sep 17 00:00:00 2001 From: Alvin Wan Date: Sun, 5 Apr 2026 00:01:13 -0700 Subject: [PATCH 2/2] shorten readmes and move benchmark details --- README.md | 98 ++++++++++++-------------------------------- benchmarks/README.md | 63 ++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index b911a5f..ddef45d 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,29 @@ # pymini -`pymini` minifies Python source code by simplifying syntax, shortening identifiers, and stripping 
unnecessary whitespace. Its primary multi-file workflow preserves package structure; one-file bundling is available as an explicit opt-in. +`pymini` is an AST-based Python minifier for scripts and packages. It preserves +package layout by default, can emit a single-file bundle when asked, and can +shrink Python code by roughly `30%` to `90%` depending on the codebase and +whether you compare raw source or compressed archives. -## Status +- [Getting Started](#getting-started) +- [Installation](#installation) +- [Benchmarks](./benchmarks/README.md) -This project is maintained as an AST-based minifier for Python 3.9+ code. It is best suited to scripts and small-to-medium package graphs. Package mode preserves package layout and now covers relative imports, dotted imports, star imports, package re-exports, and `importlib`-based internal imports; bundle mode emits a self-contained loader-backed single file for the same kinds of graphs. +# Getting Started -## Installation - -```bash -python3 -m pip install pymini -``` - -## CLI - -Package mode is the default and preserves the package tree: +Package mode preserves the package tree: ```bash pymini package src -o out ``` -Legacy invocation without an explicit mode still defaults to `package`: - -```bash -pymini src -o out -``` - -By default, `pymini` preserves module paths and public globals. When possible, it keeps the public surface stable by emitting aliases while still shortening internal names. To trade API stability for more aggressive compression: - -```bash -pymini package src --rename-global-variables -o out -``` - -Bundle mode emits a single file and is better suited to app-style graphs than libraries: +Bundle mode emits one file: ```bash pymini bundle src -o out/bundle.py ``` -The legacy `--single-file` flag is still accepted as a compatibility alias for bundle mode. 
- -## Python API +You can also use the Python API directly: ```python from pymini import minify @@ -54,59 +37,32 @@ sources, modules = minify( ) ``` -## Development - -Install development dependencies and run the test suite: - -```bash -python3 -m pip install -e ".[dev]" -python3 -m pytest -``` - -## Compression Examples +# Compression -Checked-in minified outputs for the repo fixtures live in [examples](./examples) and -are regenerated by `scripts/regenerate_examples.py`. +Current checked-in fixtures: -| Input | Original | `pymini` | `pyminifier` | `python-minifier` (`pyminify`) | -| --- | ---: | ---: | ---: | ---: | -| `tests/examples/pyminifier.py` | `1,355` bytes | `511` bytes, `62.3%` | `676` bytes, `50.1%` | `1,020` bytes, `24.7%` | -| `tests/examples/pyminify.py` | `1,990` bytes | `981` bytes, `50.7%` | `1,605` bytes, `19.3%` | `983` bytes, `50.6%` | -| `TexSoup/` raw Python source (`*.py`) | `98,181` bytes | `33,107` bytes, `66.3%` | `—` | `—` | -| `TexSoup/` compressed source (`.tar.gz`) | `70,532` bytes | `11,850` bytes, `83.2%` | `—` | `—` | +| Input | Original | Minified | Reduction | +| --- | ---: | ---: | ---: | +| `tests/examples/pyminifier.py` | `1,355` bytes | `511` bytes | `62.3%` | +| `tests/examples/pyminify.py` | `1,990` bytes | `981` bytes | `50.7%` | +| `TexSoup/` raw Python source (`*.py`) | `98,181` bytes | `33,107` bytes | `66.3%` | +| `TexSoup/` compressed source (`.tar.gz`) | `70,532` bytes | `11,850` bytes | `83.2%` | -Benchmark notes and the current speed comparison table live in +For baseline comparisons, speed results, and TexSoup validation details, see [benchmarks/README.md](./benchmarks/README.md). -## TexSoup Validation +# Installation -`pymini` has been validated against the upstream `TexSoup` test suite in package mode. -Current validation: upstream pytest passes (`78` tests), raw source code `66.3%` -smaller, compressed source code (`.tar.gz`) `83.2%` smaller. 
- - -To reproduce that flow locally: +## Pip ```bash -git clone https://github.com/alvinwan/TexSoup /tmp/texsoup -mkdir -p /tmp/texsoup-out/TexSoup -pymini package /tmp/texsoup/TexSoup -o /tmp/texsoup-out/TexSoup -cp -R /tmp/texsoup/tests /tmp/texsoup-tests -PYTHONPATH=/tmp/texsoup-out:/tmp/texsoup-tests python3 -m pytest /tmp/texsoup-tests/tests -o addopts='' -``` - -To compare raw package bytes before and after minification: - -```bash -rg --files /tmp/texsoup/TexSoup -g '*.py' | xargs cat | wc -c -rg --files /tmp/texsoup-out/TexSoup -g '*.py' | xargs cat | wc -c +python3 -m pip install pymini ``` -To compare compressed package snapshots: +## From source ```bash -tar -czf /tmp/texsoup-original-package.tar.gz -C /tmp/texsoup TexSoup -tar -czf /tmp/texsoup-minified-package.tar.gz -C /tmp/texsoup-out TexSoup -stat -f%z /tmp/texsoup-original-package.tar.gz -stat -f%z /tmp/texsoup-minified-package.tar.gz +git clone https://github.com/alvinwan/pymini.git +cd pymini +python3 -m pip install -e ".[dev]" ``` diff --git a/benchmarks/README.md b/benchmarks/README.md index a76e4f5..858a300 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,11 +1,33 @@ # Benchmarks +This directory holds the current size and speed measurements for `pymini`, plus +the benchmark harness used to reproduce them. 
+ +- [Compression](#compression) +- [Speed](#speed) +- [TexSoup Validation](#texsoup-validation) + +# Compression + +Checked-in fixture comparison: + +| Input | Original | `pymini` | `pyminifier` | `python-minifier` | +| --- | ---: | ---: | ---: | ---: | +| `tests/examples/pyminifier.py` | `1,355` bytes | `511` bytes, `62.3%` | `676` bytes, `50.1%` | `1,020` bytes, `24.7%` | +| `tests/examples/pyminify.py` | `1,990` bytes | `981` bytes, `50.7%` | `1,605` bytes, `19.3%` | `983` bytes, `50.6%` | + +TexSoup package validation: + +| Input | Original | `pymini` | Reduction | +| --- | ---: | ---: | ---: | +| `TexSoup/` raw Python source (`*.py`) | `98,181` bytes | `33,107` bytes | `66.3%` | +| `TexSoup/` compressed source (`.tar.gz`) | `70,532` bytes | `11,850` bytes | `83.2%` | + +# Speed + Latency is machine-dependent. Recompute these with `PYTHONPATH=. .venv/bin/python benchmarks/benchmark_speed.py`. -The single-file comparison uses `pymini`, `pyminifier`, and -`python-minifier`. The package benchmark is `pymini`-only. - | Input | `pymini` | `pyminifier` | `python-minifier` | | --- | ---: | ---: | ---: | | `tests/examples/pyminifier.py` | `14.1 ms` | `0.4 ms` | `1.5 ms` | @@ -20,3 +42,38 @@ python3 -m pip install -e ".[dev]" python-minifier git clone https://github.com/liftoff/pyminifier /tmp/pyminifier PYTHONPATH=. .venv/bin/python benchmarks/benchmark_speed.py --pyminifier-root /tmp/pyminifier ``` + +# TexSoup Validation + +`pymini` has been validated against the upstream `TexSoup` test suite in +package mode. Current validation: upstream pytest passes (`78` tests), raw +source code is `66.3%` smaller, and compressed source code (`.tar.gz`) is +`83.2%` smaller. 
+ + + +To reproduce that flow locally: + +```bash +git clone https://github.com/alvinwan/TexSoup /tmp/texsoup +mkdir -p /tmp/texsoup-out/TexSoup +pymini package /tmp/texsoup/TexSoup -o /tmp/texsoup-out/TexSoup +cp -R /tmp/texsoup/tests /tmp/texsoup-tests +PYTHONPATH=/tmp/texsoup-out:/tmp/texsoup-tests python3 -m pytest /tmp/texsoup-tests/tests -o addopts='' +``` + +To compare raw package bytes before and after minification: + +```bash +rg --files /tmp/texsoup/TexSoup -g '*.py' | xargs cat | wc -c +rg --files /tmp/texsoup-out/TexSoup -g '*.py' | xargs cat | wc -c +``` + +To compare compressed package snapshots: + +```bash +tar -czf /tmp/texsoup-original-package.tar.gz -C /tmp/texsoup TexSoup +tar -czf /tmp/texsoup-minified-package.tar.gz -C /tmp/texsoup-out TexSoup +stat -f%z /tmp/texsoup-original-package.tar.gz +stat -f%z /tmp/texsoup-minified-package.tar.gz +```