Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,18 +99,28 @@ Wheel-specific failures:
| --- | ---: | ---: | ---: |
| pyminifier.py | 11.8 ms | 1.7 ms | 7.5 ms |
| pyminify.py | 25.3 ms | 4.4 ms | 24.2 ms |
| click | 3.529 s | failed | 914.4 ms |
| pytest | 15.592 s | failed | 4.567 s |
| TexSoup | 124.9 ms | 52.2 ms | 117.2 ms |
| timefhuman | 352.0 ms | 71.0 ms | 266.0 ms |
| pyminifier | 137.1 ms | 35.6 ms | 114.8 ms |
| rich | 3286.6 ms | failed | 1838.7 ms |
| rich | 3.287 s | failed | 1.839 s |

Speed failures:

- click + pyminifier: minification fails on `click/__init__.py` with
`TypeError: 'NoneType' object is not subscriptable`.
- pytest + pyminifier: minification fails on `_pytest/_argcomplete.py` with
`TypeError: 'NoneType' object is not subscriptable`.
- rich + pyminifier: the same minification failure prevents a timing result.

The single-file rows come from [benchmark_speed.py](./benchmark_speed.py). The
package rows are one-shot package minification timings from the same
environment used for the compression comparison.
environment used for the compression comparison. The `click` and `pytest`
rows were measured on the checked-in fixtures under `.bench-repos`; `pymini`
used package mode with `--rename-modules --rename-global-variables
--rename-arguments`, while the baseline tools minified each file independently
in the preserved package tree.

# Reproduce

Expand Down
74 changes: 55 additions & 19 deletions pymini/pymini.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import ast
import copy
import keyword
from collections import Counter
from typing import Dict, List, Optional, Set
from .utils import variable_name_generator

Expand Down Expand Up @@ -299,6 +300,8 @@ def __init__(
self.class_member_mappings = {}
self.callable_argument_infos = {}
self.class_method_argument_infos = {}
self._class_public_member_reference_cache = {}
self._module_attribute_reference_cache = {}
self.modules = set(modules) # don't alias variables imported from these modules
self.keep_global_variables = keep_global_variables
self.rename_arguments = rename_arguments
Expand Down Expand Up @@ -416,28 +419,61 @@ def _public_class_reference_count(self, node, old_name):
count += 1
return count

def _public_member_reference_count(self, class_node, class_name, member_name):
count = 1
def _class_public_member_references(self, class_node):
cache_key = id(class_node)
cached = self._class_public_member_reference_cache.get(cache_key)
if cached is not None:
return cached
Comment on lines +424 to +426
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Invalidate class member count cache during class traversal

_public_member_reference_count is called multiple times while the same class AST is being mutated, but this cache never refreshes once populated. In keep_global_variables=True flows, earlier methods can rewrite receiver names (for example selfa when rename_arguments=True), so later rename decisions should use the updated tree; reusing the old counts overestimates member usage and can trigger renames that increase output size (e.g. LongClassName.foo/bar gets bar renamed with an alias, producing longer minified code than before this commit).

Useful? React with 👍 / 👎.


name_loads = Counter()
attribute_loads_by_base = {}
for current in ast.walk(class_node):
if isinstance(current, ast.Name) and isinstance(current.ctx, ast.Load) and current.id == member_name:
count += 1
elif (
isinstance(current, ast.Attribute)
and current.attr == member_name
and isinstance(current.value, ast.Name)
and current.value.id in {"self", "cls", class_name}
):
count += 1
if isinstance(current, ast.Name) and isinstance(current.ctx, ast.Load):
name_loads[current.id] += 1
elif isinstance(current, ast.Attribute) and isinstance(current.value, ast.Name):
base_name = current.value.id
base_counts = attribute_loads_by_base.get(base_name)
if base_counts is None:
base_counts = Counter()
attribute_loads_by_base[base_name] = base_counts
base_counts[current.attr] += 1

cached = {
"name_loads": name_loads,
"attribute_loads_by_base": attribute_loads_by_base,
}
self._class_public_member_reference_cache[cache_key] = cached
return cached

def _module_attribute_references(self, module):
cache_key = id(module)
cached = self._module_attribute_reference_cache.get(cache_key)
if cached is not None:
return cached

attribute_loads_by_base = {}
for current in ast.walk(module):
if not isinstance(current, ast.Attribute) or not isinstance(current.value, ast.Name):
continue
base_name = current.value.id
base_counts = attribute_loads_by_base.get(base_name)
if base_counts is None:
base_counts = Counter()
attribute_loads_by_base[base_name] = base_counts
base_counts[current.attr] += 1

self._module_attribute_reference_cache[cache_key] = attribute_loads_by_base
return attribute_loads_by_base

def _public_member_reference_count(self, class_node, class_name, member_name):
references = self._class_public_member_references(class_node)
count = 1 + references["name_loads"].get(member_name, 0)
attribute_loads_by_base = references["attribute_loads_by_base"]
for base_name in {"self", "cls", class_name}:
count += attribute_loads_by_base.get(base_name, {}).get(member_name, 0)
module = self._containing_module(class_node)
if module is not None:
for current in ast.walk(module):
if (
isinstance(current, ast.Attribute)
and current.attr == member_name
and isinstance(current.value, ast.Name)
and current.value.id == class_name
):
count += 1
count += self._module_attribute_references(module).get(class_name, {}).get(member_name, 0)
return count

def _public_global_reference_count(self, node, old_name):
Expand Down
Loading