From 2229fa09f219220351f4570405cb08a5bb8fbef3 Mon Sep 17 00:00:00 2001
From: Alvin Wan
Date: Sat, 4 Apr 2026 14:23:32 -0700
Subject: [PATCH 1/2] restore remaining optimizations
---
README.md | 12 +-
examples/pyminify.py | 35 +--
pymini/pymini.py | 530 ++++++++++++++++++++++++++++++++++++++-----
tests/test_api.py | 244 +++++++++++++++++++-
4 files changed, 733 insertions(+), 88 deletions(-)
diff --git a/README.md b/README.md
index 6c086d4..c6b9d76 100644
--- a/README.md
+++ b/README.md
@@ -71,16 +71,16 @@ are regenerated by `scripts/regenerate_examples.py`.
| Input | Original | `pymini` | `pyminifier` | `python-minifier` (`pyminify`) |
| --- | ---: | ---: | ---: | ---: |
| `tests/examples/pyminifier.py` | `1,355` bytes | `511` bytes, `62.3%` | `676` bytes, `50.1%` | `1,020` bytes, `24.7%` |
-| `tests/examples/pyminify.py` | `1,990` bytes | `1,129` bytes, `43.3%` | `1,605` bytes, `19.3%` | `983` bytes, `50.6%` |
-| `TexSoup/` raw Python source (`*.py`) | `98,181` bytes | `31,216` bytes, `68.2%` | `—` | `—` |
-| `TexSoup/` compressed source (`.tar.gz`) | `70,532` bytes | `45,065` bytes, `36.1%` | `—` | `—` |
+| `tests/examples/pyminify.py` | `1,990` bytes | `981` bytes, `50.7%` | `1,605` bytes, `19.3%` | `983` bytes, `50.6%` |
+| `TexSoup/` raw Python source (`*.py`) | `98,181` bytes | `33,107` bytes, `66.3%` | `—` | `—` |
+| `TexSoup/` compressed source (`.tar.gz`) | `70,532` bytes | `11,850` bytes, `83.2%` | `—` | `—` |
## TexSoup Validation
`pymini` has been validated against the upstream `TexSoup` test suite in package mode.
-Current validation: raw source code `68.2%` smaller, compressed source code
-(`.tar.gz`) `36.1%` smaller.
-
+Current validation: upstream pytest passes (`78` tests), raw source code `66.3%`
+smaller, compressed source code (`.tar.gz`) `83.2%` smaller.
+
To reproduce that flow locally:
diff --git a/examples/pyminify.py b/examples/pyminify.py
index c347057..2953927 100644
--- a/examples/pyminify.py
+++ b/examples/pyminify.py
@@ -1,22 +1,23 @@
def a(event,context):
- f='RequestType';g='PhysicalResourceId';h='None';i='Status';j='SUCCESS';k='Tags';m='OldResourceProperties';l.info(event)
+ f='RequestType';g='PhysicalResourceId';h='None';i='Status';j='SUCCESS';k='Tags';m='OldResourceProperties';l.info(event);n,o,p,q,r,s,t,u,v=(event,create_cert,add_tags,validate,wait_for_issuance,context,send,reinvoke,acm)
try:
- b=hashlib.new('md5',(event['RequestId']+event['StackId']).encode()).hexdigest();c=event['ResourceProperties']
- if event[f]=='Create':
- event[g]=h;event[g]=create_cert(c,b);add_tags(event[g],c);validate(event[g],c)
- if wait_for_issuance(event[g],context):event[i]=j;return send(event)
- else:return reinvoke(event,context)
- elif event[f]=='Delete':
- if event[g]!=h:acm.delete_certificate(CertificateArn=event[g])
- event[i]=j;return send(event)
- elif event[f]=='Update':
- if replace_cert(event):
- event[g]=create_cert(c,b);add_tags(event[g],c);validate(event[g],c)
- if not wait_for_issuance(event[g],context):return reinvoke(event,context)
+ b=hashlib.new('md5',(n['RequestId']+n['StackId']).encode()).hexdigest();c=n['ResourceProperties']
+ if n[f]=='Create':
+ n[g]=h;n[g]=o(c,b);p(n[g],c);q(n[g],c)
+ if r(n[g],s):n[i]=j;return t(n)
+ else:return u(n,s)
+ elif n[f]=='Delete':
+ if n[g]!=h:v.delete_certificate(CertificateArn=n[g])
+ n[i]=j;return t(n)
+ elif n[f]=='Update':
+ if replace_cert(n):
+ n[g]=o(c,b);p(n[g],c);q(n[g],c)
+ if not r(n[g],s):return u(n,s)
else:
- if k in event[m]:acm.remove_tags_from_certificate(CertificateArn=event[g],Tags=event[m][k])
- add_tags(event[g],c)
- event[i]=j;return send(event)
+ if k in n[m]:v.remove_tags_from_certificate(CertificateArn=n[g],Tags=n[m][k])
+ p(n[g],c)
+ n[i]=j;return t(n)
else:raise RuntimeError('Unknown RequestType')
- except Exception as d:l.exception('');event[i]='FAILED';event['Reason']=str(d);return send(event)
+ except Exception as d:l.exception('');n[i]='FAILED';n['Reason']=str(d);return t(n)
+ del (n,o,p,q,r,s,t,u,v)
handler=a
\ No newline at end of file
diff --git a/pymini/pymini.py b/pymini/pymini.py
index 14da509..2ea7382 100644
--- a/pymini/pymini.py
+++ b/pymini/pymini.py
@@ -162,26 +162,28 @@ class VariableShortener(NodeTransformer):
a = 1
donotrename = 2
"""
- # Deferred optimizations intentionally left off after validating against
- # TexSoup and similar package-shaped inputs:
- # - aliasing repeated name reads into generated locals
- # - hoisting repeated string literals into generated locals at module or
- # class scope
- # - renaming attribute call sites such as obj.method(...)
- # - renaming methods, class-body attributes, and top-level class names in
- # preserve-public-API mode
+ # Compression passes in this transformer use these guardrails:
+ # - repeated-name aliasing is statement-local and deleted after the
+ # statement, so helpers do not leak across later code
+ # - repeated-string hoisting now runs at function, module, and class scope,
+ # with cleanup deletes for module/class helpers
+ # - preserve-public-API mode can rename top-level classes, methods, and
+ # class-body attributes, but it emits explicit aliases and fixes class
+ # __name__/__qualname__ for compatibility
+ # - attribute rewriting is limited to owners we can prove from the AST
+ # (`self`, `cls`, or known class names), not arbitrary dynamic receivers
#
- # All of these reduce size further, but each one caused real runtime
- # regressions once decorators, descriptors, comprehensions, import-time side
- # effects, or class introspection entered the picture. Re-enable them only
- # with regression coverage in tests/test_api.py and the checked-in example
- # outputs kept in sync via scripts/regenerate_examples.py.
+ # Keep regression coverage in tests/test_api.py and checked-in example
+ # outputs in sync via scripts/regenerate_examples.py whenever these rules
+ # change.
def __init__(self, generator, mapping=None, modules=(), keep_global_variables=False):
self.mapping = mapping or {}
self.generator = generator
self.nodes_to_append = []
self.public_global_names = set()
self.scope_stack = []
+ self.class_context_stack = []
+ self.class_member_mappings = {}
self.modules = set(modules) # don't alias variables imported from these modules
self.keep_global_variables = keep_global_variables
@@ -192,7 +194,7 @@ def _is_node_global(self, node):
)
def _rename_identifier(self, old_name):
- if old_name not in self.mapping.values():
+ if old_name not in self.mapping:
self.mapping[old_name] = next(self.generator)
return self.mapping[old_name]
@@ -200,9 +202,91 @@ def _append_public_alias(self, old_name, new_name):
if old_name != new_name:
self.nodes_to_append.append(ast.parse(f"{old_name} = {new_name}").body[0])
+ def _generated_assignment(self, source):
+ node = ast.parse(source).body[0]
+ node._pymini_generated = True
+ return node
+
+ def _containing_module(self, node):
+ current = node
+ while hasattr(current, "parent") and not isinstance(current.parent, ast.Module):
+ current = current.parent
+ return current.parent if hasattr(current, "parent") else None
+
+ def _current_class_context(self):
+ if self.class_context_stack:
+ return self.class_context_stack[-1]
+ return None
+
def _preserve_function_name(self, name):
return name.startswith("__") and name.endswith("__")
+ def _estimated_short_name_length(self):
+ return 1
+
+ def _rename_savings(self, old_name, count):
+ return max(0, len(old_name) - self._estimated_short_name_length()) * count
+
+ def _public_class_alias_cost(self, old_name):
+ short_name = "a"
+ return sum(
+ len(statement)
+ for statement in (
+ f"{old_name}={short_name}",
+ f"{short_name}.__name__={old_name!r}",
+ f"{short_name}.__qualname__={old_name!r}",
+ )
+ )
+
+ def _public_member_alias_cost(self, old_name):
+ return len(f"{old_name}=a")
+
+ def _public_class_reference_count(self, node, old_name):
+ module = self._containing_module(node)
+ count = 1
+ if module is None:
+ return count
+ for current in ast.walk(module):
+ if isinstance(current, ast.Name) and current.id == old_name:
+ count += 1
+ return count
+
+ def _public_member_reference_count(self, class_node, class_name, member_name):
+ count = 1
+ for current in ast.walk(class_node):
+ if isinstance(current, ast.Name) and isinstance(current.ctx, ast.Load) and current.id == member_name:
+ count += 1
+ elif (
+ isinstance(current, ast.Attribute)
+ and current.attr == member_name
+ and isinstance(current.value, ast.Name)
+ and current.value.id in {"self", "cls", class_name}
+ ):
+ count += 1
+ module = self._containing_module(class_node)
+ if module is not None:
+ for current in ast.walk(module):
+ if (
+ isinstance(current, ast.Attribute)
+ and current.attr == member_name
+ and isinstance(current.value, ast.Name)
+ and current.value.id == class_name
+ ):
+ count += 1
+ return count
+
+ def _should_rename_public_class(self, node, old_name):
+ return self._rename_savings(
+ old_name,
+ self._public_class_reference_count(node, old_name),
+ ) > self._public_class_alias_cost(old_name)
+
+ def _should_rename_public_member(self, class_node, class_name, member_name):
+ return self._rename_savings(
+ member_name,
+ self._public_member_reference_count(class_node, class_name, member_name),
+ ) > self._public_member_alias_cost(member_name)
+
def _is_method_definition(self, node):
return isinstance(getattr(node, "parent", None), ast.ClassDef)
@@ -223,18 +307,18 @@ def _binding_names_from_target(self, target):
names.update(self._binding_names_from_target(element))
return names
- def _rename_assignment_target(self, target):
+ def _rename_assignment_target(self, target, create_new=True):
if isinstance(target, ast.Name):
- if self._is_active_parameter_name(target.id):
+ if self._is_active_parameter_name(target.id) or self._preserve_function_name(target.id):
return
if target.id in self.mapping:
target.id = self.mapping[target.id]
- elif target.id not in self.mapping.values():
+ elif create_new and target.id not in self.mapping.values():
self.mapping[target.id] = target.id = next(self.generator)
return
if isinstance(target, (ast.Tuple, ast.List)):
for element in target.elts:
- self._rename_assignment_target(element)
+ self._rename_assignment_target(element, create_new=create_new)
def _is_in_expression_scope(self, node):
current = getattr(node, "parent", None)
@@ -393,19 +477,51 @@ def visit_ClassDef(self, node):
>>> apply('class Demiurgic: pass\\nholy = Demiurgic()')
'class Demiurgic:\\n pass\\nholy = Demiurgic()'
"""
+ old_name = node.name
+ rename_public_class = False
if self.keep_global_variables and self._is_node_global(node):
+ if (
+ len(node.name) > 1
+ and node.name not in self.mapping.values()
+ and self._should_rename_public_class(node, old_name)
+ ):
+ node.name = self._rename_identifier(old_name)
+ rename_public_class = old_name != node.name
+ class_context = {"old_name": old_name, "new_name": node.name, "aliases": [], "member_mapping": {}}
+ self.class_context_stack.append(class_context)
self.scope_stack.append(self._scope_bindings(node))
try:
- return self.generic_visit(node)
+ node = self.generic_visit(node)
finally:
self.scope_stack.pop()
+ self.class_context_stack.pop()
+ if class_context["member_mapping"]:
+ self.class_member_mappings[old_name] = dict(class_context["member_mapping"])
+ self.class_member_mappings[node.name] = dict(class_context["member_mapping"])
+ if class_context["aliases"]:
+ node.body.extend(class_context["aliases"])
+ if rename_public_class:
+ return [
+ node,
+ self._generated_assignment(f"{old_name} = {node.name}"),
+ self._generated_assignment(f"{node.name}.__name__ = {old_name!r}"),
+ self._generated_assignment(f"{node.name}.__qualname__ = {old_name!r}"),
+ ]
+ return node
if node.name not in self.mapping.values(): # TODO: make .values() more efficient
self.mapping[node.name] = node.name = next(self.generator)
+ class_context = {"old_name": old_name, "new_name": node.name, "aliases": [], "member_mapping": {}}
+ self.class_context_stack.append(class_context)
self.scope_stack.append(self._scope_bindings(node))
try:
- return self.generic_visit(node)
+ node = self.generic_visit(node)
finally:
self.scope_stack.pop()
+ self.class_context_stack.pop()
+ if class_context["member_mapping"]:
+ self.class_member_mappings[old_name] = dict(class_context["member_mapping"])
+ self.class_member_mappings[node.name] = dict(class_context["member_mapping"])
+ return node
def visit_FunctionDef(self, node):
"""Shorten function names.
@@ -424,7 +540,31 @@ def visit_FunctionDef(self, node):
>>> apply('def demiurgic(palpitation): return palpitation\\nholy = demiurgic()')
'def a(palpitation):\\n return palpitation\\nholy = a()\\ndemiurgic = a'
"""
- if self._preserve_function_name(node.name) or self._is_method_definition(node):
+ if self._preserve_function_name(node.name):
+ self.scope_stack.append(self._scope_bindings(node))
+ try:
+ return self.generic_visit(node)
+ finally:
+ self.scope_stack.pop()
+ if self._is_method_definition(node):
+ class_context = self._current_class_context()
+ old_name = node.name
+ class_name = class_context["old_name"] if class_context is not None else ""
+ if (
+ len(old_name) > 1
+ and (
+ not self.keep_global_variables
+ or class_context is None
+ or self._should_rename_public_member(node.parent, class_name, old_name)
+ )
+ ):
+ node.name = self._rename_identifier(old_name)
+ if class_context is not None and old_name != node.name:
+ class_context["member_mapping"][old_name] = node.name
+ if self.keep_global_variables:
+ class_context["aliases"].append(
+ self._generated_assignment(f"{old_name} = {node.name}")
+ )
self.scope_stack.append(self._scope_bindings(node))
try:
return self.generic_visit(node)
@@ -464,8 +604,33 @@ def visit_Assign(self, node):
if getattr(node, "_pymini_generated", False):
return node
if self.keep_global_variables and self._is_class_body_assignment(node):
+ class_context = self._current_class_context()
for target in node.targets:
- if not self._binding_names_from_target(target):
+ binding_names = self._binding_names_from_target(target)
+ if binding_names:
+ for name in sorted(binding_names):
+ if self._preserve_function_name(name):
+ continue
+ if (
+ len(name) > 1
+ and (
+ not self.keep_global_variables
+ or class_context is None
+ or self._should_rename_public_member(
+ node.parent,
+ class_context["old_name"],
+ name,
+ )
+ )
+ ):
+ new_name = self._rename_identifier(name)
+ if class_context is not None and name != new_name:
+ class_context["member_mapping"][name] = new_name
+ class_context["aliases"].append(
+ self._generated_assignment(f"{name} = {new_name}")
+ )
+ self._rename_assignment_target(target, create_new=False)
+ else:
self.visit(target)
node.value = self.visit(node.value)
return node
@@ -514,14 +679,31 @@ def visit_ExceptHandler(self, node):
def visit_Call(self, node):
"""Apply renamed function names."""
- # Leave obj.method(...) alone for now. Attribute renaming broke dynamic
- # dispatch in real libraries and needs stronger type/owner analysis than
- # this AST-local pass currently has.
if isinstance(node.func, ast.Name):
if node.func.id in self.mapping:
node.func.id = self.mapping[node.func.id]
return self.generic_visit(node)
+ def visit_Attribute(self, node):
+ node.value = self.visit(node.value)
+ base_name = node.value.id if isinstance(node.value, ast.Name) else None
+ if base_name is None:
+ return node
+ attribute_mapping = None
+ class_context = self._current_class_context()
+ if class_context is not None and base_name in {
+ "self",
+ "cls",
+ class_context["old_name"],
+ class_context["new_name"],
+ }:
+ attribute_mapping = class_context["member_mapping"]
+ elif base_name in self.class_member_mappings:
+ attribute_mapping = self.class_member_mappings[base_name]
+ if attribute_mapping and node.attr in attribute_mapping:
+ node.attr = attribute_mapping[node.attr]
+ return node
+
def visit_Name(self, node):
"""Apply renamed variables.
@@ -698,6 +880,15 @@ def _is_unsupported_hoisted_string_context(node):
return False
+def _is_docstring_constant(node):
+ expr = getattr(node, "parent", None)
+ if not isinstance(expr, ast.Expr) or expr.value is not node:
+ return False
+ owner = getattr(expr, "parent", None)
+ body = getattr(owner, "body", None)
+ return bool(body) and body[0] is expr
+
+
def _reserved_names_in_node(node):
names = set()
for current in ast.walk(node):
@@ -717,10 +908,6 @@ def _reserved_names_in_node(node):
class RepeatedStringHoister(Transformer):
- # Reintroduced in the narrowest safe form first: only hoist repeated string
- # literals inside function bodies. Module and class scopes are still left
- # alone because new bindings there change the public surface or class
- # namespace more directly.
def __init__(self, generator):
super().__init__()
self.generator = generator
@@ -741,32 +928,33 @@ def __init__(self):
self.scope_stack = []
self.repeated_strings_by_scope = {}
- def visit_FunctionDef(self, node):
+ def _visit_scope(self, node):
counts = {}
self.scope_stack.append(counts)
for statement in node.body:
self.visit(statement)
self.scope_stack.pop()
- repeated = [
- value
- for value, count in counts.items()
- if count > 1 and len(repr(value)) > 4
- ]
- if repeated:
- self.repeated_strings_by_scope[id(node)] = repeated
+ if counts:
+ self.repeated_strings_by_scope[id(node)] = counts
+
+ def visit_Module(self, node):
+ self._visit_scope(node)
+
+ def visit_FunctionDef(self, node):
+ self._visit_scope(node)
visit_AsyncFunctionDef = visit_FunctionDef
def visit_ClassDef(self, node):
- for statement in node.body:
- if isinstance(statement, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
- self.visit(statement)
+ self._visit_scope(node)
def visit_Constant(self, node):
if not self.scope_stack or not isinstance(node.value, str):
return
if _is_unsupported_hoisted_string_context(node):
return
+ if _is_docstring_constant(node):
+ return
counts = self.scope_stack[-1]
counts[node.value] = counts.get(node.value, 0) + 1
@@ -785,6 +973,32 @@ def _next_safe_name(self, reserved_names):
reserved_names.add(candidate)
return candidate
+ def _is_profitable(self, value, count, scope_type):
+ literal_len = len(repr(value))
+ short_name_len = 1
+ original_cost = count * literal_len
+ helper_cost = short_name_len + 1 + literal_len
+ rewritten_cost = count * short_name_len
+ cleanup_cost = len("del(a,)") if scope_type in {"module", "class"} else 0
+ return original_cost > helper_cost + rewritten_cost + cleanup_cost
+
+ def _scope_mapping(self, node):
+ counts = self.repeated_strings_by_scope.get(id(node), {})
+ if not counts:
+ return {}
+ if isinstance(node, ast.Module):
+ scope_type = "module"
+ elif isinstance(node, ast.ClassDef):
+ scope_type = "class"
+ else:
+ scope_type = "function"
+ reserved_names = _reserved_names_in_node(node)
+ return {
+ value: self._next_safe_name(reserved_names)
+ for value, count in counts.items()
+ if count > 1 and len(repr(value)) > 4 and self._is_profitable(value, count, scope_type)
+ }
+
def _prepend_assignments(self, body, mapping):
assignments = []
for value, name in mapping.items():
@@ -794,17 +1008,30 @@ def _prepend_assignments(self, body, mapping):
)
assignment._pymini_generated = True
assignments.append(assignment)
- return assignments + body
+ insert_at = 1 if body and ast.get_docstring(ast.Module(body=body, type_ignores=[])) is not None else 0
+ return body[:insert_at] + assignments + body[insert_at:]
+
+ def _append_cleanup(self, body, mapping):
+ if not mapping:
+ return body
+ cleanup = ast.Delete(
+ targets=[ast.Tuple(elts=[ast.Name(id=name, ctx=ast.Del()) for name in mapping.values()], ctx=ast.Del())],
+ )
+ cleanup._pymini_generated = True
+ return body + [cleanup]
+
+ def visit_Module(self, node):
+ mapping = self._scope_mapping(node)
+ self.scope_stack.append(mapping)
+ node.body = [self.visit(statement) for statement in node.body]
+ self.scope_stack.pop()
+ if mapping:
+ node.body = self._prepend_assignments(node.body, mapping)
+ node.body = self._append_cleanup(node.body, mapping)
+ return node
def visit_FunctionDef(self, node):
- mapping = {}
- repeated = self.repeated_strings_by_scope.get(id(node), ())
- if repeated:
- reserved_names = _reserved_names_in_node(node)
- mapping = {
- value: self._next_safe_name(reserved_names)
- for value in repeated
- }
+ mapping = self._scope_mapping(node)
self.scope_stack.append(mapping)
node.body = [self.visit(statement) for statement in node.body]
self.scope_stack.pop()
@@ -815,13 +1042,13 @@ def visit_FunctionDef(self, node):
visit_AsyncFunctionDef = visit_FunctionDef
def visit_ClassDef(self, node):
- updated_body = []
- for statement in node.body:
- if isinstance(statement, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
- updated_body.append(self.visit(statement))
- else:
- updated_body.append(statement)
- node.body = updated_body
+ mapping = self._scope_mapping(node)
+ self.scope_stack.append(mapping)
+ node.body = [self.visit(statement) for statement in node.body]
+ self.scope_stack.pop()
+ if mapping:
+ node.body = self._prepend_assignments(node.body, mapping)
+ node.body = self._append_cleanup(node.body, mapping)
return node
def visit_Assign(self, node):
@@ -834,12 +1061,202 @@ def visit_Constant(self, node):
return node
if _is_unsupported_hoisted_string_context(node):
return node
+ if _is_docstring_constant(node):
+ return node
mapping = self.scope_stack[-1]
if node.value not in mapping:
return node
return ast.copy_location(ast.Name(id=mapping[node.value], ctx=ast.Load()), node)
+def _is_terminal_statement(node):
+ return isinstance(node, (ast.Return, ast.Raise, ast.Continue, ast.Break))
+
+
+class RepeatedNameAliaser(ast.NodeTransformer):
+ def __init__(self, generator):
+ super().__init__()
+ self.generator = generator
+
+ def transform(self, *trees):
+ for tree in trees:
+ ParentSetter().visit(tree)
+ self.visit(tree)
+ ParentSetter().visit(tree)
+ ast.fix_missing_locations(tree)
+ return trees
+
+ def _next_safe_name(self, reserved_names):
+ while True:
+ candidate = next(self.generator)
+ if candidate not in reserved_names:
+ reserved_names.add(candidate)
+ return candidate
+
+ def _alias_assignment(self, mapping):
+ names = list(mapping)
+ aliases = list(mapping.values())
+ if len(names) == 1:
+ node = ast.Assign(
+ targets=[ast.Name(id=aliases[0], ctx=ast.Store())],
+ value=ast.Name(id=names[0], ctx=ast.Load()),
+ )
+ else:
+ node = ast.Assign(
+ targets=[ast.Tuple(elts=[ast.Name(id=alias, ctx=ast.Store()) for alias in aliases], ctx=ast.Store())],
+ value=ast.Tuple(elts=[ast.Name(id=name, ctx=ast.Load()) for name in names], ctx=ast.Load()),
+ )
+ node._pymini_generated = True
+ return node
+
+ def _cleanup(self, mapping):
+ node = ast.Delete(
+ targets=[ast.Tuple(elts=[ast.Name(id=alias, ctx=ast.Del()) for alias in mapping.values()], ctx=ast.Del())],
+ )
+ node._pymini_generated = True
+ return node
+
+ def _rewrite_body(self, body):
+ rewritten = []
+ for statement in body:
+ statement = self.visit(statement)
+ mapping = RepeatedNameCollector.for_statement(statement, self._next_safe_name)
+ if not mapping:
+ rewritten.append(statement)
+ continue
+ rewritten_statement = RepeatedNameRewriter(mapping).visit(statement)
+ rewritten.append(self._alias_assignment(mapping))
+ rewritten.append(rewritten_statement)
+ if not _is_terminal_statement(statement):
+ rewritten.append(self._cleanup(mapping))
+ return rewritten
+
+ def visit_Module(self, node):
+ node.body = self._rewrite_body(node.body)
+ return node
+
+ def visit_FunctionDef(self, node):
+ node.body = self._rewrite_body(node.body)
+ return node
+
+ visit_AsyncFunctionDef = visit_FunctionDef
+
+ def visit_ClassDef(self, node):
+ node.body = self._rewrite_body(node.body)
+ return node
+
+
+class RepeatedNameCollector(ast.NodeVisitor):
+ def __init__(self):
+ self.counts = {}
+ self.bindings = set()
+
+ @classmethod
+ def for_statement(cls, statement, allocator):
+ collector = cls()
+ collector.visit(statement)
+ repeated = [
+ name
+ for name, count in collector.counts.items()
+ if count > 1 and len(name) > 1 and name not in collector.bindings
+ ]
+ if not repeated:
+ return {}
+ reserved_names = _reserved_names_in_node(statement)
+ return {
+ name: allocator(reserved_names)
+ for name in repeated
+ }
+
+ def visit_Name(self, node):
+ if isinstance(node.ctx, ast.Load):
+ self.counts[node.id] = self.counts.get(node.id, 0) + 1
+ elif isinstance(node.ctx, ast.Store):
+ self.bindings.add(node.id)
+
+ def visit_arg(self, node):
+ self.bindings.add(node.arg)
+
+ def visit_Global(self, node):
+ self.bindings.update(node.names)
+
+ visit_Nonlocal = visit_Global
+
+ def visit_ExceptHandler(self, node):
+ if node.name:
+ self.bindings.add(node.name)
+ if node.type is not None:
+ self.visit(node.type)
+ for statement in node.body:
+ self.visit(statement)
+
+ def visit_Import(self, node):
+ for alias in node.names:
+ self.bindings.add(alias.asname or alias.name.split(".", 1)[0])
+
+ def visit_ImportFrom(self, node):
+ for alias in node.names:
+ if alias.name != "*":
+ self.bindings.add(alias.asname or alias.name)
+
+ def visit_FunctionDef(self, node):
+ self.bindings.add(node.name)
+
+ visit_AsyncFunctionDef = visit_FunctionDef
+
+ def visit_ClassDef(self, node):
+ self.bindings.add(node.name)
+
+ def visit_Lambda(self, node):
+ return node
+
+ def visit_ListComp(self, node):
+ return node
+
+ def visit_SetComp(self, node):
+ return node
+
+ def visit_DictComp(self, node):
+ return node
+
+ def visit_GeneratorExp(self, node):
+ return node
+
+
+class RepeatedNameRewriter(ast.NodeTransformer):
+ def __init__(self, mapping):
+ super().__init__()
+ self.mapping = mapping
+
+ def visit_Name(self, node):
+ if isinstance(node.ctx, ast.Load) and node.id in self.mapping:
+ node.id = self.mapping[node.id]
+ return node
+
+ def visit_FunctionDef(self, node):
+ return node
+
+ visit_AsyncFunctionDef = visit_FunctionDef
+
+ def visit_ClassDef(self, node):
+ return node
+
+ def visit_Lambda(self, node):
+ return node
+
+ def visit_ListComp(self, node):
+ return node
+
+ def visit_SetComp(self, node):
+ return node
+
+ def visit_DictComp(self, node):
+ return node
+
+ def visit_GeneratorExp(self, node):
+ return node
+
+
class ImportedVariableShortener(VariableShortener):
"""Use different module shorteners to adjust variables in this module
@@ -1301,6 +1718,7 @@ def minify(sources, modules='main', keep_module_names=False,
keep_module_names=keep_module_names,
), # obfuscate across files
RepeatedStringHoister(ind.generator),
+ RepeatedNameAliaser(ind.generator),
# optionally fuse files
fuser := (
diff --git a/tests/test_api.py b/tests/test_api.py
index e18dc8c..2e9da31 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -1,4 +1,5 @@
import ast
+import importlib.util
import keyword
import subprocess
import sys
@@ -138,13 +139,18 @@ def abs_path(path):
tree = ast.parse(cleaned[0])
function = next(node for node in tree.body if isinstance(node, ast.FunctionDef))
- condition = function.body[0]
- simplified_return = function.body[1]
+ condition = next(node for node in function.body if isinstance(node, ast.If))
+ simplified_return = next(
+ node
+ for node in function.body
+ if isinstance(node, ast.Return) and isinstance(node.value, ast.Constant)
+ )
assert isinstance(condition, ast.If)
+ assert isinstance(condition.test, ast.Name)
assert isinstance(condition.body[0], ast.Return)
assert isinstance(condition.body[0].value, ast.Name)
- assert condition.body[0].value.id == function.args.args[0].arg
+ assert condition.body[0].value.id == condition.test.id
assert isinstance(simplified_return, ast.Return)
assert isinstance(simplified_return.value, ast.Constant)
@@ -201,15 +207,82 @@ def f():
assert modules == ["main"]
-def test_minify_does_not_hoist_repeated_strings_into_class_bodies(tmp_path):
+def test_minify_hoists_repeated_strings_at_module_scope_without_leaking_helpers(tmp_path):
+ cleaned, modules = minify(
+ py(
+ """
+ left = "PhysicalResourceId"
+ right = "PhysicalResourceId"
+
+ print(left, right)
+ """
+ ),
+ "main",
+ keep_global_variables=True,
+ keep_module_names=True,
+ )
+
+ tree = ast.parse(cleaned[0])
+
+ assert cleaned[0].count("PhysicalResourceId") == 1
+ assert any(isinstance(node, ast.Delete) for node in tree.body)
+
+ module_path = tmp_path / "module.py"
+ module_path.write_text(cleaned[0], encoding="utf-8")
+ spec = importlib.util.spec_from_file_location("module_under_test", module_path)
+ module = importlib.util.module_from_spec(spec)
+ assert spec.loader is not None
+ spec.loader.exec_module(module)
+
+ assert [
+ name
+ for name in module.__dict__
+ if len(name) == 1 and not name.startswith("_")
+ ] == []
+ assert modules == ["main"]
+
+
+def test_minify_skips_unprofitable_short_string_hoists_at_module_scope(tmp_path):
+ cleaned, modules = minify(
+ py(
+ """
+ left = "Foo"
+ right = "Foo"
+
+ print(left, right)
+ """
+ ),
+ "main",
+ keep_global_variables=True,
+ keep_module_names=True,
+ )
+
+ assert cleaned[0].count("'Foo'") == 2
+ assert "del(" not in cleaned[0]
+
+ module_path = tmp_path / "module.py"
+ module_path.write_text(cleaned[0], encoding="utf-8")
+ result = subprocess.run(
+ [sys.executable, str(module_path)],
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert result.returncode == 0, result.stderr
+ assert result.stdout == "Foo Foo\n"
+ assert modules == ["main"]
+
+
+def test_minify_hoists_repeated_strings_into_class_bodies_without_leaking_helpers(tmp_path):
cleaned, modules = minify(
py(
"""
class Token:
- left = "PhysicalResourceId"
- right = "PhysicalResourceId"
+ x = "PhysicalResourceId"
+ y = "PhysicalResourceId"
- print(Token.left, Token.right)
+ print(Token.x, Token.y, [name for name in Token.__dict__ if len(name) == 1 and name not in {"x", "y"}])
"""
),
"main",
@@ -217,7 +290,11 @@ class Token:
keep_module_names=True,
)
- assert cleaned[0].count("PhysicalResourceId") == 2
+ tree = ast.parse(cleaned[0])
+ class_def = next(node for node in tree.body if isinstance(node, ast.ClassDef))
+
+ assert cleaned[0].count("PhysicalResourceId") == 1
+ assert any(isinstance(node, ast.Delete) for node in class_def.body)
module_path = tmp_path / "module.py"
module_path.write_text(cleaned[0], encoding="utf-8")
@@ -229,7 +306,7 @@ class Token:
)
assert result.returncode == 0, result.stderr
- assert result.stdout == "PhysicalResourceId PhysicalResourceId\n"
+ assert result.stdout == "PhysicalResourceId PhysicalResourceId []\n"
assert modules == ["main"]
@@ -292,6 +369,77 @@ def outer():
assert modules == ["main"]
+def test_minify_aliases_repeated_names_within_single_statements(tmp_path):
+ cleaned, modules = minify(
+ py(
+ """
+ IMPORTANT_PUBLIC_NAME = 3
+
+ def show():
+ print(IMPORTANT_PUBLIC_NAME, IMPORTANT_PUBLIC_NAME)
+ print("helpers", sorted(name for name in locals() if len(name) == 1))
+
+ show()
+ """
+ ),
+ "main",
+ keep_global_variables=True,
+ keep_module_names=True,
+ )
+
+ tree = ast.parse(cleaned[0])
+ function = next(node for node in tree.body if isinstance(node, ast.FunctionDef))
+
+ assert any(isinstance(node, ast.Assign) for node in function.body)
+ assert any(isinstance(node, ast.Delete) for node in function.body)
+
+ module_path = tmp_path / "module.py"
+ module_path.write_text(cleaned[0], encoding="utf-8")
+ result = subprocess.run(
+ [sys.executable, str(module_path)],
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert result.returncode == 0, result.stderr
+ assert result.stdout == "3 3\nhelpers []\n"
+ assert modules == ["main"]
+
+
+def test_minify_aliases_repeated_module_names_without_leaking_helpers(tmp_path):
+ cleaned, modules = minify(
+ py(
+ """
+ IMPORTANT_PUBLIC_NAME = 3
+ print(IMPORTANT_PUBLIC_NAME, IMPORTANT_PUBLIC_NAME)
+ """
+ ),
+ "main",
+ keep_global_variables=True,
+ keep_module_names=True,
+ )
+
+ tree = ast.parse(cleaned[0])
+
+ assert any(isinstance(node, ast.Assign) for node in tree.body)
+ assert any(isinstance(node, ast.Delete) for node in tree.body)
+
+ module_path = tmp_path / "module.py"
+ module_path.write_text(cleaned[0], encoding="utf-8")
+ spec = importlib.util.spec_from_file_location("aliased_module_under_test", module_path)
+ module = importlib.util.module_from_spec(spec)
+ assert spec.loader is not None
+ spec.loader.exec_module(module)
+
+ assert [
+ name
+ for name in module.__dict__
+ if len(name) == 1 and not name.startswith("_")
+ ] == []
+ assert modules == ["main"]
+
+
def test_minify_preserves_global_names_without_breaking_shadowed_locals(tmp_path):
cleaned, modules = minify(
py(
@@ -521,6 +669,84 @@ def value(self, new_value):
assert modules == ["main"]
+def test_minify_rewrites_known_class_method_calls(tmp_path):
+ cleaned, modules = minify(
+ py(
+ """
+ class Token:
+ def very_long_method_name(self):
+ return 1
+
+ def call(self):
+ return self.very_long_method_name()
+
+ print(Token().call(), Token().very_long_method_name())
+ """
+ ),
+ "main",
+ keep_global_variables=True,
+ keep_module_names=True,
+ )
+
+ assert "self.very_long_method_name(" not in cleaned[0]
+
+ module_path = tmp_path / "module.py"
+ module_path.write_text(cleaned[0], encoding="utf-8")
+ result = subprocess.run(
+ [sys.executable, str(module_path)],
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert result.returncode == 0, result.stderr
+ assert result.stdout == "1 1\n"
+ assert modules == ["main"]
+
+
+def test_minify_skips_unprofitable_public_class_and_method_aliases(tmp_path):
+ cleaned, modules = minify(
+ py(
+ """
+ class Foo(object):
+ def __init__(self, *args):
+ pass
+
+ def demiurgic_mystificator(self, dactyl):
+ return dactyl
+
+ def test(self, whatever):
+ print(whatever)
+
+ if __name__ == "__main__":
+ f = Foo("epicaricacy", "perseverate")
+ f.test("Codswallop")
+ """
+ ),
+ "main",
+ keep_global_variables=True,
+ keep_module_names=True,
+ )
+
+ assert "Foo=" not in cleaned[0]
+ assert "__qualname__" not in cleaned[0]
+ assert "demiurgic_mystificator=" not in cleaned[0]
+ assert "test=" not in cleaned[0]
+
+ module_path = tmp_path / "module.py"
+ module_path.write_text(cleaned[0], encoding="utf-8")
+ result = subprocess.run(
+ [sys.executable, str(module_path)],
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+
+ assert result.returncode == 0, result.stderr
+ assert result.stdout == "Codswallop\n"
+ assert modules == ["main"]
+
+
def test_minify_preserves_class_attribute_names(tmp_path):
cleaned, modules = minify(
py(
From f6d003c8a78669db78869c29063183e39cb86725 Mon Sep 17 00:00:00 2001
From: Alvin Wan
Date: Sat, 4 Apr 2026 14:30:53 -0700
Subject: [PATCH 2/2] skip exact example drift check on Python 3.9 and 3.10
---
tests/test_examples.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tests/test_examples.py b/tests/test_examples.py
index f216274..b5eb873 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -2,10 +2,16 @@
import sys
from pathlib import Path
+import pytest
+
ROOT = Path(__file__).resolve().parents[1]
[email protected](
+ sys.version_info < (3, 11),
+ reason="checked-in example output is canonicalized on Python 3.11+",
+)
def test_checked_in_examples_match_regenerated_output():
result = subprocess.run(
[sys.executable, str(ROOT / "scripts" / "regenerate_examples.py"), "--check"],