Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,24 @@ python3 -m pip install -e ".[dev]"
python3 -m pytest
```

## Compression Examples

Checked-in minified outputs for the repo fixtures live in [examples](./examples) and
are regenerated by `scripts/regenerate_examples.py`.

| Input | Original | `pymini` | `pyminifier` | `python-minifier` (`pyminify`) |
| --- | ---: | ---: | ---: | ---: |
| `tests/examples/pyminifier.py` | `1,355` bytes | `511` bytes, `62.3%` | `676` bytes, `50.1%` | `1,020` bytes, `24.7%` |
| `tests/examples/pyminify.py` | `1,990` bytes | `1,129` bytes, `43.3%` | `1,605` bytes, `19.3%` | `983` bytes, `50.6%` |
| `TexSoup/` raw Python source (`*.py`) | `98,181` bytes | `31,216` bytes, `68.2%` | `—` | `—` |
| `TexSoup/` compressed source (`.tar.gz`) | `70,532` bytes | `45,065` bytes, `36.1%` | `—` | `—` |

## TexSoup Validation

`pymini` has been validated against the upstream `TexSoup` test suite in package mode.
Current validation: raw source code `68.2%` smaller, compressed source code
(`.tar.gz`) `36.1%` smaller.
<!-- Raw bytes: 98,181 -> 31,212. Compressed bytes: 70,532 -> 45,054. -->

| Measurement | Original | Minified | Reduction | Reduction Rate |
| --- | ---: | ---: | ---: | ---: |
| Raw Python source (`*.py`) | `98,181` bytes | `31,212` bytes | `66,969` bytes | `68.2%` |
| `.tar.gz` of `TexSoup/` | `70,532` bytes | `45,054` bytes | `25,478` bytes | `36.1%` |
<!-- Raw bytes: 98,181 -> 31,216. Compressed bytes: 70,532 -> 45,065. -->

To reproduce that flow locally:

Expand Down
28 changes: 14 additions & 14 deletions examples/pyminify.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
def a(event,context):
l.info(event)
f='RequestType';g='PhysicalResourceId';h='None';i='Status';j='SUCCESS';k='Tags';m='OldResourceProperties';l.info(event)
try:
b=hashlib.new('md5',(event['RequestId']+event['StackId']).encode()).hexdigest();c=event['ResourceProperties']
if event['RequestType']=='Create':
event['PhysicalResourceId']='None';event['PhysicalResourceId']=create_cert(c,b);add_tags(event['PhysicalResourceId'],c);validate(event['PhysicalResourceId'],c)
if wait_for_issuance(event['PhysicalResourceId'],context):event['Status']='SUCCESS';return send(event)
if event[f]=='Create':
event[g]=h;event[g]=create_cert(c,b);add_tags(event[g],c);validate(event[g],c)
if wait_for_issuance(event[g],context):event[i]=j;return send(event)
else:return reinvoke(event,context)
elif event['RequestType']=='Delete':
if event['PhysicalResourceId']!='None':acm.delete_certificate(CertificateArn=event['PhysicalResourceId'])
event['Status']='SUCCESS';return send(event)
elif event['RequestType']=='Update':
elif event[f]=='Delete':
if event[g]!=h:acm.delete_certificate(CertificateArn=event[g])
event[i]=j;return send(event)
elif event[f]=='Update':
if replace_cert(event):
event['PhysicalResourceId']=create_cert(c,b);add_tags(event['PhysicalResourceId'],c);validate(event['PhysicalResourceId'],c)
if not wait_for_issuance(event['PhysicalResourceId'],context):return reinvoke(event,context)
event[g]=create_cert(c,b);add_tags(event[g],c);validate(event[g],c)
if not wait_for_issuance(event[g],context):return reinvoke(event,context)
else:
if 'Tags' in event['OldResourceProperties']:acm.remove_tags_from_certificate(CertificateArn=event['PhysicalResourceId'],Tags=event['OldResourceProperties']['Tags'])
add_tags(event['PhysicalResourceId'],c)
event['Status']='SUCCESS';return send(event)
if k in event[m]:acm.remove_tags_from_certificate(CertificateArn=event[g],Tags=event[m][k])
add_tags(event[g],c)
event[i]=j;return send(event)
else:raise RuntimeError('Unknown RequestType')
except Exception as d:l.exception('');event['Status']='FAILED';event['Reason']=str(d);return send(event)
except Exception as d:l.exception('');event[i]='FAILED';event['Reason']=str(d);return send(event)
handler=a
176 changes: 175 additions & 1 deletion pymini/pymini.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ class VariableShortener(NodeTransformer):
# Deferred optimizations intentionally left off after validating against
# TexSoup and similar package-shaped inputs:
# - aliasing repeated name reads into generated locals
# - hoisting repeated string literals into generated locals
# - hoisting repeated string literals into generated locals at module or
# class scope
# - renaming attribute call sites such as obj.method(...)
# - renaming methods, class-body attributes, and top-level class names in
# preserve-public-API mode
Expand Down Expand Up @@ -667,6 +668,178 @@ def transform(self, *trees):
return new_trees


# Structural pattern-matching node classes, resolved once at import time.
# ``getattr`` keeps the lookup safe on interpreters whose ``ast`` module has
# no ``Match*`` nodes; missing classes are simply left out of the tuple.
_MATCH_PATTERN_NODE_TYPES = tuple(
    node_type for node_type in (
        getattr(ast, "MatchValue", None),
        getattr(ast, "MatchSingleton", None),
        getattr(ast, "MatchSequence", None),
        getattr(ast, "MatchMapping", None),
        getattr(ast, "MatchClass", None),
        getattr(ast, "MatchAs", None),
        getattr(ast, "MatchOr", None),
    )
    if node_type is not None
)


def _is_unsupported_hoisted_string_context(node):
    """Return True when the string constant ``node`` must stay a literal.

    The check relies on every node carrying a ``parent`` attribute and walks
    that chain upwards until it reaches a node without one. A constant is
    reported as unsupported when:

    * it is the whole value of an expression statement (a docstring or a
      no-op string statement) -- replacing it with a name would drop the
      docstring;
    * any ancestor is an f-string (``ast.JoinedStr``);
    * any ancestor is a ``match`` pattern node;
    * it is, or is nested inside, the annotation of an argument, the
      annotation of an annotated assignment, or a function's return
      annotation.

    Args:
        node: The AST node to inspect (normally an ``ast.Constant``).

    Returns:
        bool: True if the literal must not be replaced by a name.
    """
    # Only the immediate parent matters here: a string nested deeper inside an
    # expression statement (e.g. a call argument) is an ordinary value.
    if isinstance(getattr(node, "parent", None), ast.Expr):
        return True

    current = node
    while hasattr(current, "parent"):
        parent = current.parent
        if isinstance(parent, ast.JoinedStr):
            return True
        if _MATCH_PATTERN_NODE_TYPES and isinstance(parent, _MATCH_PATTERN_NODE_TYPES):
            return True
        # ``current`` climbs with each iteration, so these identity checks
        # also catch constants nested anywhere inside an annotation.
        if isinstance(parent, ast.arg) and parent.annotation is current:
            return True
        if isinstance(parent, ast.AnnAssign) and parent.annotation is current:
            return True
        if isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)) and parent.returns is current:
            return True
        current = parent
    return False


def _reserved_names_in_node(node):
    """Collect every identifier that appears as a name or binding under ``node``.

    The result is the set of names a freshly generated local must avoid. It
    includes:

    * every ``ast.Name`` identifier (loads and stores alike);
    * argument names;
    * function and class names;
    * import bindings (the ``as`` name, else the first dotted component);
    * names listed in ``global`` / ``nonlocal`` statements;
    * ``except ... as name`` targets;
    * names captured by ``match`` patterns (``case [x, *rest]``,
      ``case {**rest}``, ``case ... as x``).

    Pattern captures and ``except`` targets are stored as plain strings on
    their nodes rather than as ``ast.Name`` children, so they have to be read
    explicitly; otherwise a capture that is bound but never read would not be
    seen and a generated name could be overwritten by it.

    Args:
        node: Root of the AST subtree to scan.

    Returns:
        set[str]: All identifiers found in the subtree.
    """
    # ``match`` nodes are looked up defensively so the function keeps working
    # on interpreters whose ``ast`` module does not define them.
    capture_types = tuple(
        node_type for node_type in (
            getattr(ast, "MatchAs", None),
            getattr(ast, "MatchStar", None),
        )
        if node_type is not None
    )
    mapping_pattern_type = getattr(ast, "MatchMapping", None)

    names = set()
    for current in ast.walk(node):
        if isinstance(current, ast.Name):
            names.add(current.id)
        elif isinstance(current, ast.arg):
            names.add(current.arg)
        elif isinstance(current, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            names.add(current.name)
        elif isinstance(current, ast.alias):
            names.add(current.asname or current.name.split(".", 1)[0])
        elif isinstance(current, (ast.Global, ast.Nonlocal)):
            names.update(current.names)
        elif isinstance(current, ast.ExceptHandler) and current.name:
            names.add(current.name)
        elif capture_types and isinstance(current, capture_types):
            # ``name`` is None for wildcard patterns such as ``case _`` / ``*_``.
            if current.name:
                names.add(current.name)
        elif mapping_pattern_type is not None and isinstance(current, mapping_pattern_type):
            # ``rest`` is the ``**name`` capture of a mapping pattern, if any.
            if current.rest:
                names.add(current.rest)
    return names


class RepeatedStringHoister(Transformer):
    """Transformer stage that hoists repeated string literals into locals."""

    # Deliberately limited to the narrowest safe form: repeated string
    # literals are only hoisted inside function bodies. Module and class
    # scopes are left untouched because a new binding there alters the public
    # surface or the class namespace more directly.
    def __init__(self, generator):
        super().__init__()
        # Source of fresh identifiers, consumed with ``next()`` by the rewriter.
        self.generator = generator

    def transform(self, *trees):
        """Rewrite each tree in place and return the same ``trees`` tuple."""
        for module_tree in trees:
            # Parent links are required by the context checks in both passes.
            ParentSetter().visit(module_tree)

            scan = RepeatedStringCollector()
            scan.visit(module_tree)

            rewriter = RepeatedStringRewriter(
                self.generator,
                scan.repeated_strings_by_scope,
            )
            rewriter.visit(module_tree)

            # Refresh parent links and source positions for the inserted nodes.
            ParentSetter().visit(module_tree)
            ast.fix_missing_locations(module_tree)
        return trees


class RepeatedStringCollector(ast.NodeVisitor):
    """Find, per function, the string literals worth hoisting.

    After ``visit`` has run, ``repeated_strings_by_scope`` maps ``id(func_node)``
    to the list of string values that occur more than once in that function's
    body and whose ``repr`` is longer than four characters.
    """

    def __init__(self):
        # One occurrence table per function currently being visited; the
        # innermost function is last.
        self.scope_stack = []
        self.repeated_strings_by_scope = {}

    def visit_FunctionDef(self, node):
        tally = {}
        self.scope_stack.append(tally)
        # Only the body is scanned; decorators, defaults and annotations are
        # not visited.
        for child in node.body:
            self.visit(child)
        self.scope_stack.pop()

        worthwhile = []
        for text, occurrences in tally.items():
            if occurrences <= 1:
                continue
            if len(repr(text)) <= 4:
                continue
            worthwhile.append(text)
        if worthwhile:
            self.repeated_strings_by_scope[id(node)] = worthwhile

    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_ClassDef(self, node):
        # Class-level statements are skipped; only nested functions and
        # classes are descended into.
        nested_definitions = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
        for child in node.body:
            if isinstance(child, nested_definitions):
                self.visit(child)

    def visit_Constant(self, node):
        if not (self.scope_stack and isinstance(node.value, str)):
            return
        if _is_unsupported_hoisted_string_context(node):
            return
        tally = self.scope_stack[-1]
        tally.setdefault(node.value, 0)
        tally[node.value] += 1


class RepeatedStringRewriter(ast.NodeTransformer):
    """Replace repeated string literals in function bodies with local names.

    For every function whose ``id`` appears in ``repeated_strings_by_scope``,
    each listed string gets a fresh name from ``generator``; occurrences in the
    function body are replaced by a load of that name and the matching
    ``name = 'literal'`` assignments are inserted at the top of the body. A
    leading docstring is left in place and untouched so the function keeps its
    ``__doc__``.
    """

    def __init__(self, generator, repeated_strings_by_scope):
        """Store the name generator and the ``id(func) -> [strings]`` table."""
        super().__init__()
        self.generator = generator
        self.repeated_strings_by_scope = repeated_strings_by_scope
        # One ``{string value: local name}`` mapping per enclosing function;
        # the innermost function is last.
        self.scope_stack = []

    @staticmethod
    def _docstring_length(body):
        """Return 1 if ``body`` starts with a string expression statement, else 0."""
        if not body:
            return 0
        first = body[0]
        if (
            isinstance(first, ast.Expr)
            and isinstance(first.value, ast.Constant)
            and isinstance(first.value.value, str)
        ):
            return 1
        return 0

    def _next_safe_name(self, reserved_names):
        """Draw names from the generator until one is not reserved.

        The chosen name is added to ``reserved_names`` before it is returned.
        """
        while True:
            candidate = next(self.generator)
            if candidate not in reserved_names:
                reserved_names.add(candidate)
                return candidate

    def _prepend_assignments(self, body, mapping):
        """Return ``body`` with one ``name = 'value'`` assignment per mapping entry.

        The assignments go at the very top of the body, except that a leading
        docstring stays first: moving it would stop it being the docstring.
        """
        assignments = []
        for value, name in mapping.items():
            assignment = ast.Assign(
                targets=[ast.Name(id=name, ctx=ast.Store())],
                value=ast.Constant(value=value),
            )
            # Marker checked by ``visit_Assign`` so the literal on the
            # right-hand side is never itself replaced by the name.
            assignment._pymini_generated = True
            assignments.append(assignment)
        split = self._docstring_length(body)
        return body[:split] + assignments + body[split:]

    def visit_FunctionDef(self, node):
        mapping = {}
        repeated = self.repeated_strings_by_scope.get(id(node), ())
        if repeated:
            reserved_names = _reserved_names_in_node(node)
            mapping = {
                value: self._next_safe_name(reserved_names)
                for value in repeated
            }
        # Only the body is rewritten, and a leading docstring is skipped so it
        # can never be turned into a name reference.
        split = self._docstring_length(node.body)
        self.scope_stack.append(mapping)
        node.body = node.body[:split] + [
            self.visit(statement) for statement in node.body[split:]
        ]
        self.scope_stack.pop()
        if mapping:
            node.body = self._prepend_assignments(node.body, mapping)
        return node

    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_ClassDef(self, node):
        # Class-level statements are kept as-is; only nested functions and
        # classes are descended into.
        updated_body = []
        for statement in node.body:
            if isinstance(statement, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                updated_body.append(self.visit(statement))
            else:
                updated_body.append(statement)
        node.body = updated_body
        return node

    def visit_Assign(self, node):
        if getattr(node, "_pymini_generated", False):
            return node
        return self.generic_visit(node)

    def visit_Constant(self, node):
        if not self.scope_stack or not isinstance(node.value, str):
            return node
        mapping = self.scope_stack[-1]
        # Cheap dictionary lookup first; the context check walks ancestors.
        if node.value not in mapping:
            return node
        if _is_unsupported_hoisted_string_context(node):
            return node
        return ast.copy_location(ast.Name(id=mapping[node.value], ctx=ast.Load()), node)


class ImportedVariableShortener(VariableShortener):
"""Use different module shorteners to adjust variables in this module

Expand Down Expand Up @@ -1127,6 +1300,7 @@ def minify(sources, modules='main', keep_module_names=False,
modules=ind.modules,
keep_module_names=keep_module_names,
), # obfuscate across files
RepeatedStringHoister(ind.generator),

# optionally fuse files
fuser := (
Expand Down
Loading
Loading