diff --git a/.circleci/Dockerfile b/.circleci/Dockerfile new file mode 100644 index 00000000..976dbed5 --- /dev/null +++ b/.circleci/Dockerfile @@ -0,0 +1,25 @@ +FROM fedora + +# CircleCI required tools +RUN dnf install -y \ + git \ + openssh \ + tar \ + gzip \ + gpg \ + ca-certificates + +# Python versions +RUN dnf install -y \ + python26 \ + python27 \ + python33 \ + python34 \ + python35 \ + python36 \ + python37 \ + pypy \ + pypy3 + +WORKDIR /tmp/work +ENTRYPOINT ["/bin/bash"] diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..b592d572 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,53 @@ +version: 2 +jobs: + test: + docker: + - image: danielflook/python-minifier-build + steps: + - checkout + + - run: + name: Set version statically + command: | + VERSION=$(python setup.py --version) + sed -i "s/setup_requires=.*/version='$VERSION',/; s/use_scm_version=.*//" setup.py + + - run: + name: tox + command: | + pip install -r requirements-tox.txt + tox -e py27,py34,py35,pypy + + publish: + docker: + - image: danielflook/python-minifier-build + steps: + - checkout + + - run: + name: Set version statically + command: | + VERSION=$(python setup.py --version) + sed -i "s/setup_requires=.*/version='$VERSION',/; s/use_scm_version=.*//" setup.py + + - run: + name: Add signing key + command: | + echo $SIGNING_KEY | base64 -d > private.key + gpg --import private.key + + - run: + name: sdist + command: | + pip3 install --upgrade setuptools wheel twine pip + python3 setup.py sdist bdist_wheel + twine upload --sign dist/* + +workflows: + version: 2 + build: + jobs: + - test + - publish: + requires: + - test diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..d74ed69f --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +.tox/ +.idea/ +__pycache__/ +*.py[cod] +dist/ +downloads/ +eggs/ +.eggs/ +wheels/ +*.egg-info/ +*.egg +venv/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 
00000000..db292f7e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Daniel Flook + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..00aba3b3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include *.txt +include LICENSE +include tox.ini diff --git a/README.md b/README.md new file mode 100644 index 00000000..14a39ef0 --- /dev/null +++ b/README.md @@ -0,0 +1,100 @@ +# Python Minifier + +Transforms Python source code into its most compact representation. + +python-minifier supports Python 2.6 to 2.7 and Python 3.3 to 3.5.
+ +As an example, the following python source: + +```python +import ast + +from python_minifier.miniprinter import MiniPrinter +from python_minifier.ast_compare import AstCompare + +class UnstableMinification(Exception): + def __init__(self, original, minified, exception): + self.original = original + self.minified = minified + self.exception = exception + + def __str__(self): + return str(self.exception) + +def minify(source): + + code = ast.parse(source) + minifier = MiniPrinter() + + minifier.visit(code) + + try: + # Check that the minified code is identical to the original + minified_code = ast.parse(minifier.code) + comparer = AstCompare() + comparer.compare(code, minified_code) + except Exception as exception: + raise UnstableMinification(source, minifier.code, exception) + + return minifier.code +``` + +Becomes: + +```python +import ast +from python_minifier.miniprinter import MiniPrinter +from python_minifier.ast_compare import AstCompare +class UnstableMinification(Exception): + def __init__(self,original,minified,exception): + self.original=original;self.minified=minified;self.exception=exception + def __str__(self):return str(self.exception) +def minify(source): + code=ast.parse(source);minifier=MiniPrinter();minifier.visit(code) + try: + minified_code=ast.parse(minifier.code);comparer=AstCompare();comparer.compare(code,minified_code) + except Exception as exception: + raise UnstableMinification(source,minifier.code,exception) + return minifier.code +``` + +## Why? + +AWS Cloudformation templates may have AWS lambda function source code embedded in them, but only if the function is less +than 4KiB. I wrote this package so I could write python normally and still embed the module in a template. + +## Installation + +To install python-minifier use pip: + +```bash +$ pip install python-minifier +``` + +Note that python-minifier depends on the python interpreter for parsing source code, +so install using a version of python appropriate for your source. 
+ +python-minifier runs with and can minify code written for Python 2.6 to 2.7 and Python 3.3 to 3.5. + +## Usage + +To minify a source file, and write the minified module to stdout: + +```bash +$ pyminify hello.py +``` + +There is also an API. The same example would look like: + +```python +import python_minifier + +with open('hello.py') as f: + print(python_minifier.minify(f.read())) +``` + +## License + +Available under the MIT License. Full text is in the [LICENSE](LICENSE) file. + +Copyright 2018 Daniel Flook diff --git a/requirements-tox.txt b/requirements-tox.txt new file mode 100644 index 00000000..8e62fdf6 --- /dev/null +++ b/requirements-tox.txt @@ -0,0 +1,2 @@ +tox +virtualenv<16.0.0 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..aa5b55d8 --- /dev/null +++ b/setup.py @@ -0,0 +1,52 @@ +import os.path +from setuptools import setup, find_packages + +readme_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.md') +with open(readme_path) as f: + long_desc = f.read() + +setup( + name='python_minifier', + description='Transform Python source code into it\'s most compact representation', + author='Daniel Flook', + author_email='daniel@flook.org', + url='https://github.com/dflook/python-minifier', + license='MIT', + project_urls={ + 'Issues': 'https://github.com/dflook/python-minifier/issues', + 'Say Thanks!': 'https://saythanks.io/to/dflook', + }, + keywords='minify minifier', + + use_scm_version=True, + package_dir={'': 'src'}, + packages=find_packages('src'), + long_description=long_desc, + long_description_content_type='text/markdown', + + python_requires='>=2.6, !=3.0.*, !=3.1.*, !=3.2.*, <3.6', + setup_requires=['setuptools_scm'], + + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: 
Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + 'Intended Audience :: Developers', + 'Topic :: Software Development' + ], + + entry_points = { + 'console_scripts': ['pyminify=python_minifier.__main__:main'] + }, + + zip_safe=True +) diff --git a/src/python_minifier/__init__.py b/src/python_minifier/__init__.py new file mode 100644 index 00000000..c9e7d806 --- /dev/null +++ b/src/python_minifier/__init__.py @@ -0,0 +1,63 @@ +import ast + +from python_minifier.ast_compare import AstComparer, CompareError +from python_minifier.miniprinter import MiniPrinter + + +class UnstableMinification(RuntimeError): + def __init__(self, exception, source, minified): + self.exception = exception + self.source = source + self.minified = minified + + def __str__(self): + return 'Minification was unstable! Please create an issue at https://github.com/dflook/python-minifier/issues' + + +def minify(source, filename=None): + """ + Minify a python module + + With the default arguments an exact representation of the input source is returned. 
+ + :param str source: The python module to minify + :param str filename: The original source filename if known + :rtype: str + + """ + + filename = filename or 'python_minifier.minify source' + + # This will raise if the source file can't be parsed + module = ast.parse(source, filename) + + printer = MiniPrinter() + printer(module) + + try: + minified_module = ast.parse(printer.code, 'python_minifier.minify output') + except SyntaxError as syntax_error: + raise UnstableMinification(syntax_error, source, printer.code) + + try: + comparer = AstComparer() + comparer.compare(module, minified_module) + except CompareError as compare_error: + raise UnstableMinification(compare_error, source, printer.code) + + return printer.code + + +def awslambda(source, filename=None): + """ + Minify a python module for use as an AWS Lambda function + + This returns a string suitable for embedding in a cloudformation template. + + :param str source: The python module to minify + :param str filename: The original source filename if known + :rtype: str + + """ + + return minify(source, filename) diff --git a/src/python_minifier/__main__.py b/src/python_minifier/__main__.py new file mode 100644 index 00000000..a17c6c86 --- /dev/null +++ b/src/python_minifier/__main__.py @@ -0,0 +1,18 @@ +from __future__ import print_function + +import sys + +from python_minifier import minify + + +def main(): + if len(sys.argv) < 2: + print('Usage: pyminify <PATH>') + sys.exit(-1)  # the bare exit() builtin only exists when the site module is loaded + + with open(sys.argv[1], 'rb') as f: + print(minify(f.read(), sys.argv[1]))  # pass the path so parse errors name the real file + + +if __name__ == '__main__': + main() diff --git a/src/python_minifier/ast_compare.py b/src/python_minifier/ast_compare.py new file mode 100644 index 00000000..e363fbdb --- /dev/null +++ b/src/python_minifier/ast_compare.py @@ -0,0 +1,584 @@ +class CompareError(RuntimeError): + """ + Raised when an AST compares unequal.
+ """ + + def __init__(self, namespace, lnode, rnode, msg=None): + self.namespace = namespace + self.lnode = lnode + self.rnode = rnode + self.msg = msg + + def __repr__(self): + return 'NodeError(%r, %r)' % (self.lnode, self.rnode) + + def __str__(self): + error = '' + + if self.msg: + error += self.msg + + if self.namespace: + error += ' in namespace ' + '.'.join(self.namespace) + ' ' + + if self.lnode and hasattr(self.lnode, 'lineno'): + error += ' at source %i:%i:' % (self.lnode.lineno, self.lnode.col_offset) + + return error + + +class AstComparer: + """ + Compare Python Abstract Syntax Trees + + >>> comparer = AstComparer() + >>> comparer(l_ast, r_ast) + + After instantiating, call with two ASTs. + If they are not identical, an exception will be raised. + + """ + + def __init__(self): + self.namespaces = [] + + def __call__(self, l_ast, r_ast): + return self.compare(l_ast, r_ast) + + def compare(self, lnode, rnode): + if type(lnode) != type(rnode): + raise CompareError( + self.namespaces, lnode, rnode, msg='Nodes do not match! 
lnode=%r, rnode=%r' % (lnode, rnode) + ) + + if lnode is None: + return + + method = 'compare_' + lnode.__class__.__name__ + v = getattr(self, method) + + v(lnode, rnode) + + def compare_list(self, llist, rlist): + + if len(llist) != len(rlist): + raise CompareError( + self.namespaces, + llist[0] if len(llist) else None, + rlist[0] if len(rlist) else None, + 'Node list does not have the same number of elements', + ) + + for l, r in zip(llist, rlist): + self.compare(l, r) + + def compare_Exec(self, lnode, rnode): + self.compare(lnode.body, rnode.body) + self.compare(lnode.locals, rnode.locals) + self.compare(lnode.globals, rnode.globals) + + # region Literals + + def compare_Num(self, lnode, rnode): + if lnode.n != rnode.n: + raise CompareError(self.namespaces, lnode, rnode, 'Num values do not match') + + def compare_Str(self, lnode, rnode): + if lnode.s != rnode.s: + raise CompareError(self.namespaces, lnode, rnode, 'String values do not match') + + def compare_FormattedValue(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + if lnode.conversion != rnode.conversion: + raise CompareError(self.namespaces, lnode, rnode, 'FormattedValue conversions do not match') + self.compare(lnode.format_spec, rnode.format_spec) + + def compare_JoinedStr(self, lnode, rnode): + self.compare_list(lnode.values, rnode.values) + + def compare_Bytes(self, lnode, rnode): + if lnode.s != rnode.s: + raise CompareError(self.namespaces, lnode, rnode, 'Bytes values do not match') + + def compare_List(self, lnode, rnode): + self.compare_list(lnode.elts, rnode.elts) + + def compare_Tuple(self, lnode, rnode): + self.compare_list(lnode.elts, rnode.elts) + + def compare_Set(self, lnode, rnode): + self.compare_list(lnode.elts, rnode.elts) + + def compare_Dict(self, lnode, rnode): + self.compare_list(lnode.keys, rnode.keys) + self.compare_list(lnode.values, rnode.values) + + def compare_Ellipsis(self, lnode, rnode): + pass + + def compare_NameConstant(self, lnode, rnode): + if lnode.value
!= rnode.value: + raise CompareError(self.namespaces, lnode, rnode, 'Constant values do not match') + + # endregion + + # region Variables + + def compare_Name(self, lnode, rnode): + if lnode.id != rnode.id: + raise CompareError(self.namespaces, lnode, rnode, 'Name values do not match') + + def compare_Starred(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + # endregion + + # region Expressions + + def compare_Repr(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + def compare_Expr(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + def compare_UnaryOp(self, lnode, rnode): + self.compare(lnode.op, rnode.op) + self.compare(lnode.operand, rnode.operand) + + def compare_UAdd(self, lnode, rnode): + pass + + def compare_USub(self, lnode, rnode): + pass + + def compare_Not(self, lnode, rnode): + pass + + def compare_Invert(self, lnode, rnode): + pass + + def compare_BinOp(self, lnode, rnode): + self.compare(lnode.left, rnode.left) + self.compare(lnode.op, rnode.op) + self.compare(lnode.right, rnode.right) + + def compare_Add(self, lnode, rnode): + pass + + def compare_Sub(self, lnode, rnode): + pass + + def compare_Mult(self, lnode, rnode): + pass + + def compare_Div(self, lnode, rnode): + pass + + def compare_FloorDiv(self, lnode, rnode): + pass + + def compare_Mod(self, lnode, rnode): + pass + + def compare_Pow(self, lnode, rnode): + pass + + def compare_LShift(self, lnode, rnode): + pass + + def compare_RShift(self, lnode, rnode): + pass + + def compare_BitOr(self, lnode, rnode): + pass + + def compare_BitXor(self, lnode, rnode): + pass + + def compare_BitAnd(self, lnode, rnode): + pass + + def compare_MatMult(self, lnode, rnode): + pass + + def compare_BoolOp(self, lnode, rnode): + self.compare(lnode.op, rnode.op) + self.compare_list(lnode.values, rnode.values) + + def compare_And(self, lnode, rnode): + pass + + def compare_Or(self, lnode, rnode): + pass + + def compare_Compare(self, lnode, rnode): + 
self.compare(lnode.left, rnode.left) + self.compare_list(lnode.ops, rnode.ops) + self.compare_list(lnode.comparators, rnode.comparators) + + def compare_Eq(self, lnode, rnode): + pass + + def compare_NotEq(self, lnode, rnode): + pass + + def compare_Lt(self, lnode, rnode): + pass + + def compare_LtE(self, lnode, rnode): + pass + + def compare_Gt(self, lnode, rnode): + pass + + def compare_GtE(self, lnode, rnode): + pass + + def compare_Is(self, lnode, rnode): + pass + + def compare_IsNot(self, lnode, rnode): + pass + + def compare_In(self, lnode, rnode): + pass + + def compare_NotIn(self, lnode, rnode): + pass + + def compare_Call(self, lnode, rnode): + self.compare(lnode.func, rnode.func) + self.compare_list(lnode.args, rnode.args) + self.compare_list(lnode.keywords, rnode.keywords) + + if hasattr(lnode, 'starargs'): + self.compare(lnode.starargs, rnode.starargs) + self.compare(lnode.kwargs, rnode.kwargs) + + def compare_keyword(self, lnode, rnode): + if lnode.arg != rnode.arg: + raise CompareError(self.namespaces, lnode, rnode, 'Keyword arg names do not match') + self.compare(lnode.value, rnode.value) + + def compare_IfExp(self, lnode, rnode): + self.compare(lnode.test, rnode.test) + self.compare(lnode.body, rnode.body) + self.compare(lnode.orelse, rnode.orelse) + + def compare_Attribute(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + if lnode.attr != rnode.attr: + raise CompareError(self.namespaces, lnode, rnode, 'Attrs do not match') + + # endregion + + # region Subscripting + + def compare_Subscript(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + self.compare(lnode.slice, rnode.slice) + + def compare_Index(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + def compare_Slice(self, lnode, rnode): + self.compare(lnode.lower, rnode.lower) + self.compare(lnode.upper, rnode.upper) + self.compare(lnode.step, rnode.step) + + def compare_ExtSlice(self, lnode, rnode): + self.compare_list(lnode.dims, rnode.dims) + + # 
endregion + + # region Comprehensions + + def compare_ListComp(self, lnode, rnode): + self.compare(lnode.elt, rnode.elt) + self.compare_list(lnode.generators, rnode.generators) + + def compare_SetComp(self, lnode, rnode): + self.compare(lnode.elt, rnode.elt) + self.compare_list(lnode.generators, rnode.generators) + + def compare_GeneratorExp(self, lnode, rnode): + self.compare(lnode.elt, rnode.elt) + self.compare_list(lnode.generators, rnode.generators) + + def compare_DictComp(self, lnode, rnode): + self.compare(lnode.key, rnode.key) + self.compare(lnode.value, rnode.value) + self.compare_list(lnode.generators, rnode.generators) + + def compare_comprehension(self, lnode, rnode): + self.compare(lnode.target, rnode.target) + self.compare(lnode.iter, rnode.iter) + self.compare_list(lnode.ifs, rnode.ifs) + + if hasattr(lnode, 'is_async'): + if lnode.is_async != rnode.is_async: + raise CompareError(self.namespaces, lnode, rnode, 'Comprehension is_async values do not match') + + # endregion + + # region Statements + + def compare_Assign(self, lnode, rnode): + self.compare_list(lnode.targets, rnode.targets) + self.compare(lnode.value, rnode.value) + + def compare_AnnAssign(self, lnode, rnode): + self.compare(lnode.target, rnode.target) + self.compare(lnode.annotation, rnode.annotation) + self.compare(lnode.value, rnode.value) + if lnode.simple != rnode.simple: + raise CompareError(self.namespaces, lnode, rnode, 'AnnAssign simple flags do not match') + + def compare_AugAssign(self, lnode, rnode): + self.compare(lnode.target, rnode.target) + self.compare(lnode.op, rnode.op) + self.compare(lnode.value, rnode.value) + + def compare_Print(self, lnode, rnode): + self.compare(lnode.dest, rnode.dest) + self.compare_list(lnode.values, rnode.values) + + if lnode.nl != rnode.nl: + raise CompareError(self.namespaces, lnode, rnode, 'Print nl values do not match') + + def compare_Raise(self, lnode, rnode): + + if hasattr(lnode, 'type'): + self.compare(lnode.type, rnode.type) + 
self.compare(lnode.inst, rnode.inst) + self.compare(lnode.tback, rnode.tback) + else: + self.compare(lnode.exc, rnode.exc) + self.compare(lnode.cause, rnode.cause) + + def compare_Assert(self, lnode, rnode): + self.compare(lnode.test, rnode.test) + self.compare(lnode.msg, rnode.msg) + + def compare_Delete(self, lnode, rnode): + self.compare_list(lnode.targets, rnode.targets) + + def compare_Pass(self, lnode, rnode): + pass + + # endregion + + # region Imports + + def compare_Import(self, lnode, rnode): + self.compare_list(lnode.names, rnode.names) + + def compare_ImportFrom(self, lnode, rnode): + if lnode.module != rnode.module: + raise CompareError(self.namespaces, lnode, rnode, 'ImportFrom modules do not match') + self.compare_list(lnode.names, rnode.names) + if lnode.level != rnode.level: + raise CompareError(self.namespaces, lnode, rnode, 'ImportFrom levels do not match') + + def compare_alias(self, lnode, rnode): + if lnode.name != rnode.name: + raise CompareError(self.namespaces, lnode, rnode, 'Import alias names do not match') + + if lnode.asname != rnode.asname: + raise CompareError(self.namespaces, lnode, rnode, 'Import alias asnames do not match') + + # endregion + + # region Control Flow + + def compare_If(self, lnode, rnode): + self.compare(lnode.test, rnode.test) + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.orelse, rnode.orelse) + + def compare_For(self, lnode, rnode): + self.compare(lnode.target, rnode.target) + self.compare(lnode.iter, rnode.iter) + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.orelse, rnode.orelse) + + def compare_While(self, lnode, rnode): + self.compare(lnode.test, rnode.test) + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.orelse, rnode.orelse) + + def compare_Break(self, lnode, rnode): + pass + + def compare_Continue(self, lnode, rnode): + pass + + def compare_Try(self, lnode, rnode): + self.compare_list(lnode.body, rnode.body) + 
self.compare_list(lnode.handlers, rnode.handlers) + self.compare_list(lnode.orelse, rnode.orelse) + self.compare_list(lnode.finalbody, rnode.finalbody) + + def compare_TryFinally(self, lnode, rnode): + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.finalbody, rnode.finalbody) + + def compare_TryExcept(self, lnode, rnode): + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.handlers, rnode.handlers) + self.compare_list(lnode.orelse, rnode.orelse) + + def compare_ExceptHandler(self, lnode, rnode): + self.compare(lnode.type, rnode.type) + + if isinstance(lnode.name, str) and lnode.name != rnode.name: + raise CompareError(self.namespaces, lnode, rnode, 'ExceptHandler names do not match') + elif not isinstance(lnode.name, str):  # None or an AST node (Python 2 handler targets) + self.compare(lnode.name, rnode.name) + + self.compare_list(lnode.body, rnode.body) + + def compare_With(self, lnode, rnode): + self.compare_list(lnode.body, rnode.body)  # the with-block body must match too + if hasattr(lnode, 'items'): + self.compare_list(lnode.items, rnode.items) + else: + self.compare(lnode.context_expr, rnode.context_expr) + self.compare(lnode.optional_vars, rnode.optional_vars) + + def compare_withitem(self, lnode, rnode): + self.compare(lnode.context_expr, rnode.context_expr) + self.compare(lnode.optional_vars, rnode.optional_vars) + + # endregion + + # region Function and Class definitions + + def compare_FunctionDef(self, lnode, rnode): + if lnode.name != rnode.name: + raise CompareError(self.namespaces, lnode, rnode, 'FunctionDef names do not match') + + self.namespaces.append(lnode.name) + + self.compare(lnode.args, rnode.args) + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.decorator_list, rnode.decorator_list) + + if hasattr(lnode, 'returns'): + self.compare(lnode.returns, rnode.returns) + + self.namespaces.pop() + + def compare_Lambda(self, lnode, rnode): + self.compare(lnode.args, rnode.args) + self.compare(lnode.body, rnode.body) + + def compare_arguments(self, lnode, rnode): + self.compare_list(lnode.args,
rnode.args) + + if hasattr(lnode, 'kwonlyargs'): + self.compare_list(lnode.kwonlyargs, rnode.kwonlyargs) + + if isinstance(lnode.vararg, str): + if lnode.vararg != rnode.vararg: + raise CompareError(self.namespaces, lnode, rnode, 'varargs do not match') + else: + self.compare(lnode.vararg, rnode.vararg) + + if isinstance(lnode.kwarg, str): + if lnode.kwarg != rnode.kwarg: + raise CompareError(self.namespaces, lnode, rnode, 'kwargs do not match') + else: + self.compare(lnode.kwarg, rnode.kwarg) + + self.compare_list(lnode.defaults, rnode.defaults) + + if hasattr(lnode, 'kw_defaults'): + self.compare_list(lnode.kw_defaults, rnode.kw_defaults) + + if hasattr(lnode, 'varargannotation'): + self.compare(lnode.varargannotation, rnode.varargannotation) + self.compare(lnode.kwargannotation, rnode.kwargannotation) + + def compare_arg(self, lnode, rnode): + if lnode.arg != rnode.arg: + raise CompareError(self.namespaces, lnode, rnode, 'arg names do not match') + self.compare(lnode.annotation, rnode.annotation) + + def compare_Return(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + def compare_Yield(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + def compare_YieldFrom(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + def compare_Global(self, lnode, rnode): + if lnode.names != rnode.names: + raise CompareError(self.namespaces, lnode, rnode, 'Global names do not match') + + def compare_Nonlocal(self, lnode, rnode): + if lnode.names != rnode.names: + raise CompareError(self.namespaces, lnode, rnode, 'Nonlocal names do not match') + + def compare_ClassDef(self, lnode, rnode): + + if lnode.name != rnode.name: + raise CompareError(self.namespaces, lnode, rnode, 'Class names do not match') + + self.namespaces.append(lnode.name) + + self.compare_list(lnode.bases, rnode.bases) + + if hasattr(lnode, 'keywords'): + self.compare_list(lnode.keywords, rnode.keywords) + + if hasattr(lnode, 'starargs'): + self.compare(lnode.starargs, 
rnode.starargs) + self.compare(lnode.kwargs, rnode.kwargs) + + self.compare_list(lnode.decorator_list, rnode.decorator_list) + self.compare_list(lnode.body, rnode.body) + + self.namespaces.pop() + + # endregion + + # region async and await + + def compare_AsyncFunctionDef(self, lnode, rnode): + if lnode.name != rnode.name: + raise CompareError(self.namespaces, lnode, rnode, 'AsyncFunctionDef names do not match') + + self.namespaces.append(lnode.name) + + self.compare(lnode.args, rnode.args) + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.decorator_list, rnode.decorator_list) + self.compare(lnode.returns, rnode.returns) + + self.namespaces.pop() + + def compare_Await(self, lnode, rnode): + self.compare(lnode.value, rnode.value) + + def compare_AsyncFor(self, lnode, rnode): + self.compare(lnode.target, rnode.target) + self.compare(lnode.iter, rnode.iter) + self.compare_list(lnode.body, rnode.body) + self.compare_list(lnode.orelse, rnode.orelse) + + def compare_AsyncWith(self, lnode, rnode): + + if hasattr(lnode, 'items'): + self.compare_list(lnode.items, rnode.items) + else: + self.compare(lnode.context_expr, rnode.context_expr) + self.compare(lnode.optional_vars, rnode.optional_vars) + + # endregion + + def compare_Module(self, lnode, rnode): + self.compare_list(lnode.body, rnode.body) diff --git a/src/python_minifier/miniprinter.py b/src/python_minifier/miniprinter.py new file mode 100644 index 00000000..32dca0c4 --- /dev/null +++ b/src/python_minifier/miniprinter.py @@ -0,0 +1,1390 @@ +import ast +import sys + +class MiniPrinter(object): + """ + Builds the smallest possible exact representation of an ast + """ + + def __init__(self, indent_char='\t'): + + self.code = '' + self.indent = 0 + self.indent_char = indent_char + + self.unicode_literals = False + + self.precedences = { + 'Lambda': 2, + 'IfExp': 3, + 'Or': 4, + 'And': 5, + 'Not': 6, + 'In': 7, 'NotIn': 7, 'Is': 7, 'IsNot': 7, 'Lt': 7, 'LtE': 7, 'Gt': 7, 'GtE': 7, 'NotEq': 7, 'Eq': 7, 
+ 'BitOr': 8, + 'BitXor': 9, + 'BitAnd': 10, + 'LShift': 11, 'RShift': 11, + 'Add': 12, 'Sub': 12, + 'Mult': 13, 'Div': 13, 'FloorDiv': 13, 'Mod': 13, 'MatMult': 13, + 'UAdd': 14, 'USub': 14, 'Invert': 14, + 'Pow': 15, + 'Await': 16, + 'Subscript': 17, 'Call': 17, 'Attribute': 17, + 'Tuple': 18, 'Set': 18, 'List': 18, 'Dict': 18, + 'comprehension': 18 + } + + def __call__(self, module): + """ + Generate the source code for an AST + + :param module: The Module to generate code for + :type module: ast.Module + :rtype: str + + """ + + self.visit_Module(module) + return self.code + + def precedence(self, node): + """ + The precedence of an expression + + Node will usually be an operator or literal. + Nodes with no precedence value return 0. + + :param node: The AST node to decide precedence for + :type node: ast.Node + :rtype: int + + """ + + if isinstance(node, ast.BinOp): + return self.precedences[node.op.__class__.__name__] + elif isinstance(node, ast.UnaryOp): + return self.precedences[node.op.__class__.__name__] + elif isinstance(node, ast.BoolOp): + return self.precedences[node.op.__class__.__name__] + elif isinstance(node, ast.Compare): + return min(self.precedences[n.__class__.__name__] for n in node.ops) + + # Python2 parses negative ints as an ast.Num with a negative value. + # Make sure the Num get the precedence of the USub operator in this case. + if sys.version_info < (3, 0) and isinstance(node, ast.Num): + if str(node.n)[0] == '-': + return self.precedences['USub'] + + return self.precedences.get(node.__class__.__name__, 0) + + def visit(self, node): + """ + Visit a node + + Call the correct visit_ method based on the node type. + Prefer to call the correct method directly if you already know + the node type. 
+ + :param node: The node to visit + :type node: ast.Node + + """ + + method = 'visit_' + node.__class__.__name__ + visitor = getattr(self, method, self.visit_Unknown) + return visitor(node) + + def newline(self): + """ + Ensure there is a newline at the end of the output + """ + + self.code = self.code.rstrip('\n' + self.indent_char + ';') + self.code += '\n' + self.code += self.indent_char * self.indent + + def visit_Unknown(self, node): + raise RuntimeError('Unknown node %r' % node) + + # region Simple Statements + + def visit_Exec(self, node): + assert isinstance(node, ast.Exec) + + self.token_break() + + self.code += 'exec' + self._expression(node.body) + + if node.globals: + self.token_break() + self.code += 'in' + self._expression(node.globals) + + if node.locals: + self.code += ',' + self._expression(node.locals) + + self.end_statement() + + def visit_Expr(self, node): + assert isinstance(node, ast.Expr) + + self._testlist(node.value) + self.end_statement() + + def visit_Assert(self, node): + assert isinstance(node, ast.Assert) + + self.token_break() + + self.code += 'assert' + self._expression(node.test) + + if node.msg: + self.code += ',' + self._expression(node.msg) + + self.end_statement() + + def visit_Assign(self, node): + assert isinstance(node, ast.Assign) + + for target_node in node.targets: + self._testlist(target_node) + self.code += '=' + + # Yield nodes that are the sole node on the right hand side of an assignment do not need parens + if isinstance(node.value, ast.Expr) and isinstance(node.value.value, ast.Yield): + self._yield_expr(node.value) + elif isinstance(node.value, ast.Expr) and isinstance(node.value.value, ast.YieldFrom): + self._yield_expr(node.value) + else: + self._testlist(node.value) + + self.end_statement() + + def visit_AugAssign(self, node): + assert isinstance(node, ast.AugAssign) + + self._testlist(node.target) + self.visit(node.op) + self.code += '=' + + # Yield nodes that are the sole node on the right hand side of an 
assignment do not need parens + if isinstance(node.value, ast.Expr) and isinstance(node.value.value, ast.Yield): + self._yield_expr(node.value) + elif isinstance(node.value, ast.Expr) and isinstance(node.value.value, ast.YieldFrom): + self._yield_expr(node.value) + else: + self._testlist(node.value) + + self.end_statement() + + def visit_AnnAssign(self, node): + assert isinstance(node, ast.AnnAssign) + + if node.simple: + self.visit(node.target) + else: + self.code += '(' + self._expression(node.target) + self.code += ')' + + if node.annotation: + self.code += ':' + self._expression(node.annotation) + + if node.value: + self.code += '=' + + # Yield nodes that are the sole node on the right hand side of an assignment do not need parens + if isinstance(node.value, ast.Expr) and isinstance(node.value.value, ast.Yield): + self._yield_expr(node.value) + elif isinstance(node.value, ast.Expr) and isinstance(node.value.value, ast.YieldFrom): + self._yield_expr(node.value) + else: + self._expression(node.value) + + self.end_statement() + + def visit_Pass(self, node): + assert isinstance(node, ast.Pass) + + self.token_break() + self.code += 'pass' + self.end_statement() + + def visit_Delete(self, node): + assert isinstance(node, ast.Delete) + + self.code += 'del' + self._exprlist(node.targets) + self.end_statement() + + def visit_Return(self, node): + assert isinstance(node, ast.Return) + + self.token_break() + self.code += 'return' + if node.value is not None: + self._expression(node.value) + self.end_statement() + + def visit_Print(self, node): + assert isinstance(node, ast.Print) + + self.code += 'print' + + first = True + + if node.dest: + self.code += '>>' + self._expression(node.dest) + first = False + + for v in node.values: + if first: + first = False + else: + self.code += ',' + + self._expression(v) + + if not node.nl: + self.code += ',' + + self.end_statement() + + def visit_Yield(self, node): + assert isinstance(node, ast.Yield) + + self._yield_expr(node) + 
self.end_statement() + + def visit_YieldFrom(self, node): + assert isinstance(node, ast.YieldFrom) + + self._yield_expr(node) + self.end_statement() + + def visit_Raise(self, node): + assert isinstance(node, ast.Raise) + + self.code += 'raise' + + if hasattr(node, 'type'): + # Python2 raise node + + if node.type: + self.code += ' ' + self._expression(node.type) + if node.inst: + self.code += ',' + self._expression(node.inst) + if node.tback: + self.code += ',' + self._expression(node.tback) + + else: + # Python3 + + if node.exc: + self.code += ' ' + self._expression(node.exc) + + if node.cause: + self.code += ' from ' + self._expression(node.cause) + + self.end_statement() + + def visit_Break(self, node): + assert isinstance(node, ast.Break) + + self.token_break() + self.code += 'break' + self.end_statement() + + def visit_Continue(self, node): + assert isinstance(node, ast.Continue) + + self.token_break() + self.code += 'continue' + self.end_statement() + + def visit_Import(self, node): + assert isinstance(node, ast.Import) + + self.code += 'import ' + + first = True + for n in node.names: + if first: + first = False + else: + self.code += ',' + + self.visit_alias(n) + + self.end_statement() + + def visit_ImportFrom(self, node): + assert isinstance(node, ast.ImportFrom) + + if node.module is None: + self.code += 'from ' + ('.' * node.level) + ' ' + else: + self.code += 'from ' + ('.' 
* node.level) + self.code += node.module + + self.code += ' import ' + first = True + for n in node.names: + if first: + first = False + else: + self.code += ',' + + if node.module == '__future__' and n.name == 'unicode_literals': + self.unicode_literals = True + + self.visit_alias(n) + + self.end_statement() + + def visit_alias(self, node): + assert isinstance(node, ast.alias) + + self.code += node.name + + if node.asname: + self.code += ' as ' + node.asname + + def visit_Global(self, node): + assert isinstance(node, ast.Global) + + self.code += 'global ' + ','.join(node.names) + self.end_statement() + + def visit_Nonlocal(self, node): + assert isinstance(node, ast.Nonlocal) + + self.code += 'nonlocal ' + ','.join(node.names) + self.end_statement() + + # endregion + + # region Compound Statements + + def visit_If(self, node, el=False): + assert isinstance(node, ast.If) + + self.newline() + + if el: + self.code += 'el' + self.code += 'if' + self._expression(node.test) + self.code += ':' + + self._suite(node.body) + + if node.orelse: + if len(node.orelse) == 1 and isinstance(node.orelse[0], ast.If): + # elif + self.visit_If(node.orelse[0], el=True) + self.newline() + else: + # an else block + self.code += 'else:' + self._suite(node.orelse) + + def visit_For(self, node, is_async=False): + assert isinstance(node, ast.For) or (hasattr(ast, 'AsyncFor') and isinstance(node, ast.AsyncFor)) + + self.newline() + + if is_async: + self.code += 'async ' + + self.code += 'for ' + self._exprlist([node.target]) + self.code += ' in ' + self._expression(node.iter) + self.code += ':' + + self._suite(node.body) + + if node.orelse: + self.newline() + self.code += 'else:' + self._suite(node.orelse) + + def visit_While(self, node): + assert isinstance(node, ast.While) + + self.newline() + self.code += 'while ' + self._expression(node.test) + self.code += ':' + self._suite(node.body) + + if node.orelse: + self.code += 'else:' + self._suite(node.orelse) + + def visit_Try(self, node): + 
assert isinstance(node, ast.Try) + + self.newline() + self.code += 'try:' + self._suite(node.body) + + [self.visit_ExceptHandler(n) for n in node.handlers] + + if node.orelse: + self.code += 'else:' + self._suite(node.orelse) + + if node.finalbody: + self.code += 'finally:' + self._suite(node.finalbody) + + def visit_TryFinally(self, node): + assert isinstance(node, ast.TryFinally) + + if len(node.body) == 1 and isinstance(node.body[0], ast.TryExcept): + self.visit_TryExcept(node.body[0]) + else: + self.newline() + self.code += 'try:' + self._suite(node.body) + + if node.finalbody: + self.code += 'finally:' + self._suite(node.finalbody) + + def visit_TryExcept(self, node): + assert isinstance(node, ast.TryExcept) + + self.newline() + self.code += 'try:' + self._suite(node.body) + + [self.visit_ExceptHandler(n) for n in node.handlers] + + if node.orelse: + self.code += 'else:' + self._suite(node.orelse) + + def visit_ExceptHandler(self, node): + assert isinstance(node, ast.ExceptHandler) + + self.code += 'except' + if node.type is not None: + self.code += ' ' + self._expression(node.type) + + if node.name is not None: + self.token_break() + self.code += 'as' + + if isinstance(node.name, str): + self.code += ' ' + node.name + else: + self._expression(node.name) + + self.code += ':' + + self._suite(node.body) + + def visit_With(self, node, is_async=False): + assert isinstance(node, ast.With) or (hasattr(ast, 'AsyncWith') and isinstance(node, ast.AsyncWith)) + + self.newline() + + if is_async: + self.code += 'async ' + + self.code += 'with' + + first = True + if hasattr(node, 'items'): + + for item in node.items: + if first: + first = False + else: + self.code += ',' + + if self.precedence(item.context_expr) != 0 and self.precedence(item.context_expr) <= self.precedence( + node + ): + self.code += '(' + self.visit_withitem(item) + self.code += ')' + else: + self.visit_withitem(item) + else: + self.visit_withitem(node) + + self.code += ':' + self._suite(node.body) + + 
def visit_withitem(self, node): + assert (hasattr(ast, 'withitem') and isinstance(node, ast.withitem)) or isinstance(node, ast.With) + + self._expression(node.context_expr) + + if node.optional_vars is not None: + self.token_break() + self.code += 'as' + self._expression(node.optional_vars) + + def visit_FunctionDef(self, node, is_async=False): + assert isinstance(node, ast.FunctionDef) or ( + hasattr(ast, 'AsyncFunctionDef') and isinstance(node, ast.AsyncFunctionDef) + ) + + self.newline() + + for d in node.decorator_list: + self.code += '@' + self.visit(d) + self.newline() + + if is_async: + self.code += 'async ' + + self.code += 'def ' + node.name + '(' + self.visit_arguments(node.args) + self.code += ')' + + if hasattr(node, 'returns') and node.returns: + self.code += '->' + self._expression(node.returns) + self.code += ':' + else: + self.code += ':' + + if hasattr(node, 'docstring') and node.docstring is not None: + self._suite([ast.Expr(value=ast.Str(s=node.docstring))] + node.body) + else: + self._suite(node.body) + + def visit_ClassDef(self, node): + assert isinstance(node, ast.ClassDef) + + self.newline() + + for d in node.decorator_list: + self.code += '@' + self.visit(d) + self.newline() + + first = True + self.code += 'class ' + node.name + + for b in node.bases: + if first: + self.code += '(' + first = False + else: + self.code += ',' + self._expression(b) + + if hasattr(node, 'starargs') and node.starargs is not None: + if first: + self.code += '(' + first = False + else: + self.code += ',' + + self.code += '*' + self._expression(node.starargs) + + if hasattr(node, 'keywords'): + for kw in node.keywords: + if first: + self.code += '(' + first = False + else: + self.code += ',' + self.visit_keyword(kw) + + if hasattr(node, 'kwargs') and node.kwargs is not None: + if first: + self.code += '(' + first = False + else: + self.code += ',' + + self.code += '**' + self.visit(node.kwargs) + + if not first: + self.code += ')' + + self.code += ':' + + if 
hasattr(node, 'docstring') and node.docstring is not None: + self._suite([ast.Expr(value=ast.Str(s=node.docstring))] + node.body) + else: + self._suite(node.body) + + # endregion + + # region async and await + + def visit_AsyncFunctionDef(self, node): + assert isinstance(node, ast.AsyncFunctionDef) + self.visit_FunctionDef(node, is_async=True) + + def visit_Await(self, node): + assert isinstance(node, ast.Await) + self.token_break() + self.code += 'await' + self._rhs(node.value, node) + + def visit_AsyncFor(self, node): + assert isinstance(node, ast.AsyncFor) + self.visit_For(node, is_async=True) + + def visit_AsyncWith(self, node): + assert isinstance(node, ast.AsyncWith) + self.visit_With(node, is_async=True) + + # endregion + + # region Literals + + def visit_Num(self, node): + self.token_break() + + v = repr(node.n) + + if v == 'inf': + self.code += '1e999' + elif v == '-inf': + self.code += '-1e999' + elif v == 'infj': + self.code += '1e999j' + elif v == '-infj': + self.code += '-1e999j' + + else: + if isinstance(node.n, int): + # Due to the 0x notation, it's unlikely a base-16 literal will be more compact than base-10 + # But for those rare cases.... 
+ h = hex(node.n) + if len(h) < len(v): + v = h + + self.code += v + + def visit_Str(self, node): + + s = repr(node.s) + + if sys.version_info < (3, 0) and self.unicode_literals: + if s[0] == 'u': + s = s[1:] + else: + s = 'b' + s + + if len(s) > 0 and s[0].isalpha(): + self.token_break() + + self.code += s + + def visit_Bytes(self, node): + + s = repr(node.s) + + if len(s) > 0 and s[0].isalpha(): + self.token_break() + + self.code += s + + def visit_List(self, node): + self.code += '[' + self._exprlist(node.elts) + self.code += ']' + + def visit_Tuple(self, node): + + if len(node.elts) == 0: + self.code += '()' + return + + self._exprlist(node.elts) + + if len(node.elts) == 1: + self.code += ',' + + def visit_Set(self, node): + self.code += '{' + self._exprlist(node.elts) + self.code += '}' + + def visit_Dict(self, node): + self.code += '{' + + first = True + for k, v in zip(node.keys, node.values): + if not first: + self.code += ',' + else: + first = False + + if k is None: + self.code += '**' + else: + self._expression(k) + self.code += ':' + + self._expression(v) + + self.code += '}' + + def visit_Ellipsis(self, node): + self.code += '...' + + def visit_NameConstant(self, node): + self.token_break() + self.code += repr(node.value) + + # endregion + + # region Variables + + def visit_Name(self, node): + self.token_break() + self.code += node.id + + def visit_Starred(self, node): + self.code += '*' + self._expression(node.value) + + # endregion + + # region Expressions + + def visit_UnaryOp(self, node): + self.visit(node.op) + + if sys.version_info < (3, 0) and isinstance(node.op, ast.USub) and isinstance(node.operand, ast.Num): + # For: -(1), which is parsed as a UnaryOp(USub, Num(1)). + # Without this special case it would be printed as -1 + # This is fine, but python 2 will then parse it at Num(-1) so the AST wouldn't round-trip. 
+ + self.code += '(' + self.visit_Num(node.operand) + self.code += ')' + return + + self._rhs(node.operand, node) + + def visit_UAdd(self, node): + self.code += '+' + + def visit_USub(self, node): + self.code += '-' + + def visit_Not(self, node): + self.token_break() + self.code += 'not' + + def visit_Invert(self, node): + self.code += '~' + + def visit_BinOp(self, node): + self._lhs(node.left, node.op) + self.visit(node.op) + self._rhs(node.right, node.op) + + def visit_Add(self, node): + self.code += '+' + + def visit_Sub(self, node): + self.code += '-' + + def visit_Mult(self, node): + self.code += '*' + + def visit_Div(self, node): + self.code += '/' + + def visit_FloorDiv(self, node): + self.code += '//' + + def visit_Mod(self, node): + self.code += '%' + + def visit_Pow(self, node): + self.code += '**' + + def visit_LShift(self, node): + self.code += '<<' + + def visit_RShift(self, node): + self.code += '>>' + + def visit_BitOr(self, node): + self.code += '|' + + def visit_BitXor(self, node): + self.code += '^' + + def visit_BitAnd(self, node): + self.code += '&' + + def visit_MatMult(self, node): + self.code += '@' + + def visit_BoolOp(self, node): + first = True + + op_precedence = self.precedence(node.op) + + for v in node.values: + if first: + first = False + else: + self._expression(node.op) + + value_precendence = self.precedence(v) + + if value_precendence != 0 and ( + (op_precedence > value_precendence) + or op_precedence == value_precendence + and self._is_left_associative(node.op) + ): + self.code += '(' + self._expression(v) + self.code += ')' + else: + self._expression(v) + + def visit_And(self, node): + self.token_break() + self.code += 'and' + + def visit_Or(self, node): + self.token_break() + self.code += 'or' + + def visit_Compare(self, node): + + left_precedence = self.precedence(node.left) + op_precedence = self.precedence(node.ops[0]) + + if left_precedence != 0 and ((op_precedence > left_precedence) or (op_precedence == left_precedence)): 
+ self.code += '(' + self._expression(node.left) + self.code += ')' + else: + self._expression(node.left) + + for op, comparator in zip(node.ops, node.comparators): + self._expression(op) + self._rhs(comparator, op) + + def visit_Eq(self, node): + self.code += '==' + + def visit_NotEq(self, node): + self.code += '!=' + + def visit_Lt(self, node): + self.code += '<' + + def visit_LtE(self, node): + self.code += '<=' + + def visit_Gt(self, node): + self.code += '>' + + def visit_GtE(self, node): + self.code += '>=' + + def visit_Is(self, node): + self.token_break() + self.code += 'is' + + def visit_IsNot(self, node): + self.token_break() + self.code += 'is not' + + def visit_In(self, node): + self.token_break() + self.code += 'in' + + def visit_NotIn(self, node): + self.token_break() + self.code += 'not in' + + def visit_Call(self, node): + + self._lhs(node.func, node) + + self.code += '(' + + first = True + for arg in node.args: + if first: + first = False + else: + self.code += ',' + + self._expression(arg) + + if node.keywords: + for kwarg in node.keywords: + if first: + first = False + else: + self.code += ',' + + assert isinstance(kwarg, ast.keyword) + self.visit_keyword(kwarg) + + if hasattr(node, 'starargs') and node.starargs is not None: + if first: + first = False + else: + self.code += ',' + + self.code += '*' + self._expression(node.starargs) + + if hasattr(node, 'kwargs') and node.kwargs is not None: + if not first: + self.code += ',' + + self.code += '**' + self.visit(node.kwargs) + + self.code += ')' + + def visit_keyword(self, node): + if node.arg is None: + self.code += '**' + self._expression(node.value) + else: + self.code += node.arg + '=' + self._expression(node.value) + + def visit_IfExp(self, node): + + self._rhs(node.body, node) + + self.token_break() + self.code += 'if' + + self._rhs(node.test, node) + + self.token_break() + self.code += 'else' + + self._expression(node.orelse) + + def visit_Attribute(self, node): + self.token_break() + + 
value_precedence = self.precedence(node.value) + attr_precedence = self.precedence(node) + + if (value_precedence != 0 and (attr_precedence > value_precedence)) or isinstance(node.value, ast.Num): + self.code += '(' + self._expression(node.value) + self.code += ')' + else: + self._expression(node.value) + + self.code += '.' + node.attr + + # endregion + + # region Subscripting + + def visit_Subscript(self, node): + + value_precedence = self.precedence(node.value) + slice_precedence = 17 # self.precedence(node) + + if value_precedence != 0 and (slice_precedence > value_precedence): + self.code += '(' + self._expression(node.value) + self.code += ')' + else: + self._expression(node.value) + + self.code += '[' + + if isinstance(node.slice, ast.Index): + self.visit_Index(node.slice) + elif isinstance(node.slice, ast.Slice): + self.visit_Slice(node.slice) + elif isinstance(node.slice, ast.ExtSlice): + self.visit_ExtSlice(node.slice) + elif isinstance(node.slice, ast.Ellipsis): + self.visit_Ellipsis(node) + else: + raise AssertionError('Unknown slice type %r' % node.slice) + + self.code += ']' + + def visit_Index(self, node): + self._expression(node.value) + + def visit_Slice(self, node): + if node.lower: + self._expression(node.lower) + self.code += ':' + if node.upper: + self._expression(node.upper) + if node.step: + self.code += ':' + self._expression(node.step) + + def visit_ExtSlice(self, node): + first = True + + for s in node.dims: + if not first: + self.code += ',' + else: + first = False + + self._expression(s) + + if len(node.dims) == 1: + self.code += ',' + + # endregion + + # region Comprehensions + + def visit_ListComp(self, node): + self.code += '[' + self._expression(node.elt) + [self.visit_comprehension(x) for x in node.generators] + self.code += ']' + + def visit_SetComp(self, node): + self.code += '{' + self._expression(node.elt) + [self.visit_comprehension(x) for x in node.generators] + self.code += '}' + + def visit_GeneratorExp(self, node): + 
self.code += '(' + self._expression(node.elt) + [self.visit_comprehension(x) for x in node.generators] + self.code += ')' + + def visit_DictComp(self, node): + self.code += '{' + self._expression(node.key) + self.code += ':' + self._expression(node.value) + [self.visit_comprehension(x) for x in node.generators] + self.code += '}' + + def visit_comprehension(self, node): + assert isinstance(node, ast.comprehension) + + self.token_break() + + if hasattr(node, 'is_async') and node.is_async: + self.code += 'async ' + + self.code += 'for' + self._exprlist([node.target]) + self.token_break() + self.code += 'in' + + self._rhs(node.iter, node) + + if node.ifs: + for i in node.ifs: + self.token_break() + self.code += 'if' + self._rhs(i, node) + + # endregion + + # region Function and Class definitions + + def visit_Lambda(self, node): + + self.token_break() + self.code += 'lambda' + + if node.args: + self.token_break() + self.visit_arguments(node.args) + + self.code += ':' + + self._expression(node.body) + + def visit_arguments(self, node): + first = True + + count_no_defaults = len(node.args) - len(node.defaults) + for i, arg in enumerate(node.args): + if not first: + self.code += ',' + else: + first = False + + self._expression(arg) + + if i >= count_no_defaults: + self.code += '=' + self._expression(node.defaults[i - count_no_defaults]) + + if node.vararg: + if not first: + self.code += ',' + else: + first = False + + self.code += '*' + + if hasattr(node, 'varargannotation'): + self.code += node.vararg + if node.varargannotation is not None: + self.code += ':' + self._expression(node.varargannotation) + elif isinstance(node.vararg, str): + self.code += node.vararg + else: + self.visit(node.vararg) + + if hasattr(node, 'kwonlyargs') and node.kwonlyargs: + + if not node.vararg: + if not first: + self.code += ',' + else: + first = False + + self.code += '*' + + for i, arg in enumerate(node.kwonlyargs): + self.code += ',' + self.visit_arg(arg) + + if node.kw_defaults[i] is 
not None: + self.code += '=' + self._expression(node.kw_defaults[i]) + + if node.kwarg: + if not first: + self.code += ',' + + self.code += '**' + + if hasattr(node, 'kwargannotation'): + self.code += node.kwarg + if node.kwargannotation is not None: + self.code += ':' + self._expression(node.kwargannotation) + elif isinstance(node.kwarg, str): + self.code += node.kwarg + else: + self.visit(node.kwarg) + + def visit_arg(self, node): + if isinstance(node, ast.Name): + # Python 2 uses Name nodes + return self.visit_Name(node) + + self.code += node.arg + + if node.annotation: + self.code += ':' + self._expression(node.annotation) + + def visit_Repr(self, node): + self.code += '`' + self._expression(node.value) + self.code += '`' + + # endregion + + def visit_Module(self, node): + if hasattr(node, 'docstring') and node.docstring is not None: + # Python 3.6 added a docstring field! Really useful for every use case except this one... + # Put the docstring back into the body + self._suite_body([ast.Expr(value=ast.Str(s=node.docstring))] + node.body) + else: + self._suite_body(node.body) + + def _expression(self, expression): + if isinstance(expression, ast.Yield) or (hasattr(ast, 'YieldFrom') and isinstance(expression, ast.YieldFrom)): + self.code += '(' + self._yield_expr(expression) + self.code += ')' + elif isinstance(expression, ast.Tuple) and len(expression.elts) > 0: + self.code += '(' + self.visit_Tuple(expression) + self.code += ')' + else: + self.visit(expression) + + def _testlist(self, test): + if isinstance(test, ast.Yield) or (hasattr(ast, 'YieldFrom') and isinstance(test, ast.YieldFrom)): + self.code += '(' + self._yield_expr(test) + self.code += ')' + else: + self.visit(test) + + def _exprlist(self, exprlist): + first = True + + for expr in exprlist: + if first: + first = False + else: + self.code += ',' + self._expression(expr) + + def _yield_expr(self, yield_node): + self.token_break() + + if isinstance(yield_node, ast.Yield): + self.code += 'yield' + 
elif isinstance(yield_node, ast.YieldFrom): + self.code += 'yield from' + + if yield_node.value is not None: + self._expression(yield_node.value) + + def _suite(self, node_list): + + compound_statements = [ + 'For', + 'While', + 'Try', + 'If', + 'With', + 'ClassDef', + 'TryFinally', + 'TryExcept', + 'FunctionDef', + 'AsyncFunctionDef', + 'AsyncFor', + 'AsyncWith', + ] + + if len(node_list) == 1 and node_list[0].__class__.__name__ not in compound_statements: + self._suite_body(node_list) + self.newline() + else: + self.enter_block() + self._suite_body(node_list) + self.leave_block() + + def _suite_body(self, node_list): + + statements = { + 'Assign': self.visit_Assign, + 'AnnAssign': self.visit_AnnAssign, + 'AugAssign': self.visit_AugAssign, + 'Expr': self.visit_Expr, + 'Delete': self.visit_Delete, + 'Pass': self.visit_Pass, + 'Import': self.visit_Import, + 'ImportFrom': self.visit_ImportFrom, + 'Global': self.visit_Global, + 'Nonlocal': self.visit_Nonlocal, + 'Assert': self.visit_Assert, + 'Break': self.visit_Break, + 'Continue': self.visit_Continue, + 'Return': self.visit_Return, + 'Raise': self.visit_Raise, + 'Yield': self.visit_Yield, + 'YieldFrom': self.visit_YieldFrom, + 'For': self.visit_For, + 'While': self.visit_While, + 'Try': self.visit_Try, + 'If': self.visit_If, + 'With': self.visit_With, + 'ClassDef': self.visit_ClassDef, + 'FunctionDef': self.visit_FunctionDef, + 'AsyncFunctionDef': self.visit_AsyncFunctionDef, + 'AsyncFor': self.visit_AsyncFor, + 'AsyncWith': self.visit_AsyncWith, + 'TryFinally': self.visit_TryFinally, + 'TryExcept': self.visit_TryExcept, + 'Print': self.visit_Print, + 'Exec': self.visit_Exec, + } + + for node in node_list: + statements[node.__class__.__name__](node) + + @staticmethod + def _is_right_associative(operator): + return isinstance(operator, ast.Pow) + + @staticmethod + def _is_left_associative(operator): + return not isinstance(operator, ast.Pow) + + def _lhs(self, left_node, op_node): + left_precedence = 
self.precedence(left_node) + op_precedence = self.precedence(op_node) + + if left_precedence != 0 and ( + (op_precedence > left_precedence) + or (op_precedence == left_precedence and self._is_right_associative(op_node)) + ): + self.code += '(' + self._expression(left_node) + self.code += ')' + else: + self._expression(left_node) + + def _rhs(self, right_node, op_node): + right_precedence = self.precedence(right_node) + op_precedence = self.precedence(op_node) + + if right_precedence != 0 and ((op_precedence > right_precedence) or ( + op_precedence == right_precedence and self._is_left_associative(op_node))): + self.code += '(' + self._expression(right_node) + self.code += ')' + else: + self._expression(right_node) + + def enter_block(self): + self.indent += 1 + self.newline() + + def leave_block(self): + self.indent -= 1 + self.newline() + + def token_break(self): + if len(self.code) == 0: + return + + if self.code[-1].isalnum() or self.code[-1] == '_': + self.code += ' ' + + def end_statement(self): + """ End a statement with a newline, or a semi-colon if it saves characters. 
""" + + if self.indent == 0: + self.newline() + else: + if self.code[-1] != ';': + self.code += ';' diff --git a/src/python_minifier/ministring.py b/src/python_minifier/ministring.py new file mode 100644 index 00000000..d9003e84 --- /dev/null +++ b/src/python_minifier/ministring.py @@ -0,0 +1,184 @@ +from __future__ import print_function + +BACKSLASH = '\\' + + +class MiniString(object): + """ + Create a representation of a string object + + :param str string: The string to minify + + """ + + def __init__(self, string, quote="'"): + self._s = string + self.safe_mode = False + self.quote = quote + + def __str__(self): + """ + The smallest python literal representation of a string + + :rtype: str + + """ + + if self._s == '': + return '' + + if len(self.quote) == 1: + s = self.to_short() + else: + s = self.to_long() + + try: + eval(self.quote + s + self.quote) + except UnicodeDecodeError: + if self._safe_mode: + raise + + print('Unicode encoding error!') + self._safe_mode = True + + if eval(self.quote + s + self.quote) != self._s: + print('Expected [%r]' % self._s) + print('Actual [%r]' % s) + assert eval(self.quote + s + self.quote) == self._s + + return s + + def to_short(self): + s = '' + + escaped = { + '\n': BACKSLASH + 'n', + '\\': BACKSLASH + BACKSLASH, + '\a': BACKSLASH + 'a', + '\b': BACKSLASH + 'b', + '\f': BACKSLASH + 'f', + '\r': BACKSLASH + 'r', + '\t': BACKSLASH + 't', + '\v': BACKSLASH + 'v', + '\0': BACKSLASH + 'x00', + self.quote: BACKSLASH + self.quote, + } + + for c in self._s: + if c in escaped.keys(): + s += escaped[c] + else: + if self.safe_mode: + unicode_value = ord(c) + if unicode_value <= 0x7f: + s += c + elif unicode_value <= 0xffFF: + s += BACKSLASH + 'u' + format(unicode_value, '04x') + else: + s += BACKSLASH + 'U' + format(unicode_value, '08x') + else: + s += c + + return s + + def to_long(self): + s = '' + + escaped = { + '\\': BACKSLASH + BACKSLASH, + '\a': BACKSLASH + 'a', + '\b': BACKSLASH + 'b', + '\f': BACKSLASH + 'f', + '\r': 
BACKSLASH + 'r', + '\t': BACKSLASH + 't', + '\v': BACKSLASH + 'v', + '\0': BACKSLASH + 'x00', + self.quote[0]: BACKSLASH + self.quote[0], + } + + for c in self._s: + if c in escaped.keys(): + s += escaped[c] + else: + if self.safe_mode: + unicode_value = ord(c) + if unicode_value <= 0x7f: + s += c + elif unicode_value <= 0xffFF: + s += BACKSLASH + 'u' + format(unicode_value, '04x') + else: + s += BACKSLASH + 'U' + format(unicode_value, '08x') + else: + s += c + + return s + + +class MiniBytes(object): + """ + Create a representation of a bytes object + + :param bytes string: The string to minify + + """ + + def __init__(self, string, quote="'"): + self._b = string + self.quote = quote + + def __str__(self): + """ + The smallest python literal representation of a string + + :rtype: str + + """ + + if self._b == b'': + return '' + + if len(self.quote) == 1: + s = self.to_short() + else: + s = self.to_long() + + if eval('b' + self.quote + s + self.quote) != self._b: + print('Expected [%r]' % self._b) + print('Actual [%r]' % s) + assert eval('b' + self.quote + s + self.quote) == self._b + + return s + + def to_short(self): + b = '' + + for c in self._b: + if c == b'\\': + b += BACKSLASH + elif c == b'\n': + b += BACKSLASH + 'n' + elif c == self.quote: + b += BACKSLASH + self.quote + else: + if c >= 128: + b += BACKSLASH + chr(c) + else: + b += chr(c) + + return b + + def to_long(self): + b = '' + + for c in self._b: + if c == b'\\': + b += BACKSLASH + elif c == self.quote: + b += BACKSLASH + self.quote + else: + if c >= 128: + b += BACKSLASH + chr(c) + else: + b += chr(c) + + return b diff --git a/src/python_minifier/quote_decider.py b/src/python_minifier/quote_decider.py new file mode 100644 index 00000000..178152b4 --- /dev/null +++ b/src/python_minifier/quote_decider.py @@ -0,0 +1,108 @@ +import ast +import copy + + +class QuoteDecider(ast.NodeVisitor): + """ + Decides the quote style to use for f-strings + + Call a QuoteDecider object with the outer JoinedStr Node 
of an f-string expression. + A list of up to four string elements is returned. This is a stack of the quote styles that + should be used by the JoinedStr Nodes in the expression. + + Enclosed Str and Bytes Nodes may only use quote styles that have not been used by an outer JoinedStr. + Note that Str and Bytes nodes may not contain a blackslash, so raw strings may be needed. + The returned quote order accounts for this. + + """ + + def __init__(self): + super(QuoteDecider, self).__init__() + + self.nested = 0 + self.max_nested = 0 + + self.special_chars = [[], [], [], []] + + def valid_quote(self, quote, start_level): + for n_level in range(start_level, self.max_nested): + if quote in self.special_chars[n_level]: + return False + + return True + + def choose_level(self, level, candidates): + if level == self.max_nested: + return [] + + for quote in candidates: + if not self.valid_quote(quote, level): + continue + + try: + nested_candidates = copy.copy(candidates) + nested_candidates.remove(quote) + return [quote] + self.choose_level(level + 1, nested_candidates) + except Exception: + continue + + raise ValueError('Unable to find a quote style for nested f-string (level %i)' % level) + + def __call__(self, node): + assert isinstance(node, ast.JoinedStr) + + # Assemble the list of special characters that need to be represented at each nesting level + self.visit_JoinedStr(node) + + if self.max_nested > 4: + raise ValueError('Impossible to represent that many nested strings') + + quote_order = self.choose_level(0, candidates=['"', "'", '"""', "'''"]) + + return quote_order + + def visit_Str(self, node): + assert isinstance(node, ast.Str) + + if '\\' in node.s: + raise ValueError('Backslash not allowed in f-string expression') + + for special in ['"', "'", '\n']: + if special in node.s: + self.special_chars[self.nested].append(special) + + self.nested += 1 + self.max_nested = max(self.max_nested, self.nested) + self.nested -= 1 + + def visit_FormattedValue(self, node): + 
assert isinstance(node, ast.FormattedValue) + self.visit(node.value) + + if node.format_spec: + self.visit_JoinedStr(node.format_spec) + + def visit_JoinedStr(self, node): + assert isinstance(node, ast.JoinedStr) + + self.nested += 1 + self.max_nested = max(self.max_nested, self.nested) + for v in node.values: + if isinstance(v, ast.Str): + continue + self.visit(v) + self.nested -= 1 + + def visit_Bytes(self, node): + assert isinstance(node, ast.Bytes) + + if '\\' in node.s: + raise ValueError('Backslash not allowed in f-string expression') + + for special in ['"', "'", '\n']: + if special in node.s: + self.special_chars[self.nested].append(special) + + self.nested += 1 + self.max_nested = max(self.max_nested, self.nested) + self.nested -= 1 diff --git a/test/test_dir.py b/test/test_dir.py new file mode 100644 index 00000000..4f2c9c98 --- /dev/null +++ b/test/test_dir.py @@ -0,0 +1,28 @@ +import os +from multiprocessing import Pool +from test_file import test_file + + +def test_dir(path, concurrency=None): + pool = Pool() + + try: + for subdir, dirs, files in os.walk(path): + python_files = filter(lambda f: f.endswith('.py'), [os.path.join(subdir, file) for file in files]) + + if concurrency == 1: + for path in python_files: + test_file(path) + else: + pool.map_async(test_file, python_files) + + print('All jobs submitted') + finally: + pool.close() + pool.join() + + print('Done') + + +if __name__ == '__main__': + test_dir('gh') diff --git a/test/test_env.py b/test/test_env.py new file mode 100644 index 00000000..f9d6f8e2 --- /dev/null +++ b/test/test_env.py @@ -0,0 +1,33 @@ +import sys +import os +from multiprocessing import Pool +from test_file import test_file + + +def test_env(concurrency=None): + pool = Pool(concurrency) + + try: + for sys_path in sys.path: + for subdir, dirs, files in os.walk(sys_path): + python_files = filter(lambda f: f.endswith('.py'), [os.path.join(subdir, file) for file in files]) + + if concurrency == 1: + for path in python_files: + 
test_file(path) + else: + pass + pool.map_async(test_file, python_files) + + print('All jobs submitted') + finally: + pool.close() + pool.join() + + print('Done') + + +if __name__ == '__main__': + print('Interpreter version: ', sys.version_info) + print('sys.path: ', sys.path) + test_env() diff --git a/test/test_file.py b/test/test_file.py new file mode 100644 index 00000000..02ca6aa1 --- /dev/null +++ b/test/test_file.py @@ -0,0 +1,21 @@ +import sys +from python_minifier import minify + +sys.setrecursionlimit(20000) + + +def test_file(path): + with open(path, 'rb') as f: + try: + return minify(f.read(), filename=path) + except SyntaxError: + pass + +if __name__ == '__main__': + if len(sys.argv) <= 1: + print('Usage: test_file.py ') + exit(-1) + + print(test_file(sys.argv[1])) + + exit(0) diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..c8c0c116 --- /dev/null +++ b/tox.ini @@ -0,0 +1,32 @@ +[tox] +envlist = py26,py27,py33,py34,py35,py36,py37,pypy,pypy3 + +[testenv] +commands = python test/test_env.py + +[testenv:py26] +basepython = /usr/bin/python2.6 + +[testenv:py27] +basepython = /usr/bin/python2.7 + +[testenv:py33] +basepython = /usr/bin/python3.3 + +[testenv:py34] +basepython = /usr/bin/python3.4 + +[testenv:py35] +basepython = /usr/bin/python3.5 + +[testenv:py36] +basepython = /usr/bin/python3.6 + +[testenv:py37] +basepython = /usr/bin/python3.7 + +[testenv:pypy] +basepython = /usr/bin/pypy + +[testenv:pypy3] +basepython = /usr/bin/pypy3