From b8db8c34c450e4f03f49be3d2b873a38ca8570bc Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Tue, 28 Apr 2026 15:42:47 +0200 Subject: [PATCH] Add pystandalone --- .github/pull_request_template.md | 24 + .github/workflows/dissect-ci.yml | 95 +++ .gitignore | 11 + COPYRIGHT | 5 + LICENSE | 201 ++++++ MANIFEST.in | 4 + README.md | 55 ++ pyproject.toml | 147 +++++ pystandalone/__init__.py | 0 pystandalone/__main__.py | 6 + pystandalone/available.py | 322 ++++++++++ pystandalone/binary.py | 91 +++ pystandalone/bootstrap.py | 113 ++++ pystandalone/builder.py | 522 +++++++++++++++ pystandalone/chacha20.py | 165 +++++ pystandalone/codesign.py | 282 +++++++++ pystandalone/compiler.py | 65 ++ pystandalone/distribution.py | 441 +++++++++++++ pystandalone/packer.py | 88 +++ pystandalone/redist/__init__.py | 0 pystandalone/redist/monkey.py | 357 +++++++++++ pystandalone/redist/wingui.py | 280 ++++++++ pystandalone/source.py | 329 ++++++++++ pystandalone/zipapp.py | 292 +++++++++ tests/__init__.py | 0 tests/_data/chacha20_unique_iv.bin | 1 + tests/_data/chacha20_zero_iv.bin | 1 + tests/_docs/Makefile | 24 + tests/_docs/conf.py | 44 ++ tests/_docs/index.rst | 8 + tests/conftest.py | 57 ++ tests/redist/__init__.py | 0 tests/redist/test_monkey.py | 984 +++++++++++++++++++++++++++++ tests/test_binary.py | 85 +++ tests/test_bootstrap.py | 148 +++++ tests/test_builder.py | 533 ++++++++++++++++ tests/test_chacha20.py | 59 ++ tests/test_compiler.py | 72 +++ tests/test_distribution.py | 69 ++ tests/test_packer.py | 83 +++ tests/test_source.py | 335 ++++++++++ tests/test_zipapp.py | 225 +++++++ tox.ini | 75 +++ 43 files changed, 6698 insertions(+) create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/dissect-ci.yml create mode 100644 .gitignore create mode 100644 COPYRIGHT create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 
pystandalone/__init__.py create mode 100644 pystandalone/__main__.py create mode 100644 pystandalone/available.py create mode 100644 pystandalone/binary.py create mode 100644 pystandalone/bootstrap.py create mode 100644 pystandalone/builder.py create mode 100644 pystandalone/chacha20.py create mode 100644 pystandalone/codesign.py create mode 100644 pystandalone/compiler.py create mode 100644 pystandalone/distribution.py create mode 100644 pystandalone/packer.py create mode 100644 pystandalone/redist/__init__.py create mode 100644 pystandalone/redist/monkey.py create mode 100644 pystandalone/redist/wingui.py create mode 100644 pystandalone/source.py create mode 100644 pystandalone/zipapp.py create mode 100644 tests/__init__.py create mode 100644 tests/_data/chacha20_unique_iv.bin create mode 100644 tests/_data/chacha20_zero_iv.bin create mode 100644 tests/_docs/Makefile create mode 100644 tests/_docs/conf.py create mode 100644 tests/_docs/index.rst create mode 100644 tests/conftest.py create mode 100644 tests/redist/__init__.py create mode 100644 tests/redist/test_monkey.py create mode 100644 tests/test_binary.py create mode 100644 tests/test_bootstrap.py create mode 100644 tests/test_builder.py create mode 100644 tests/test_chacha20.py create mode 100644 tests/test_compiler.py create mode 100644 tests/test_distribution.py create mode 100644 tests/test_packer.py create mode 100644 tests/test_source.py create mode 100644 tests/test_zipapp.py create mode 100644 tox.ini diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..52a77a5 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,24 @@ + diff --git a/.github/workflows/dissect-ci.yml b/.github/workflows/dissect-ci.yml new file mode 100644 index 0000000..fe0aba1 --- /dev/null +++ b/.github/workflows/dissect-ci.yml @@ -0,0 +1,95 @@ +name: Dissect CI +on: + push: + branches: + - main + tags: + - '*' + pull_request: + workflow_dispatch: + +jobs: + 
build: + runs-on: ubuntu-latest + steps: + - uses: fox-it/dissect-workflow-templates/.github/actions/git-checkout@main + + - uses: fox-it/dissect-workflow-templates/.github/actions/tox-run@main + with: + tox-environment: build + + - uses: actions/upload-artifact@v4 + with: + name: packages + path: dist/* + retention-days: 1 + + lint: + needs: build + runs-on: ubuntu-latest + steps: + - uses: fox-it/dissect-workflow-templates/.github/actions/git-checkout@main + + - uses: fox-it/dissect-workflow-templates/.github/actions/tox-run@main + with: + tox-environment: lint + + test: + needs: build + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.allow_failure }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "ubuntu-22.04-arm", "windows-latest", "windows-11-arm", "macos-latest", "macos-15-intel"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + include: + - allow_failure: false + tox-version: "4.27.0" + exclude: + - os: "windows-11-arm" + python-version: "3.10" + + steps: + - uses: fox-it/dissect-workflow-templates/.github/actions/git-checkout@main + + - uses: fox-it/dissect-workflow-templates/.github/actions/tox-run@main + with: + tox-environment: ${{ matrix.python-version }} + runner-os: ${{ matrix.os }} + python-version: ${{ matrix.python-version }} + + - uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.DISSECT_CODECOV_TOKEN }} + env_vars: PYTHON + files: coverage.xml + flags: unittests + verbose: true + + docs-build: + needs: build + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: fox-it/dissect-workflow-templates/.github/actions/git-checkout@main + + - uses: fox-it/dissect-workflow-templates/.github/actions/tox-run@main + with: + tox-environment: docs-build + + publish: + if: ${{ github.ref_name == 'main' || github.ref_type == 'tag' }} + needs: [test] + runs-on: ubuntu-latest + environment: dissect_publish + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + 
name: packages + path: dist/ + # According to the documentation, it automatically looks inside the `dist/` folder for packages. + - name: Publish package distributions to Pypi + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2e944b3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +coverage.xml +.coverage +dist/ +.eggs/ +*.egg-info/ +*.pyc +__pycache__/ +.pytest_cache/ +tests/_docs/api +tests/_docs/build +.tox/ diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..a61167d --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,5 @@ +Dissect is released as open source by Fox-IT (https://www.fox-it.com) part of NCC Group Plc (https://www.nccgroup.com) + +Developed by the Dissect Team (dissect@fox-it.com) and made available at https://github.com/fox-it/pystandalone + +License terms: Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c053cd7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2022 Fox-IT B.V. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..23519f8 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +exclude .gitattributes +exclude .gitignore +recursive-exclude .github/ * +recursive-exclude tests/_data/ * diff --git a/README.md b/README.md new file mode 100644 index 0000000..c5ce38b --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# pystandalone + +`pystandalone` is a utility to build opinionated standalone Python executables. This does not aim to be a generic "standalone Python" builder, it merely solves some niche use-cases. + +Derived from the amazing `python-build-standalone`, precompiled distributions are provided by [`python-build-pystandalone`](https://github.com/fox-it/python-build-pystandalone). These distributions include some patches to make Python a little bit more "forensically sound", as well as adding some nice utilities: + +- Ability to run Python code from memory (embedded inside the executable) +- Don't update file access times +- Fixes for subprocess execution in ESXi environments +- Easy access to OpenSSL/LibreSSL backed cryptographic ciphers. + +For more information, please see [the documentation](https://docs.dissect.tools/en/latest/projects/pystandalone/index.html). + +## Installation + +`pystandalone` is available on [PyPI](https://pypi.org/project/pystandalone/). 
+ +```bash +pip install pystandalone +``` + +## Build and test instructions + +This project uses `tox` to build source and wheel distributions. Run the following command from the root folder to build +these: + +```bash +tox -e build +``` + +The build artifacts can be found in the `dist/` directory. + +`tox` is also used to run linting and unit tests in a self-contained environment. To run both linting and unit tests +using the default installed Python version, run: + +```bash +tox +``` + +For a more elaborate explanation on how to build and test the project, please see [the +documentation](https://docs.dissect.tools/en/latest/contributing/tooling.html). + +## Contributing + +The Dissect project encourages any contribution to the codebase. To make your contribution fit into the project, please +refer to [the development guide](https://docs.dissect.tools/en/latest/contributing/developing.html). + +## Copyright and license + +Dissect is released as open source by Fox-IT () part of NCC Group Plc +(). + +Developed by the Dissect Team () and made available at . + +License terms: Apache License 2.0 (). For more information, see the LICENSE file. 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..da2c02a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,147 @@ +[build-system] +requires = ["setuptools>=80.9.0", "setuptools_scm[toml]>=6.4.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "pystandalone" +description = "" +readme = "README.md" +requires-python = ">=3.10" +license = "Apache-2.0" +license-files = ["LICENSE", "COPYRIGHT"] +authors = [ + {name = "Dissect Team", email = "dissect@fox-it.com"} +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Internet :: Log Analysis", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Security", + "Topic :: Utilities", +] +dependencies = [ + "dissect.cstruct>=4,<5", + "dissect.executable>=1,<2", + "lief==0.17.6", + "platformdirs>=4.9.6", + "tqdm>=4.67.3", +] +dynamic = ["version"] + +[project.urls] +homepage = "https://dissect.tools" +documentation = "https://docs.dissect.tools/en/latest/projects/dissect.database" +repository = "https://github.com/fox-it/dissect.database" + +[project.optional-dependencies] +dev = [ + "dissect.cstruct>=4.0.dev,<5.0.dev", + "dissect.executable[dev]>=1.0.dev,<2.0.dev", +] + +[dependency-groups] +test = [ + "pytest", +] +lint = [ + "ruff==0.13.1", + "vermin", + "typing_extensions", +] +build = [ + "build", +] +debug = [ + "ipdb", +] +dev = [ + {include-group = "test"}, + {include-group = "lint"}, + {include-group = "debug"}, +] + +[project.scripts] +pystandalone = "pystandalone.builder:main" +pystandalone-zipapp = "pystandalone.zipapp:main" + +[tool.ruff] +line-length = 120 +required-version = ">=0.13.1" + +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint] +select = [ + "F", + "E", + "W", + "I", + "UP", + "YTT", + "ANN", + "B", + 
"C4", + "DTZ", + "T10", + "FA", + "ISC", + "G", + "INP", + "PIE", + "PYI", + "PT", + "Q", + "RSE", + "RET", + "SLOT", + "SIM", + "TID", + "TC", + "PTH", + "PLC", + "TRY", + "FLY", + "PERF", + "FURB", + "RUF", + "D" +] +ignore = [ + "E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003", "PLC0415", + # Ignore some pydocstyle rules for now as they require a larger cleanup + "D1", + "D205", + "D301", + "D417", + # Seems bugged: https://github.com/astral-sh/ruff/issues/16824 + "D402", +] +future-annotations = true + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.flake8-type-checking] +strict = true + +[tool.ruff.lint.per-file-ignores] +"tests/_docs/**" = ["INP001"] +"*.pyi" = ["E", "F", "PYI"] + +[tool.ruff.lint.isort] +known-first-party = ["pystandalone"] +required-imports = ["from __future__ import annotations"] + +[tool.setuptools.packages.find] +include = ["pystandalone", "pystandalone.*"] + +[tool.setuptools.package-data] +pystandalone = ["dist/*.tar.zst"] + +[tool.setuptools_scm] diff --git a/pystandalone/__init__.py b/pystandalone/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pystandalone/__main__.py b/pystandalone/__main__.py new file mode 100644 index 0000000..f81dd3d --- /dev/null +++ b/pystandalone/__main__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from pystandalone import builder + +if __name__ == "__main__": + builder.main() diff --git a/pystandalone/available.py b/pystandalone/available.py new file mode 100644 index 0000000..66eb5d9 --- /dev/null +++ b/pystandalone/available.py @@ -0,0 +1,322 @@ +from __future__ import annotations + +DISTRIBUTIONS = [ + ( + "3.10", + "aarch64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-aarch64-apple-darwin-install_only_stripped.tar.gz", + "21f0959d819dad2f0315382fc738b8ce43a91023d733dec6ffc5eff3232077b6", + ), + ( + "3.10", + 
"aarch64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz", + "72c2fa5fa6275ce3178456e1740ab77b22a6b1bc577fff7c632a07cf37fcc4fc", + ), + ( + "3.10", + "aarch64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-aarch64-unknown-linux-musl-install_only_stripped.tar.gz", + "b3d9eb878a964c4363c69eacaa7e1d916511d473864e7adc0f99b74eddea3494", + ), + ( + "3.10", + "i686-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-i686-pc-windows-msvc-install_only_stripped.tar.gz", + "8caad7e18c205b660b9001b472bc9fd3be2b07e43f1181331d8f9c5ffffd3741", + ), + ( + "3.10", + "x86_64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-x86_64-apple-darwin-install_only_stripped.tar.gz", + "9c0721c9e1604899497c09df846727e1da4fdba79e2903a45cd5b286bd4482c1", + ), + ( + "3.10", + "x86_64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz", + "f411226dd51e157f820ccdf78a6933b9ac2d24fb42e4e0f9b2a1bf2399c07961", + ), + ( + "3.10", + "x86_64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz", + "d29a507cfc4d50e97233ace241809f9e5ef3707be5e2c4e4b09640c9d3e2cd59", + ), + ( + "3.10", + "x86_64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.10.20%2B20260414-x86_64-unknown-linux-musl-install_only_stripped.tar.gz", + "115c5789acf4c8e4b46e7e8d181d494b53b5f81edd123ce570311eeaaab35011", + ), + ( + "3.11", + "aarch64-apple-darwin", + 
"https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-aarch64-apple-darwin-install_only_stripped.tar.gz", + "ceb2119cfcb5a7c8e7de81ba92e77c9caed4265bb260a91246bd17c5bec0872c", + ), + ( + "3.11", + "aarch64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-aarch64-pc-windows-msvc-install_only_stripped.tar.gz", + "b8380ccd7e0404a931d90da61d88b6f66b367fce893e29359e176441c004e9a5", + ), + ( + "3.11", + "aarch64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz", + "8c321bdbf605314e109d25d6bdc637d5a5e46798396991e669c0816c3c94d0d3", + ), + ( + "3.11", + "aarch64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-aarch64-unknown-linux-musl-install_only_stripped.tar.gz", + "4176cadf55edd4370857788f4a06f550cef36eb30d78c64ddeecc5f760a0eee8", + ), + ( + "3.11", + "i686-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-i686-pc-windows-msvc-install_only_stripped.tar.gz", + "531ef821eb825fcbbf788cf44423e547509965e638082b1cee86b44600e25b48", + ), + ( + "3.11", + "x86_64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-x86_64-apple-darwin-install_only_stripped.tar.gz", + "8bff67e6ff128af7cc2d417eada62a4b9ebbda20817b1f617dc3e95f5ee13976", + ), + ( + "3.11", + "x86_64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz", + "10f19631562836caf4967159da4b58b687af4606a514774d847517752d4e311b", + ), + ( + "3.11", + "x86_64-unknown-linux-gnu", + 
"https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz", + "9f28ebcece4d08fe02373973393bb1e249a478f85e81fac11524a4a383690961", + ), + ( + "3.11", + "x86_64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.11.15%2B20260414-x86_64-unknown-linux-musl-install_only_stripped.tar.gz", + "ec123183c7ec1ad0b2e63363db1a52fd011e43767d0afb0fcbad51a944f9d1d1", + ), + ( + "3.12", + "aarch64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-aarch64-apple-darwin-install_only_stripped.tar.gz", + "7f1cec1a998418c84e0f9b73386074f80efe0f6808dbcf72243e2944a10b42a4", + ), + ( + "3.12", + "aarch64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-aarch64-pc-windows-msvc-install_only_stripped.tar.gz", + "5428e962fcad53eddbebd81b3e29bd20f23890034cc799f3aa450457c6a42f1c", + ), + ( + "3.12", + "aarch64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz", + "c9004dca4da2ace7e28d8c5b83be9d675e2aa8d334d600336f4a7cc1cce9a114", + ), + ( + "3.12", + "aarch64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-aarch64-unknown-linux-musl-install_only_stripped.tar.gz", + "6be50013c5bd2d94d24ab99eba6f6037ce92089324402b3d1877421aca99a4c4", + ), + ( + "3.12", + "i686-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-i686-pc-windows-msvc-install_only_stripped.tar.gz", + "2555390d053c39f0ec10ebf72eb6c6c75ff102bad5b69c702a5dd11aaecd008b", + ), + ( + "3.12", + "x86_64-apple-darwin", + 
"https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-x86_64-apple-darwin-install_only_stripped.tar.gz", + "0bb7d09a43adb8db2f2a5dea0300e3b29e741f02fcb81afd197426e7e3d1247b", + ), + ( + "3.12", + "x86_64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz", + "f34f41ec24aa868b7a68952ca3c1cdd9403513dd42244435eb5d4f6a14d59487", + ), + ( + "3.12", + "x86_64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz", + "68d546422397f485f1478353a7e4b25ccaec1a94d99ffe5adafa46ad665397e7", + ), + ( + "3.12", + "x86_64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.12.13%2B20260414-x86_64-unknown-linux-musl-install_only_stripped.tar.gz", + "89eef74571baec7187f9175ab1e9d86ac241045956580b98266731cffb57fa79", + ), + ( + "3.13", + "aarch64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-aarch64-apple-darwin-install_only_stripped.tar.gz", + "4600a150f6c0d78077fcdc4c3825d073fed4d8f76e3b6bd60c5593f13cce2eec", + ), + ( + "3.13", + "aarch64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-aarch64-pc-windows-msvc-install_only_stripped.tar.gz", + "975422abd724fe4f0e0da961dcbaa995d1090df0a0f001faf7398f0238d89853", + ), + ( + "3.13", + "aarch64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz", + "c61e6d8a98324eaf03f746a6bb897afb5ea5bc7d0cbca6044bade81f6aa4ac19", + ), + ( + "3.13", + "aarch64-unknown-linux-musl", + 
"https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-aarch64-unknown-linux-musl-install_only_stripped.tar.gz", + "42aa76ddc7e985c8555836ccb5a00f4aeda31f4b4b32690c3a7f4222d82518f0", + ), + ( + "3.13", + "i686-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-i686-pc-windows-msvc-install_only_stripped.tar.gz", + "43515b107bb854471fb46eb8f124b9fe783efb2ad934d314642107bbb6bcba5f", + ), + ( + "3.13", + "x86_64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-x86_64-apple-darwin-install_only_stripped.tar.gz", + "8b3e41c81bc22117c1100df4c201cf1a539de515423d8117ef241fadb9b02d2c", + ), + ( + "3.13", + "x86_64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz", + "ed883c472259d77df0ab2d25701adf23c88542c8db5e97ccb33917022b1ff13c", + ), + ( + "3.13", + "x86_64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz", + "766790e038e4f883c0fc6f580f6a2a67eed5a2db1b97535814531ee9a29e16f4", + ), + ( + "3.13", + "x86_64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.13.13%2B20260414-x86_64-unknown-linux-musl-install_only_stripped.tar.gz", + "c0967bb5cef44911e5087981f6c70f8fb380221e6961554ad0933f806614fc23", + ), + ( + "3.14", + "aarch64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-aarch64-apple-darwin-install_only_stripped.tar.gz", + "53499ae9c11a3e72f4b39b2dd02ea02f2ef4e98f1b97929cd140a3b8f279a5db", + ), + ( + "3.14", + "aarch64-pc-windows-msvc", + 
"https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-aarch64-pc-windows-msvc-install_only_stripped.tar.gz", + "43c2484725384aa5cba1bd804af26aafc08dd11da8b59d800669b9d5640237e5", + ), + ( + "3.14", + "aarch64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz", + "939de7bb7dcccd321309cd3ba797d97cdf41b3eb8e3cfc1b1a612e71adfb4eca", + ), + ( + "3.14", + "aarch64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-aarch64-unknown-linux-musl-install_only_stripped.tar.gz", + "62ae769365496ebbc68dd17f5a47c68b41015d3f154305571f4893aa3f39ea17", + ), + ( + "3.14", + "i686-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-i686-pc-windows-msvc-install_only_stripped.tar.gz", + "ec8928bcc888616f672c0551f9dbc5d96bce52cb78cff6b6a673b9a7d8fd00aa", + ), + ( + "3.14", + "x86_64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-x86_64-apple-darwin-install_only_stripped.tar.gz", + "71830caea86bb34a2cf8ab8c35b0140cea362c14eaec329e0af67133adac356b", + ), + ( + "3.14", + "x86_64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz", + "441cb7078406a2e210838e94c27a28c8a9be948b65a7a6a7e56ca098ed56ec1e", + ), + ( + "3.14", + "x86_64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz", + "7db78d056d070dfa1e5dc6a02b670c5382582227f608b95d8ff79e7d581146ae", + ), + ( + "3.14", + "x86_64-unknown-linux-musl", + 
"https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.14.4%2B20260414-x86_64-unknown-linux-musl-install_only_stripped.tar.gz", + "dcc6f0dcea3edf6156facf1c52d4767aa93f5676f554e4539271e0e6a7fb4241", + ), + ( + "3.15", + "aarch64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-aarch64-apple-darwin-install_only_stripped.tar.gz", + "49092b331b375af902f19c99744727883e4b5619848ef8504d041e117aecd4c8", + ), + ( + "3.15", + "aarch64-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-aarch64-pc-windows-msvc-install_only_stripped.tar.gz", + "8545f77918cf86927ff1c4954daa5a0975f5bf18b7ecd4528e6aec3924859304", + ), + ( + "3.15", + "aarch64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz", + "c3927193b3f68c9b6c21ef9311acba7c11d83fe13c6b5be7a534859fa04a6b24", + ), + ( + "3.15", + "aarch64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-aarch64-unknown-linux-musl-install_only_stripped.tar.gz", + "1a352b1e41289b0c23707f0dae992c6588f58d9ccc496614b3dee7579b3266b0", + ), + ( + "3.15", + "i686-pc-windows-msvc", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-i686-pc-windows-msvc-install_only_stripped.tar.gz", + "2b8fb24d491fca127ee49abe934bb2f87507a889aa77327917492de7795ca446", + ), + ( + "3.15", + "x86_64-apple-darwin", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-x86_64-apple-darwin-install_only_stripped.tar.gz", + "ee41f89001a14ae39c1cd257f9f779ea6908a6c1d85c5e925257547f88ee063f", + ), + ( + "3.15", + "x86_64-pc-windows-msvc", + 
"https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-x86_64-pc-windows-msvc-install_only_stripped.tar.gz", + "96e21734a194824cb5464fdee5b3e9c431c2b60884acc6365525d1d2b03dbc03", + ), + ( + "3.15", + "x86_64-unknown-linux-gnu", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz", + "05ebdf7bc6bb1b96d407f5d8c30ed94ce36679c3b93a47d9e11ca128b57acfe5", + ), + ( + "3.15", + "x86_64-unknown-linux-musl", + "https://github.com/fox-it/python-build-pystandalone/releases/download/20260414/cpython-3.15.0a8%2B20260414-x86_64-unknown-linux-musl-install_only_stripped.tar.gz", + "d040ba2d501e2bfd971a3ef6a8c36003d5418684b7bdfb02bea9bb323b3faf40", + ), +] diff --git a/pystandalone/binary.py b/pystandalone/binary.py new file mode 100644 index 0000000..bbb60c7 --- /dev/null +++ b/pystandalone/binary.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import io +import logging +import os +import struct +import tempfile +from pathlib import Path +from typing import TYPE_CHECKING + +import lief + +from pystandalone.codesign import adhoc_sign +from pystandalone.distribution import Target + +if TYPE_CHECKING: + from pystandalone.distribution import Distribution + +log = logging.getLogger(__name__) + + +def pack_zip(zips: list[bytes]) -> bytes: + """Pack multiple zip files into a single binary blob with a size prefix.""" + buf = io.BytesIO() + for zipf in zips: + buf.write(struct.pack(" bytes: + """Patch the target binary with the given payload.""" + binary = dist.read_python_exe() + if dist.target == Target.WINDOWS: + return patch_pe(binary, payload) + if dist.target == Target.LINUX: + return patch_elf(binary, payload) + if dist.target == Target.MACOS: + return patch_macho(binary, payload) + raise RuntimeError(f"Unsupported target {dist.target}") + + +def patch_pe(binary: bytes, payload: bytes) -> bytes: + """Patch a PE 
binary with the given payload.""" + if (pe := lief.PE.parse(io.BytesIO(binary))) is None: + return binary + + for node in pe.resources.childs: + if node.id == lief.PE.ResourcesManager.TYPE.RCDATA: + data = next(next(node.childs).childs) + data.content = payload # type: ignore + break + + return pe.write_to_bytes() + + +def patch_elf(binary: bytes, payload: bytes) -> bytes: + """Patch an ELF binary with the given payload.""" + if (elf := lief.ELF.parse(io.BytesIO(binary))) is None: + return binary + + section = elf.get_section(".pystandalone") + + # There are some issues with the way lief writes ELF files, so patch the payload in-place + buf = bytearray(binary) + buf[section.file_offset : section.file_offset + len(payload)] = payload + + return bytes(buf) + + +def patch_macho(binary: bytes, payload: bytes) -> bytes: + """Patch a Mach-O binary with the given payload.""" + if (fat := lief.MachO.parse(io.BytesIO(binary))) is None: + return binary + + for macho in fat: + section = macho.get_section("__pystandalone") + section.content = list(payload) # type: ignore + macho.remove_signature() + + fd, tmp_name = tempfile.mkstemp(suffix=".macho") + tmp_path = Path(tmp_name) + try: + os.close(fd) + fat.write(tmp_name) + data = tmp_path.read_bytes() + finally: + tmp_path.unlink(missing_ok=True) + + return adhoc_sign(data) diff --git a/pystandalone/bootstrap.py b/pystandalone/bootstrap.py new file mode 100644 index 0000000..3362ccc --- /dev/null +++ b/pystandalone/bootstrap.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import importlib.resources +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + +BOOTSTRAP_TEMPLATE = """ +import runpy +import sys +import zipimport + +import _pystandalone + +payload = _pystandalone.get_payload() + +{decrypt} + +payload_importer = zipimport.metazipimporter('', payload) +{importer} + +# Monkey patch some stuff +import monkey + +runpy._run_module_as_main("run", alter_argv=False) +""" + 
+NO_CRYPT_TEMPLATE = """ +import hashlib + +DIGEST = {digest!r} + +if hashlib.sha256(payload).digest() != DIGEST: + sys.exit("ERROR: Wrong key") +""" + +DECRYPT_TEMPLATE = """ +import hashlib +from argparse import ArgumentParser +from os import environ + +DIGEST = {digest!r} +IV = {iv!r} + +parser = ArgumentParser(prefix_chars=':') +parser.add_argument(":key", default=environ.get("PYSTANDALONE_KEY"), required=False) +known, remainder = parser.parse_known_args() + +if known.key: + key = known.key.encode() + environ["PYSTANDALONE_KEY_SOURCE"] = "args" + sys.argv = [sys.argv[0]] + remainder +else: + environ["PYSTANDALONE_KEY_SOURCE"] = "prompt" + try: + from wingui import KeyGUI + key = KeyGUI.prompt("Please enter key").encode() + except ImportError: + import getpass + key = getpass.getpass("Key: ").encode() + +cipher = _pystandalone.chacha20(hashlib.sha256(key).digest(), IV) +payload = cipher.decrypt(payload) +cipher.clean() +if hashlib.sha256(payload).digest() != DIGEST: + sys.exit("ERROR: Wrong key") +""" + +FILESYSTEM_IMPORTER_TEMPLATE = """ +# Insert before the filesystem importer +sys.meta_path.insert(len(sys.meta_path) - 1, payload_importer) +""" + +NO_FILESYSTEM_IMPORTER_TEMPLATE = """ +# Remove the filesystem importer and add payload importer +sys.meta_path.pop() +sys.meta_path.append(payload_importer) +""" + + +class Bootstrap: + def __init__( + self, + digest: str, + encrypt: bool = True, + iv: bytes | None = None, + filesystem_importer: bool = False, + ): + self.digest = digest + self.encrypt = encrypt + self.iv = iv + self.filesystem_importer = filesystem_importer + + def pack(self) -> Iterator[tuple[str, str | bytes]]: + bootstrap = BOOTSTRAP_TEMPLATE.format( + decrypt=self._decrypt_stub(), + importer=self._filesystem_stub(), + ) + + yield "bootstrap.py", bootstrap + yield "wingui.py", importlib.resources.read_text("pystandalone.redist", "wingui.py") + yield "monkey.py", importlib.resources.read_text("pystandalone.redist", "monkey.py") + + def 
_decrypt_stub(self) -> str: + if self.encrypt: + return DECRYPT_TEMPLATE.format(digest=self.digest, iv=self.iv) + return NO_CRYPT_TEMPLATE.format(digest=self.digest) + + def _filesystem_stub(self) -> str: + if self.filesystem_importer: + return FILESYSTEM_IMPORTER_TEMPLATE + return NO_FILESYSTEM_IMPORTER_TEMPLATE diff --git a/pystandalone/builder.py b/pystandalone/builder.py new file mode 100644 index 0000000..0ab64f6 --- /dev/null +++ b/pystandalone/builder.py @@ -0,0 +1,522 @@ +from __future__ import annotations + +import argparse +import hashlib +import logging +import os +import sys +from pathlib import Path + +from pystandalone import binary, chacha20, zipapp +from pystandalone.bootstrap import Bootstrap +from pystandalone.compiler import NoCompiler, PycCompiler +from pystandalone.distribution import ( + Architecture, + Distribution, + Target, + get_distribution_map, +) +from pystandalone.packer import Packer +from pystandalone.source import Source + +log = logging.getLogger(__name__) +logging.lastResort = None +logging.raiseExceptions = False + +DISTRIBUTIONS = get_distribution_map() + + +class Builder: + """Pystandalone executable builder. + + Args: + distribution: The distribution to build for. + code: The path to the source code to include in the payload + (can be a .py file, a .zip file, a .pystandalone spec file, or a directory containing a + .pystandalone spec file or a run.py file). If None, only the library will be included in the binary. + encrypt: Whether to encrypt the payload zip. + key: The encryption key to use (if None, a random key will be generated). + extra: Extra Python code to insert in the bootstrap. + compile: Whether to compile .py files to .pyc in the payload zip + (NOTE: requires you run pystandalone with the same Python version as the target binary). + strict: Whether to treat a mismatch in bytecode magic number as an error when compiling. + filesystem_importer: Whether to include the filesystem importer in the bootstrap. 
+ """ + + def __init__( + self, + distribution: Distribution, + code: Path | None = None, + *, + encrypt: bool = True, + key: str | None = None, + compile: bool = True, + strict: bool = True, + filesystem_importer: bool = False, + ): + self.distribution = distribution + self.code = code + + self.encrypt = self.code is not None and encrypt + + self.filesystem_importer = filesystem_importer + self.strict = strict + + self.key, self.iv = None, None + if self.encrypt: + self.key = key if key is not None else os.urandom(32).hex() + self.iv = os.urandom(16) + elif self.code: + log.warning("NOT encrypting payload zip!") + + self.strict = strict + + self.source = Source.from_path(code) if code else Source() + + compiler = PycCompiler(self.distribution.bytecode_magic, strict=self.strict) if compile else NoCompiler() + self.packer = Packer(compiler) + + def add_library(self, name: str) -> None: + """Add a library module to include in the binary. + + Args: + name: The name of the library module to include (e.g. "argparse"). + """ + self.source.library.add(name) + + def add_module(self, name: str) -> None: + """Add a module to include in the binary. + + Args: + name: The name of the module to include (e.g. "requests"). + """ + self.source.modules.add(name) + + def add_source_file(self, path: str, file: Path) -> None: + """Add a source file to include in the payload zip. + + Args: + path: The path to the file in the payload zip. + file: The path to the source file to include. + """ + self.source.insert_file(path, file) + + def add_source_str(self, path: str, code: str) -> None: + """Add a source string to include in the payload zip. + + Args: + path: The path to the file in the payload zip. + code: The source code to include. 
+ """ + self.source.insert_str(path, code) + + def encrypt_payload(self, payload: bytes) -> bytes: + """Encrypt the payload with the builder's key and iv.""" + return chacha20.encrypt(payload, hashlib.sha256(self.key.encode()).digest(), self.iv) + + def build_library_zip(self) -> bytes: + """Build the library zip.""" + log.info("Packing library (include: %s)", ", ".join(self.source.library)) + return self.packer.pack(self.distribution.pack_library(self.source.library)) + + def build_bootstrap_zip(self, digest: bytes) -> bytes: + """Build the bootstrap zip. + + Args: + digest: The digest of the payload zip, for the bootstrap to verify integrity. + """ + bootstrap = Bootstrap( + digest=digest, + encrypt=self.encrypt, + iv=self.iv, + filesystem_importer=self.filesystem_importer, + ) + + return self.packer.pack(bootstrap.pack()) + + def build_payload_zip(self) -> bytes: + """Build the payload zip.""" + log.info("Packing payload from %s", self.source.run) + return self.packer.pack(self.source.pack()) + + def build_payload_bin(self, zips: list[bytes]) -> bytes: + """Build the payload binary blob. + + Args: + zips: A list of zip files to include in the payload binary. + """ + return binary.pack_zip(zips) + + def build_exe(self, payload: bytes) -> bytes: + """Build the final executable binary. + + Args: + payload: The payload binary blob to patch into the executable. + """ + return binary.patch(self.distribution, payload) + + def _dump_artefact(self, path: Path | None, name: str, buf: bytes) -> None: + """Dump an artefact to disk for debugging purposes. + + Args: + path: The directory path to dump the artefact. + name: The name of the artefact file. + buf: The bytes content of the artefact to write. 
+ """ + if path is None: + return + + if not path.exists(): + path.mkdir(parents=True, exist_ok=True) + + out_path = path.joinpath(name) + log.debug("Writing %s", out_path) + out_path.write_bytes(buf) + + def build(self, *, build_path: Path | None) -> bytes: + """Build the standalone executable binary. + + Args: + build_path: A directory path to dump the artefacts for debugging purposes. + + Returns: + The bytes of the standalone executable binary. + """ + library_zip = self.build_library_zip() + self._dump_artefact(build_path, "library.zip", library_zip) + + zips = [library_zip] + + if self.source.run: + payload_zip = self.build_payload_zip() + self._dump_artefact(build_path, "payload.zip", payload_zip) + + payload_digest = hashlib.sha256(payload_zip).digest() + + if self.encrypt: + log.info("Encrypting payload zip") + payload_zip = self.encrypt_payload(payload_zip) + self._dump_artefact(build_path, "payload.bin", payload_zip) + + log.info("Packing bootstrap code") + bootstrap_zip = self.build_bootstrap_zip(payload_digest) + self._dump_artefact(build_path, "bootstrap.zip", bootstrap_zip) + + zips.append(bootstrap_zip) + zips.append(payload_zip) + else: + log.info("No code payload given, building library only binary") + + standalone_payload = self.build_payload_bin(zips) + self._dump_artefact(build_path, "standalone.bin", standalone_payload) + + exe = self.build_exe(standalone_payload) + self._dump_artefact(build_path, "standalone.exe", exe) + return exe + + +def select_distribution(python: str, target: Target | None, arch: Architecture | None) -> Distribution | None: + if target is None: + try: + target = Target.from_current() + except RuntimeError: + log.error("Unsupported platform for auto-detection. 
Please specify target with --target.") # noqa: TRY400 + return None + log.warning("Derived target from current system: %s", target.value) + + if arch is None: + try: + arch = Architecture.from_current() + except RuntimeError: + log.error("Unsupported architecture for auto-detection. Please specify architecture with --arch.") # noqa: TRY400 + return None + log.warning("Derived architecture from current system: %s", arch.value) + + if python not in DISTRIBUTIONS: + log.error("No distribution available for Python %s", python) + return None + + if target not in DISTRIBUTIONS[python]: + log.error("No distribution available for Python %s and target %s", python, target.value) + return None + + if arch not in DISTRIBUTIONS[python][target]: + log.error( + "No distribution available for Python %s, target %s and architecture %s", + python, + target.value, + arch.value, + ) + return None + + return DISTRIBUTIONS[python][target][arch].get(progress=True) + + +def setup_logging(verbosity: int) -> None: + """Set up logging with the given verbosity level. + + Args: + verbosity: The verbosity level (0 for critical, 1 for error, 2 for warning, 3 for info, 4 or higher for debug). 
+ """ + if verbosity == 1: + level = logging.ERROR + elif verbosity == 2: + level = logging.WARNING + elif verbosity == 3: + level = logging.INFO + elif verbosity >= 4: + level = logging.DEBUG + else: + level = logging.CRITICAL + + logging.basicConfig(format="%(levelname)s %(message)s", level=level) + + logging.addLevelName(logging.DEBUG, " - ") + logging.addLevelName(logging.INFO, "[*]") + + for lvl in [logging.WARNING, logging.ERROR, logging.CRITICAL]: + logging.addLevelName(lvl, "[!]") + + +def main() -> int: + parser = argparse.ArgumentParser( + prog="pystandalone", + description="Build opinionated standalone Python executables", + ) + + parser.add_argument( + "-c", + "--code", + metavar="CODE", + type=Path, + help="path to code file or directory (.zip, .py, .pystandalone or directory containing run.py or .pystandalone file), or leave empty to generate a library-only binary", # noqa: E501 + ) + parser.add_argument( + "-o", + "--output", + metavar="OUTPUT", + type=Path, + help="path to output binary", + ) + + distribution_group = parser.add_argument_group("distribution options") + distribution_group.add_argument( + "-p", + "--python", + metavar="PYTHON", + default="3.10", + choices=DISTRIBUTIONS.keys(), + help="version of Python to build a binary for (default: %(default)s, available: %(choices)s)", + ) + distribution_group.add_argument( + "-t", + "--target", + metavar="TARGET", + type=Target, + default=None, + choices=Target.__members__.values(), + help="target OS binary (default: autodetect, available: %(choices)s)", + ) + distribution_group.add_argument( + "-a", + "--arch", + metavar="ARCH", + type=Architecture, + default=None, + choices=Architecture.__members__.values(), + help="target architecture (default: autodetect based on target, available: %(choices)s)", + ) + distribution_group.add_argument( + "-d", + "--distribution", + metavar="DISTRIBUTION", + type=Path, + help="path to custom pystandalone distribution (overrides target and arch)", + ) + 
distribution_group.add_argument( + "--list-available", + action="store_true", + help="list available distributions and exit", + ) + + packaging_group = parser.add_argument_group("packaging options") + packaging_group.add_argument( + "-L", + "--library", + type=str, + nargs="*", + help="extra library modules to include", + ) + packaging_group.add_argument( + "-M", + "--modules", + type=str, + nargs="*", + help="extra external modules to include", + ) + packaging_group.add_argument( + "--compile", + action="store_true", + help="compile .py files to .pyc in the payload zip (NOTE: requires you run pystandalone with the same Python version as the target binary", # noqa: E501 + ) + packaging_group.add_argument( + "--filesystem-importer", + action="store_true", + help="add the filesystem importer", + ) + packaging_group.add_argument( + "--no-strict", + action="store_true", + default=False, + help=argparse.SUPPRESS, + ) + + packaging_group.add_argument( + "--zipapp", + "--pyz", + action="store_true", + help="wrap the binary in a zipapp with a bootstrap ELF loader (Linux x86_64 and aarch64 only)", + ) + + encryption_group = parser.add_argument_group("encryption options") + encryption_group.add_argument( + "-k", + "--key", + type=str, + help="use provided key for encryption", + ) + encryption_group.add_argument( + "--key-file", + type=Path, + help="write encryption key to file", + ) + encryption_group.add_argument( + "--no-crypt", + action="store_true", + default=False, + help="disable encryption", + ) + + parser.add_argument( + "--debug", + action="store_true", + help="dump intermediary files", + ) + parser.add_argument( + "-v", + "--verbose", + action="count", + default=3, + help="increase output verbosity", + ) + args = parser.parse_args() + + if args.debug: + args.verbose = 5 + setup_logging(args.verbose) + + if args.list_available: + for version, targets in DISTRIBUTIONS.items(): + print(f"Python {version}:") + + if args.verbose <= 3: + for target, archs in 
sorted(targets.items(), key=lambda item: item[0].value): + archs_str = ", ".join(arch.value for arch in sorted(archs, key=lambda item: item.value)) + print(f" {target.value}: {archs_str}") + else: + for _, archs in sorted(targets.items(), key=lambda item: item[0].value): + for _, dist in sorted(archs.items(), key=lambda item: item[0].value): + if args.verbose >= 5: + print(f" {dist.target_triple}: {dist.url} (digest: {dist.digest})") + else: + print(f" {dist.target_triple}") + return 0 + + if args.distribution: + distribution = Distribution(args.distribution) + args.target = distribution.target + args.arch = distribution.arch + + log.info( + "Using custom distribution from %s (target: %s, arch: %s)", + args.distribution, + distribution.target.value, + distribution.arch.value, + ) + else: + if (distribution := select_distribution(args.python, args.target, args.arch)) is None: + return 1 + + log.info( + "Using distribution for Python %s (target: %s, arch: %s)", + args.python, + distribution.target.value, + distribution.arch.value, + ) + + log.info("Setting up builder for %s (%s)", distribution.target.value, distribution.arch.value) + + try: + builder = Builder( + distribution, + args.code, + encrypt=not args.no_crypt, + key=args.key, + compile=args.compile, + strict=not args.no_strict, + filesystem_importer=args.filesystem_importer, + ) + except Exception as e: + log.error("Error creating builder: %s. View debug logs for more details.", e) # noqa: TRY400 + log.debug("Stacktrace:", exc_info=e) + return 1 + + if args.library: + builder.source.library.update(set(args.library)) + + if args.modules: + builder.source.modules.update(set(args.modules)) + + try: + exe = builder.build(build_path=Path("build") if args.debug else None) + except Exception as e: + log.error("Error building binary: %s. 
View debug logs for more details.", e) # noqa: TRY400 + log.debug("Stacktrace:", exc_info=e) + return 1 + + if args.output is None: + args.output = Path() + + if args.output.is_dir(): + ext = ".exe" if distribution.target == Target.WINDOWS else "" + args.output = Path( + f"pystandalone-{distribution.version}-{distribution.target.value}-{distribution.arch.value}{ext}" + ) + + if args.zipapp: + if distribution.target != Target.LINUX or distribution.arch not in (Architecture.X86_64, Architecture.AARCH64): + log.error("zipapp wrapping is only supported on Linux x86_64 and aarch64 targets") + return 1 + + log.info("Wrapping binary in zipapp with bootstrap ELF loader") + exe = zipapp.wrap(exe) + args.output = args.output.with_suffix(args.output.suffix + ".pyz") + + log.info("Writing %s", args.output) + args.output.write_bytes(exe) + + if builder.key: + log.info("") + log.info("Key: %s", builder.key) + if args.key_file: + log.info("Writing %s", args.key_file) + args.key_file.write_text(builder.key) + + return 0 + + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + pass diff --git a/pystandalone/chacha20.py b/pystandalone/chacha20.py new file mode 100644 index 0000000..7073a68 --- /dev/null +++ b/pystandalone/chacha20.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +import io +import struct +from typing import TYPE_CHECKING, BinaryIO + +if TYPE_CHECKING: + from collections.abc import Iterator + + from typing_extensions import Self + + +class Chacha20(io.RawIOBase): + def __init__(self, fh: BinaryIO, key: bytes, iv: bytes) -> None: + self.fh = fh + self.key = key + self.iv = iv + + self._offset_into_block = 0 + self._original_offset = 0 + if len(iv) == 16: + # Compatibility with OpenSSL-like IV + self._original_offset, self.iv = struct.unpack(" Self: + return self + + def __exit__(self, *args) -> None: + pass + + @property + def closed(self) -> None: + return self.fh.closed + + def seek(self, pos: int, whence: int = io.SEEK_SET) 
-> int: + new_pos = self.fh.seek(pos, whence) + pos, offset = divmod(new_pos, 64) + + block_low = (self._original_offset + pos) & 0xFFFFFFFF + block_high = ((self._original_offset >> 32) + (pos >> 32)) & 0xFFFFFFFF + + self._key_stream = _chacha20_xor_stream(self.key, self.iv, block_high << 32 | block_low) + for _ in range(offset): + next(self._key_stream) + return new_pos + + def read(self, n: int = -1) -> bytes: + return bytes(a ^ x for a, x in zip(self.fh.read(n), self._key_stream, strict=False)) + + def readable(self) -> bool: + return self.fh.readable() + + def readinto(self, buffer: memoryview) -> int: + data = self.read(len(buffer)) + size = len(data) + buffer[:size] = data + return size + + def seekable(self) -> bool: + return self.fh.seekable() + + def tell(self) -> int: + return self.fh.tell() + + def close(self) -> None: + self.fh.close() + + def flush(self) -> None: + self.fh.flush() + + +def _chacha20_xor_stream(key: bytes, iv: bytes, position: int = 0) -> Iterator[int]: + """Generate a chacha20 xor stream.""" + if not isinstance(position, int): + raise TypeError("position must be an integer") + + if position.bit_length() > 64: + raise ValueError("position can't be larger than 64 bits") + + if not isinstance(key, bytes): + raise TypeError("key must be a bytes-like object") + + if not isinstance(iv, bytes): + raise TypeError("iv must be a bytes-like object") + + if len(key) != 32: + raise ValueError("key must be 32 bytes") + + if len(iv) != 8: + raise ValueError("iv must be 8 bytes") + + def rotate(v: int, c: int) -> int: + return ((v << c) & 0xFFFFFFFF) | v >> (32 - c) + + def quarter_round(x: int, a: int, b: int, c: int, d: int) -> None: + x[a] = (x[a] + x[b]) & 0xFFFFFFFF + x[d] = rotate(x[d] ^ x[a], 16) + x[c] = (x[c] + x[d]) & 0xFFFFFFFF + x[b] = rotate(x[b] ^ x[c], 12) + x[a] = (x[a] + x[b]) & 0xFFFFFFFF + x[d] = rotate(x[d] ^ x[a], 8) + x[c] = (x[c] + x[d]) & 0xFFFFFFFF + x[b] = rotate(x[b] ^ x[c], 7) + + ctx = [0] * 16 + # "expand 32-byte k" + 
def _assemble_fat(slices: list[tuple[int, int, int, bytes]]) -> bytes:
    """Reassemble a fat Mach-O binary from individually signed slices.

    Args:
        slices: ``(cputype, cpusubtype, align, data)`` tuples, where ``align``
            is the power-of-two exponent used to place the slice in the file.

    Returns:
        The complete fat binary as bytes.
    """
    out = bytearray(struct.pack(">II", FAT_MAGIC, len(slices)))

    # Lay out every slice after the header, honouring its alignment exponent.
    cursor = FAT_HEADER_SIZE + len(slices) * FAT_ARCH_SIZE
    placements: list[int] = []
    for _, _, align, payload in slices:
        step = 1 << align
        cursor = (cursor + step - 1) & ~(step - 1)
        placements.append(cursor)
        cursor += len(payload)

    # Emit the fat_arch table (big-endian).
    # NOTE(review): the size field is packed signed ("i"), matching _sign_fat's
    # unpack format — fine for slices below 2 GiB.
    for (cputype, cpusubtype, align, payload), where in zip(slices, placements, strict=False):
        out += struct.pack(">IIIiI", cputype, cpusubtype, where, len(payload), align)

    # Zero-pad up to each slice's placement, then append the slice bytes.
    for (_, _, _, payload), where in zip(slices, placements, strict=False):
        out += b"\x00" * (where - len(out))
        out += payload

    return bytes(out)
struct.pack(">I", 0) # spare2 + buf += struct.pack(">I", 0) # scatterOffset + buf += struct.pack(">I", 0) # teamOffset + buf += struct.pack(">I", 0) # spare3 + buf += struct.pack(">Q", code_size) # codeLimit64 + buf += struct.pack(">Q", exec_seg_base) + buf += struct.pack(">Q", exec_seg_limit) + buf += struct.pack(">Q", CS_EXECSEG_MAIN_BINARY) + + # Identifier + buf += ident + + # Page hashes + for h in page_hashes: + buf += h + + return bytes(buf) + + +def _compute_page_hashes(data: bytes, code_size: int) -> list[bytes]: + """Compute SHA-256 hashes for each page of the binary.""" + hashes = [] + for i in range(0, code_size, CS_PAGE_SIZE): + page = data[i : i + CS_PAGE_SIZE] + hashes.append(hashlib.sha256(page).digest()) + return hashes + + +def _align_up(value: int, alignment: int) -> int: + return (value + alignment - 1) & ~(alignment - 1) diff --git a/pystandalone/compiler.py b/pystandalone/compiler.py new file mode 100644 index 0000000..25c9607 --- /dev/null +++ b/pystandalone/compiler.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import importlib +import io +import marshal +import platform +from typing import TYPE_CHECKING, BinaryIO, TextIO + +from pystandalone import chacha20 + +if TYPE_CHECKING: + from pathlib import Path + + +class Compiler: + EXT = "" + + def compile(self, s: str | bytes, name: str | None = None, strip: bool = True) -> bytes: + raise NotImplementedError + + def compile_file(self, path: Path, **kwargs) -> bytes: + return self.compile(path.read_text(), **kwargs) + + def compile_fileobj(self, fh: TextIO | BinaryIO, **kwargs) -> bytes: + return self.compile(fh.read(), **kwargs) + + +class NoCompiler(Compiler): + def compile(self, s: str | bytes, name: str | None = None, strip: bool = True) -> bytes: + return s.encode() if isinstance(s, str) else s + + +class PycCompiler(Compiler): + EXT = "c" + + def __init__(self, magic: bytes, *, strict: bool = True): + if (running_implementation := platform.python_implementation()).lower() != 
class CryptCompiler(NoCompiler):
    """Compiler that ChaCha20-encrypts the raw source bytes instead of compiling them."""

    def __init__(self, key: bytes, iv: bytes):
        # Key material for the chacha20 stream cipher.
        self.key = key
        self.iv = iv

    def compile(self, s: str | bytes, name: str | None = None, strip: bool = True) -> bytes:
        """Encode *s* to bytes (via NoCompiler) and return the encrypted result."""
        plain = super().compile(s, name, strip)
        return chacha20.encrypt(plain, self.key, self.iv)
class Target(Enum):
    """Operating systems a standalone binary can be built for."""

    LINUX = "linux"
    WINDOWS = "windows"
    MACOS = "macos"

    def __str__(self) -> str:
        return self.value

    @classmethod
    def from_current(cls) -> Self:
        """Detect the target of the interpreter we are currently running on."""
        prefixes = (("linux", cls.LINUX), ("win", cls.WINDOWS), ("darwin", cls.MACOS))
        for prefix, target in prefixes:
            if sys.platform.startswith(prefix):
                return target
        raise RuntimeError(f"Unsupported platform {sys.platform}")

    @classmethod
    def from_triple(cls, triple: str) -> Self:
        """Derive the target from a Rust-style target triple."""
        markers = (
            ("-apple-darwin", cls.MACOS),
            ("-unknown-linux-", cls.LINUX),
            ("-pc-windows-", cls.WINDOWS),
        )
        for marker, target in markers:
            if marker in triple:
                return target
        raise ValueError(f"Unsupported target triple {triple}")
def checksum(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is read in buffered chunks so large archives don't need to fit
    in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        while block := fh.read(io.DEFAULT_BUFFER_SIZE):
            digest.update(block)
    return digest.hexdigest()
for target, it_target in groupby(it_version, attrgetter("target")): + result[version].setdefault(target, {}) + + for arch, it_arch in groupby(it_target, attrgetter("arch")): + if (target, arch) == (Target.LINUX, Architecture.X86_64): + # For Linux x86_64, prefer musl builds + it_arch = filter(lambda d: "-musl" in d.target_triple, it_arch) + elif (target, arch) == (Target.LINUX, Architecture.AARCH64): + # For Linux aarch64, prefer libc builds (as the musl builds are not static yet) + it_arch = filter(lambda d: "-gnu" in d.target_triple, it_arch) + + # Take the most recent build + result[version][target][arch] = sorted(it_arch, key=attrgetter("url"))[-1] + + return result + + +class Distribution: + """Distribution archive utility class. + + Provides convenience methods for extracting data and information from distribution archives. + """ + + def __init__(self, path: Path): + self.path = path + self.tar = None + + def __repr__(self) -> str: + return f"" + + def open(self) -> TarFile: + return tarfile.open(self.path, mode="r") + + @cached_property + def metadata(self) -> dict: + with self.open() as tf: + try: + return json.load(tf.extractfile("python/PYSTANDALONE.json")) + except Exception: + raise ValueError("Invalid distribution archive") + + @property + def version(self) -> str: + return self.metadata["python_version"] + + @property + def major_minor_version(self) -> str: + version = self.version + return ".".join(version.split(".")[:2]) + + @property + def target_triple(self) -> str: + return self.metadata["target_triple"] + + @property + def target(self) -> Target: + return Target.from_triple(self.target_triple) + + @property + def arch(self) -> Architecture: + return Architecture.from_triple(self.target_triple) + + @property + def bytecode_magic(self) -> bytes: + return bytes.fromhex(self.metadata["python_bytecode_magic_number"]) + + def read_python_exe(self) -> bytes: + with self.open() as tf: + return 
tf.extractfile(f"python/{self.metadata['python_exe']}").read() + + def pack_library( + self, include: list[str] | None = None, exclude: list[str] | None = None + ) -> Iterator[tuple[str, bytes]]: + include = set(include) if include else set() + exclude = set(exclude) if exclude else set() + + include |= DEFAULT_INCLUDE + exclude |= DEFAULT_EXCLUDE + + library_path = f"python/{self.metadata['python_stdlib']}/" + + include = tuple(include) + exclude = tuple(exclude) + + with self.open() as tf: + for member in tf.getmembers(): + if not member.name.startswith(library_path) or not member.isreg() or "__pycache__" in member.name: + continue + + relative_path = member.name[len(library_path) :] + module_name = relative_path.replace("/", ".") + + if module_name.startswith(include) or not module_name.startswith(exclude): + yield (relative_path, tf.extractfile(member).read()) + + +if __name__ == "__main__": + import re + + RELEASES_URL = "https://api.github.com/repos/fox-it/python-build-pystandalone/releases/latest" + RE_ASSET = re.compile(r"cpython-(\d+\.\d+)\.[^+]+\+\d+-(.+)-install_only_stripped\.tar\.gz") + + request = urllib.request.Request(RELEASES_URL, headers={"Accept": "application/vnd.github+json"}) + with urllib.request.urlopen(request) as response: + release = json.loads(response.read()) + + entries = [] + for asset in release["assets"]: + name = asset["name"] + + if "freethreaded" in name: + continue + + if (match := RE_ASSET.fullmatch(name)) is None: + continue + + version = match.group(1) + triple = match.group(2) + url = asset["browser_download_url"] + digest = asset["digest"].removeprefix("sha256:") + + entries.append((version, triple, url, digest)) + + entries.sort(key=lambda e: (e[0], e[1])) + + lines = [] + for version, triple, url, digest in entries: + lines.append(f' (\n "{version}",\n "{triple}",\n "{url}",\n "{digest}",\n ),') + + output = "from __future__ import annotations\n\nDISTRIBUTIONS = [\n" + "\n".join(lines) + "\n]\n" + + output_path = 
def zinfo(name: str) -> zipfile.ZipInfo:
    """Create a ZipInfo with a fixed (reproducible) timestamp.

    The timestamp is pinned to the DOS epoch, 1980-01-01 00:00:00, so repeated
    builds produce byte-identical archives.

    Args:
        name: The name of the file in the zip archive.
    """
    # zipfile documents date_time as (year >= 1980, month 1-12, day 1-31, ...);
    # the previous (1980, 0, 0, ...) tuple used out-of-range month/day values,
    # which encode an invalid DOS date that some archive tools reject.
    return zipfile.ZipInfo(name, (1980, 1, 1, 0, 0, 0))
def zipfs_stat(archive: str, path: str) -> os.stat_result:
    """Build a synthetic stat result for *path* inside the embedded zip archive.

    Raises:
        OSError: If *path* does not exist in the archive (via zipfs_get_entry).
    """
    entry = zipfs_get_entry(archive, path)

    if isinstance(entry, dict):
        # Directories are modelled as dicts; the dict's id() serves as a stable inode.
        fields = [stat.S_IFDIR | 0o777, id(entry), 0, 0, 0, 0, 0, 0, 0, 0]
        return os.stat_result(fields)

    # TOC tuple layout: (path, compress, data_size, file_size, file_offset, time, date, crc)
    file_size = entry[3]
    file_offset = entry[4]
    # stat field order: mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime
    return os.stat_result([stat.S_IFREG | 0o777, file_offset, 0, 0, 0, 0, file_size, 0, 0, 0])
def _monkey_stat(path: str, *args, **kwargs) -> os.stat_result:
    """``os.stat`` replacement that synthesizes results for paths inside the embedded zip."""
    magic = zipfs_find_magic(path)
    if magic is None:
        # Not a zip-backed path: defer to the real os.stat.
        return _os_stat(path, *args, **kwargs)

    archive, member_path = magic
    return zipfs_stat(archive, member_path)
class DirEntry:
    """``os.DirEntry`` look-alike backed by a zip TOC entry.

    A ``tuple`` TOC entry represents a file; a ``dict`` represents a directory.
    """

    def __init__(self, path: str, toc_entry: dict | tuple):
        self.toc_entry = toc_entry

        _, _, basename = path.rpartition(zipimport.path_sep)
        self.name = basename
        self.path = path

    def inode(self) -> int:
        if isinstance(self.toc_entry, tuple):
            # The file offset in the archive doubles as a stable inode number.
            return self.toc_entry[4]
        return id(self.toc_entry)

    def is_dir(self, follow_symlinks: bool = True) -> builtins.bool:
        return isinstance(self.toc_entry, dict)

    def is_file(self, follow_symlinks: bool = True) -> builtins.bool:
        return isinstance(self.toc_entry, tuple)

    def is_symlink(self) -> bool:
        # Zip archives carry no symlink information here.
        return False

    def stat(self, follow_symlinks: bool = True) -> os.stat_result:
        if isinstance(self.toc_entry, dict):
            return os.stat_result([stat.S_IFDIR] + [0] * 9)

        # TOC tuple layout: (path, compress, data_size, file_size, file_offset, time, date, crc)
        size = self.toc_entry[3]
        return os.stat_result([stat.S_IFREG, 0, 0, 0, 0, 0, size, 0, 0, 0])
def _monkey_scandir(path: str) -> ScandirIterator:
    """``os.scandir`` replacement that lists directories inside the embedded zip.

    Raises:
        NotADirectoryError: If the zip-backed path resolves to a file entry.
    """
    magic = zipfs_find_magic(path)
    if magic is None:
        return _os_scandir(path)

    archive, mpath = magic
    node = zipfs_get_entry(archive, mpath)

    if not isinstance(node, dict):
        raise NotADirectoryError(f"Not a directory: {mpath!r}")

    children = (
        DirEntry(zipimport.path_sep.join((mpath, name)), child) for name, child in node.items()
    )
    return ScandirIterator(children)
_monkey__process_class + + +# pathlib patches +if hasattr(pathlib, "_NormalAccessor"): + pathlib._NormalAccessor.stat = staticmethod(_monkey_stat) + pathlib._NormalAccessor.lstat = staticmethod(_monkey_lstat) + pathlib._NormalAccessor.open = staticmethod(_monkey_open) + pathlib._NormalAccessor.listdir = staticmethod(_monkey_listdir) + pathlib._NormalAccessor.scandir = staticmethod(_monkey_scandir) + +# glob patches +if hasattr(glob, "_StringGlobber") and is_version(3, 13): + glob._StringGlobber.scandir = staticmethod(_monkey_scandir) + +# Apply io patches last +io.open = _monkey_open diff --git a/pystandalone/redist/wingui.py b/pystandalone/redist/wingui.py new file mode 100644 index 0000000..67c7783 --- /dev/null +++ b/pystandalone/redist/wingui.py @@ -0,0 +1,280 @@ +from __future__ import annotations + +from ctypes import ( + POINTER, + WINFUNCTYPE, + Structure, + WinDLL, + WinError, + byref, + c_int, + c_int64, + c_void_p, + create_string_buffer, + get_last_error, + string_at, +) +from ctypes import wintypes as w +from typing import Any + + +def _winerror(result: int, *args) -> Any: + if not result: + raise WinError(get_last_error()) + return result + + +LRESULT = c_int64 +HCURSOR = c_void_p + +WNDPROC = WINFUNCTYPE(LRESULT, w.HWND, w.UINT, w.WPARAM, w.LPARAM) + + +class WNDCLASSW(Structure): + _fields_ = ( + ("style", w.UINT), + ("lpfnWndProc", WNDPROC), + ("cbClsExtra", c_int), + ("cbWndExtra", c_int), + ("hInstance", w.HINSTANCE), + ("hIcon", w.HICON), + ("hCursor", HCURSOR), + ("hbrBackground", w.HBRUSH), + ("lpszMenuName", w.LPCWSTR), + ("lpszClassName", w.LPCWSTR), + ) + + +class PAINTSTRUCT(Structure): + _fields_ = ( + ("hdc", w.HDC), + ("fErase", w.BOOL), + ("rcPaint", w.RECT), + ("fRestore", w.BOOL), + ("fIncUpdate", w.BOOL), + ("rgbReserved", w.BYTE * 32), + ) + + +def RECT(top: int, left: int, right: int, bottom: int) -> w.RECT: + rectangle = w.RECT() + rectangle.top = top + rectangle.left = left + rectangle.right = right + rectangle.bottom = bottom + 
return rectangle + + +kernel32 = WinDLL("kernel32", use_last_error=True) +kernel32.GetModuleHandleW.argtypes = (w.LPCWSTR,) +kernel32.GetModuleHandleW.restype = w.HMODULE +kernel32.GetModuleHandleW._winerror = _winerror + +user32 = WinDLL("user32", use_last_error=True) +user32.CreateWindowExW.argtypes = ( + w.DWORD, + w.LPCWSTR, + w.LPCWSTR, + w.DWORD, + c_int, + c_int, + c_int, + c_int, + w.HWND, + w.HMENU, + w.HINSTANCE, + w.LPVOID, +) +user32.CreateWindowExW.restype = w.HWND +user32.CreateWindowExW._winerror = _winerror +user32.LoadIconW.argtypes = w.HINSTANCE, w.LPCWSTR +user32.LoadIconW.restype = w.HICON +user32.LoadIconW._winerror = _winerror +user32.LoadCursorW.argtypes = w.HINSTANCE, w.LPCWSTR +user32.LoadCursorW.restype = HCURSOR +user32.LoadCursorW._winerror = _winerror +user32.RegisterClassW.argtypes = (POINTER(WNDCLASSW),) +user32.RegisterClassW.restype = w.ATOM +user32.RegisterClassW._winerror = _winerror +user32.ShowWindow.argtypes = w.HWND, c_int +user32.ShowWindow.restype = w.BOOL +user32.UpdateWindow.argtypes = (w.HWND,) +user32.UpdateWindow.restype = w.BOOL +user32.UpdateWindow._winerror = _winerror +user32.GetMessageW.argtypes = POINTER(w.MSG), w.HWND, w.UINT, w.UINT +user32.GetMessageW.restype = w.BOOL +user32.TranslateMessage.argtypes = (POINTER(w.MSG),) +user32.TranslateMessage.restype = w.BOOL +user32.DispatchMessageW.argtypes = (POINTER(w.MSG),) +user32.DispatchMessageW.restype = LRESULT +user32.BeginPaint.argtypes = w.HWND, POINTER(PAINTSTRUCT) +user32.BeginPaint.restype = w.HDC +user32.BeginPaint._winerror = _winerror +user32.GetClientRect.argtypes = w.HWND, POINTER(w.RECT) +user32.GetClientRect.restype = w.BOOL +user32.GetClientRect._winerror = _winerror +user32.DrawTextW.argtypes = w.HDC, w.LPCWSTR, c_int, POINTER(w.RECT), w.UINT +user32.DrawTextW.restype = c_int +user32.EndPaint.argtypes = w.HWND, POINTER(PAINTSTRUCT) +user32.EndPaint.restype = w.BOOL +user32.PostQuitMessage.argtypes = (c_int,) +user32.PostQuitMessage.restype = None 
+user32.DefWindowProcW.argtypes = w.HWND, w.UINT, w.WPARAM, w.LPARAM +user32.DefWindowProcW.restype = LRESULT + +gdi32 = WinDLL("gdi32", use_last_error=True) +gdi32.GetStockObject.argtypes = (c_int,) +gdi32.GetStockObject.restype = w.HGDIOBJ + +SendMessage = user32.SendMessageA +SendMessage.argtypes = (w.HWND, w.UINT, w.WPARAM, w.LPARAM) +SendMessage.restype = c_void_p + +CW_USEDEFAULT = -2147483648 +IDI_APPLICATION = w.LPCWSTR(32512) + +CS_HREDRAW = 2 +CS_VREDRAW = 1 + +IDC_ARROW = w.LPCWSTR(32512) +WHITE_BRUSH = 0 + +SW_SHOWNORMAL = 1 + +WM_DESTROY = 2 +WM_PAINT = 15 +WM_COMMAND = 273 + +DT_SINGLELINE = 32 +DT_CENTER = 1 +DT_VCENTER = 4 + +WS_CHILD = 0x40000000 +WS_VISIBLE = 0x10000000 +WS_BORDER = 0x00800000 +WS_OVERLAPPEDWINDOW = 13565952 + + +BS_PUSHBUTTON = 0 +BS_CHECKBOX = 2 +BS_AUTOCHECKBOX = 3 + + +ES_PASSWORD = 32 +ES_WANTRETURN = 4096 +EM_SETPASSWORDCHAR = 204 + + +class KeyGUI: + result = "" + pass_shown = False + gui_display_text = "" + accept_button = None + input_field = None + checkbox = None + reveal_text = None + label = None + + @classmethod + def prompt( + cls: KeyGUI, + text: str, + title: str = "Unlock", + label_text: str = "Key:", + reveal_text: str = "reveal key", + button_text: str = "Unlock", + ) -> str: + cls.gui_display_text = text + cls.label = label_text + cls.reveal_text = reveal_text + wndclass = WNDCLASSW() + wndclass.style = CS_HREDRAW | CS_VREDRAW + wndclass.lpfnWndProc = WNDPROC(_winmessage) + wndclass.cbClsExtra = wndclass.cbWndExtra = 0 + wndclass.hInstance = kernel32.GetModuleHandleW(None) + wndclass.hIcon = user32.LoadIconW(None, IDI_APPLICATION) + wndclass.hCursor = user32.LoadCursorW(None, IDC_ARROW) + wndclass.hbrBackground = gdi32.GetStockObject(WHITE_BRUSH) + wndclass.lpszMenuName = None + wndclass.lpszClassName = "KeyPrompt" + user32.RegisterClassW(byref(wndclass)) + hwnd = user32.CreateWindowExW( + 0, + wndclass.lpszClassName, + title, + WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, + CW_USEDEFAULT, + 800, + 200, + None, + 
def _winmessage(hwnd: w.HWND, message: w.UINT, wParam: w.WPARAM, lParam: w.LPARAM) -> LRESULT:
    """Window procedure for the key prompt window.

    Handles the accept button and the reveal checkbox (WM_COMMAND), paints the
    static text labels (WM_PAINT), quits the message loop on WM_DESTROY, and
    forwards everything else to ``DefWindowProcW``.

    Args:
        hwnd: Handle of the window receiving the message.
        message: The window message identifier.
        wParam: Message-specific parameter.
        lParam: Message-specific parameter; for WM_COMMAND this is the handle
            of the child control that generated the command.

    Returns:
        0 for handled messages, otherwise the result of ``DefWindowProcW``.
    """
    ps = PAINTSTRUCT()
    rect = w.RECT()
    if message == WM_COMMAND:
        if lParam == KeyGUI.accept_button:
            # Read the entered key from the edit control, then quit the
            # message loop so KeyGUI.prompt can return the result.
            buf = create_string_buffer(b"", size=1000)
            user32.GetWindowTextA(KeyGUI.input_field, byref(buf), 1000)
            user32.PostQuitMessage(0)
            KeyGUI.result = string_at(buf).decode("ascii")

        if lParam == KeyGUI.checkbox:
            # Toggle between masked ('*') and plain-text display of the key.
            if not KeyGUI.pass_shown:
                SendMessage(KeyGUI.input_field, EM_SETPASSWORDCHAR, 0, 0)
                KeyGUI.pass_shown = True
            else:
                SendMessage(KeyGUI.input_field, EM_SETPASSWORDCHAR, w.WPARAM(ord("*")), 0)
                KeyGUI.pass_shown = False

            # Force a repaint so the password-char change becomes visible.
            user32.InvalidateRect(hwnd, 0, 0)

        return 0

    if message == WM_PAINT:
        hdc = user32.BeginPaint(hwnd, byref(ps))
        user32.GetClientRect(hwnd, byref(rect))
        user32.DrawTextW(hdc, KeyGUI.gui_display_text, c_int(-1), byref(RECT(10, 10, 590, 40)), DT_SINGLELINE)
        user32.DrawTextW(hdc, KeyGUI.label, c_int(-1), byref(RECT(60, 10, 200, 140)), DT_SINGLELINE)
        user32.DrawTextW(hdc, KeyGUI.reveal_text, c_int(-1), byref(RECT(95, 80, 200, 140)), DT_SINGLELINE)
        # BUGFIX: EndPaint must be called exactly once per BeginPaint; the
        # original called it twice, passing an already-released paint struct
        # to the second call.
        user32.EndPaint(hwnd, byref(ps))
        return 0
    if message == WM_DESTROY:
        user32.PostQuitMessage(0)
        return 0

    return user32.DefWindowProcW(hwnd, message, wParam, lParam)
+ """ + source = cls() + + # Support spec entry selection with :entry syntax + spec_entry = None + if ":" in path.name: + name, _, spec_entry = path.name.rpartition(":") + path = path.with_name(name) + + if path.suffix in (".py", ".zip"): + source.run = str(path) + source.base = path.parent + + elif path.name.endswith(".pystandalone"): + source._populate_from_spec(path, spec_entry) + + elif path.is_dir(): + if (spec_file := path.joinpath(".pystandalone")).exists(): + source._populate_from_spec(spec_file, spec_entry) + elif path.joinpath("run.py").exists(): + source.run = "run.py" + source.base = path + source.include_base = True + else: + raise ValueError("No .pystandalone or run.py file in code directory") + + else: + # TODO: support plain module entry points with :entry syntax + raise ValueError("Unsupported source path") + + return source + + def _populate_from_spec(self, path: Path, entry: str | None = None) -> None: + """Populate the Source object from a .pystandalone spec file.""" + spec = configparser.ConfigParser(allow_no_value=True) + spec.read(path) + + run_entry = "run" + (f":{entry}" if entry else "") + + if not spec.has_option(run_entry, "entry"): + raise ValueError(f".pystandalone file has no entrypoint {run_entry}") + + self.run = spec.get(run_entry, "entry") + self.base = path.parent.resolve() + + if not self.base.joinpath(self.run).is_file(): + # If the run entry is not a file, it must be an entry point string like module:func + if self.run.count(":") != 1: + raise ValueError(f"Run entry is not an existing file or a valid entrypoint string: {self.run}") + entry_module, _, _ = self.run.partition(":") + self.modules.add(entry_module.split(".")[0]) + + if "library" in spec: + self.library.update(set(spec.options("library"))) + + if "modules" in spec: + self.modules.update(set(spec.options("modules"))) + + if "include" in spec: + self.include.update(set(spec.options("include"))) + + if "exclude" in spec: + 
self.exclude.update(set(spec.options("exclude"))) + + for name, section in spec.items(): + if name.startswith("build:"): + _, _, module_name = name.partition(":") + self.build[module_name] = dict(section.items()) + + def insert_file(self, path: str, file: Path) -> None: + """Insert a file into the source. + + Args: + path: The path where the file should be inserted. + file: The file to be inserted. + """ + self.insert[path] = file.read_text() + + def insert_str(self, path: str, code: str) -> None: + """Insert a string of code into the source. + + Args: + path: The path where the code should be inserted. + code: The string of code to be inserted. + """ + self.insert[path] = code + + def pack(self) -> Iterator[tuple[str, str | bytes]]: + """Pack the source code into an iterable of file paths and contents.""" + re_excl = _re_from_set(self.exclude) + re_incl = _re_from_set(self.include) + + for path, content in self._pack(): + if re_excl and re_excl.match(path) and not (re_incl and re_incl.match(path)): + continue + + yield path, content + + def _pack(self) -> Iterator[tuple[str, str | bytes]]: + """Pack the source code into an iterable of file paths and contents.""" + if self.run.endswith(".zip"): + log.info("Packing from existing zip file %s", self.run) + yield from pack_zip(self.base.joinpath(self.run)) + + else: + if (run_file := self.base.joinpath(self.run)).is_file(): + log.info("Packing existing run file %s", run_file) + yield "run.py", run_file.read_text() + else: + entry_module, _, entry_func = self.run.partition(":") + log.info("Creating and packing run file for %s", entry_module) + yield "run.py", RUN_MODULE_TEMPLATE.format(entry_module, entry_func.split(".")[0], entry_func) + + if self.include_base: + log.info("Packing base files from %s", self.base) + yield from self._pack_base() + + if self.modules: + log.info("Packing %d modules", len(self.modules)) + yield from self._pack_modules() + + if self.insert: + log.info("Packing %d inserted files", 
len(self.insert)) + yield from self.insert.items() + + def _pack_base(self) -> Iterator[tuple[str, str | bytes]]: + """Pack the base files from the source directory.""" + run_file = self.base.joinpath(self.run) + + for entry in self.base.rglob("*"): + # If an entry is not a file or a separately packed run file + if not entry.is_file() or entry.samefile(run_file): + continue + + yield str(entry.relative_to(self.base)).replace("\\", "/"), entry.read_bytes() + + def _pack_modules(self) -> Iterator[tuple[str, str | bytes]]: + """Pack the specified modules.""" + for module in self.modules: + log.info("Packing module '%s'", module) + yield from self._pack_module(module) + + def _pack_module(self, module: str) -> Iterator[tuple[str, str | bytes]]: + """Pack a single module.""" + if (spec := importlib.util.find_spec(module)) is None: + raise ValueError(f"Module {module} not found") + + if module in self.build: + # This module has a build step, but we need to find an appropriate base directory + # to run the build step from + base = _find_module_base_path(spec) + + if base is None: + log.warning( + "Skipping build command for module '%s': unable to find a unique base directory for the module", + module, + ) + else: + log.info("Running build command for module '%s' from base '%s'", module, base) + yield from self._pack_build_output(module, base) + + yield from _pack_module(spec) + + def _pack_build_output(self, module: str, base: Path) -> Iterator[tuple[str, str | bytes]]: + """Run the build command for a module and pack its output.""" + log.info('Running build steps for module "%s"', module) + + for path, command in self.build[module].items(): + log.info("Building '%s' from '%s'", path, command) + result = subprocess.run(command, shell=True, cwd=base, capture_output=True) + + if result.returncode != 0: + raise RuntimeError( + f'Build command for module "{module}" failed with exit code {result.returncode}:\n' + f"stdout:\n{result.stdout.decode()}\n" + 
f"stderr:\n{result.stderr.decode()}" + ) + + yield path, result.stdout + + +def pack_zip(zip: Path | bytes | BinaryIO) -> Iterator[tuple[str, str | bytes]]: + if isinstance(zip, bytes): + zip = io.BytesIO(zip) + + with zipfile.ZipFile(zip) as zipf: + for zipinfo in zipf.infolist(): + if not zipinfo.is_dir(): + yield zipinfo.filename, zipf.read(zipinfo) + + +def pack_module(name: str) -> Iterator[tuple[str, str | bytes]]: + """Pack a module by name into an iterable of file paths and contents.""" + if (spec := importlib.util.find_spec(name)) is None: + raise ValueError(f"Module {name} not found") + + yield from _pack_module(spec) + + +def _pack_module(spec: ModuleSpec) -> Iterator[tuple[str, str | bytes]]: + for base_path, full_path in _iter_module_files(spec): + yield str(full_path.relative_to(base_path)).replace("\\", "/"), full_path.read_bytes() + + if (not spec.origin and len(spec.submodule_search_locations)) or ( + spec.origin and spec.origin.endswith("__init__.py") + ): + # Fill in missing __init__.py files for namespace packages + relative_path = "" + for part in spec.name.split("."): + relative_path = "/".join([relative_path, part]) if relative_path else part # noqa: FLY002 + yield f"{relative_path}/__init__.py", b"" + + +def _iter_module_files(spec: ModuleSpec) -> Iterator[tuple[Path, str]]: + """Iterate over the files in a module specified by a ModuleSpec.""" + if not spec.submodule_search_locations and spec.origin: + # Single file modules, so base path must be site-packages + yield _get_module_base_path(spec.name, spec.origin), spec.origin + + else: + # Normal and namespace modules + for search_path in spec.submodule_search_locations: + search_path = Path(search_path) + base = _get_module_base_path(spec.name, search_path) + + for root, dirs, files in os.walk(search_path): + if "__pycache__" in dirs: + dirs.remove("__pycache__") + + for file in files: + # ignore .pyc files if we have .py originals + if file.endswith(".pyc") and file[:-1] in files: + 
continue + + yield base, Path(root).joinpath(file) + + +def _find_module_base_path(spec: ModuleSpec) -> Path | None: + if spec.origin: + # This covers things like dissect.target and other regular packages, which will + # have an origin of package/name/__init__.py + return _get_module_base_path(spec.name, Path(spec.origin)) + + if len(spec.submodule_search_locations) == 1: + # This covers namespace packages that don't have an __init__.py + return _get_module_base_path(spec.name, Path(spec.submodule_search_locations[0])) + + return None + + +def _get_module_base_path(name: str, path: Path) -> Path: + # Namespaced packages end in site-packages/name/space/ + # Module packages end in site-packages/module + # Single file packages end in site-packages/ + # E.g. /site-packages/dissect/target/__init__.py -> /site-packages/ + # E.g. /site-packages/acquire -> /site-packages/ + # E.g. /site-packages/six.py -> /site-packages/ + base = path.parent if path.is_file() else path + if path.name == "__init__.py" or path.is_dir(): + for _ in name.split("."): + base = base.parent + + return base + + +def _re_from_set(s: set[str]) -> re.Pattern | None: + return re.compile("|".join([fnmatch.translate(e) for e in s])) if s else None diff --git a/pystandalone/zipapp.py b/pystandalone/zipapp.py new file mode 100644 index 0000000..da0b5ba --- /dev/null +++ b/pystandalone/zipapp.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +import argparse +import hashlib +import importlib.resources +import itertools +import logging +import os +import sys +from pathlib import Path + +from pystandalone import chacha20 +from pystandalone.compiler import CryptCompiler, NoCompiler +from pystandalone.packer import Packer +from pystandalone.source import Source, pack_module, pack_zip + +log = logging.getLogger(__name__) +logging.lastResort = None +logging.raiseExceptions = False + +BOOTSTRAP_SENTINEL = b"Kusjes van SRT!" 
class Builder:
    """Builds a standalone Python zipapp around a :class:`Source`.

    The payload is packed (and optionally ChaCha20-encrypted) behind a
    bootstrap ``__main__.py`` that decrypts and runs it.
    """

    def __init__(self, code: Path, *, encrypt: bool = True, key: str | None = None):
        """
        Args:
            code: Path to the source code (file, directory or spec), or None
                for a module-only zipapp.
            encrypt: Whether to encrypt the payload.
            key: Encryption key; a random 32-byte hex key is generated when
                omitted and encryption is enabled.
        """
        self.code = code
        self.encrypt = encrypt

        if encrypt:
            self.key = key if key is not None else os.urandom(32).hex()
            self.iv = os.urandom(16)
        else:
            self.key = None
            self.iv = None

        self.source = Source.from_path(code) if code else Source()

    def build(self) -> bytes:
        """Build the zipapp and return it as bytes."""
        if self.encrypt:
            # The actual cipher key is the SHA-256 digest of the user key.
            digest = hashlib.sha256(self.key.encode()).digest()
            compiler = CryptCompiler(digest, self.iv)
            # The encrypted sentinel lets the bootstrap verify the key
            # before attempting to import anything.
            decrypt = DECRYPT_TEMPLATE.format(
                sentinel=chacha20.encrypt(BOOTSTRAP_SENTINEL, digest, self.iv),
                iv=self.iv,
            )
        else:
            compiler = NoCompiler()
            decrypt = ""

        # Pack (and optionally encrypt) the source payload first.
        payload = Packer(compiler).pack(self.source.pack())

        entries = [
            ("__main__.py", BOOTSTRAP_STANDALONE_TEMPLATE.format(decrypt=decrypt)),
            # chacha20 and wingui must stay plaintext: the bootstrap needs
            # them before it can decrypt anything else.
            ("chacha20.py", importlib.resources.read_text("pystandalone", "chacha20.py")),
            ("wingui.py", importlib.resources.read_text("pystandalone.redist", "wingui.py")),
        ]
        # Re-add the packed payload members plainly into the outer archive.
        entries.extend(pack_zip(payload))

        return Packer().pack(entries)
def _build_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser for pystandalone-zipapp."""
    parser = argparse.ArgumentParser(
        prog="pystandalone-zipapp",
        description="Build opinionated standalone Python zipapps",
    )

    parser.add_argument(
        "-c",
        "--code",
        metavar="CODE",
        type=Path,
        help="path to code file or directory (.zip, .py, .pystandalone or directory containing run.py or .pystandalone file), or leave empty to generate a module-only zipapp",  # noqa: E501
    )
    parser.add_argument(
        "-o",
        "--output",
        metavar="OUTPUT",
        type=Path,
        help="path to output zipapp",
    )

    packaging_group = parser.add_argument_group("packaging options")
    packaging_group.add_argument(
        "-M",
        "--modules",
        type=str,
        nargs="*",
        help="extra external modules to include",
    )

    encryption_group = parser.add_argument_group("encryption options")
    encryption_group.add_argument(
        "-k",
        "--key",
        type=str,
        help="use provided key for encryption",
    )
    encryption_group.add_argument(
        "--key-file",
        type=Path,
        help="write encryption key to file",
    )
    encryption_group.add_argument(
        "--no-crypt",
        action="store_true",
        default=False,
        help="disable encryption",
    )

    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=3,
        help="increase output verbosity",
    )

    return parser


def main() -> int:
    """CLI entry point: build a zipapp and write it (and optionally the key) to disk.

    Returns:
        0 on success, 1 when building fails.
    """
    args = _build_parser().parse_args()

    setup_logging(args.verbose)

    log.info("Setting up builder")

    try:
        builder = Builder(
            args.code,
            encrypt=not args.no_crypt,
            key=args.key,
        )
    except Exception as e:
        log.error("Error creating builder: %s. View debug logs for more details.", e)  # noqa: TRY400
        log.debug("Stacktrace:", exc_info=e)
        return 1

    if args.modules:
        builder.source.modules.update(set(args.modules))

    try:
        exe = builder.build()
    except Exception as e:
        log.error("Error building zipapp: %s. View debug logs for more details.", e)  # noqa: TRY400
        log.debug("Stacktrace:", exc_info=e)
        return 1

    # Default to the current directory; a directory target gets a default name.
    output = args.output if args.output is not None else Path()
    if output.is_dir():
        output = output / "pystandalone-zipapp.pyz"

    log.info("Writing %s", output)
    output.write_bytes(exe)

    if builder.key:
        log.info("")
        log.info("Key: %s", builder.key)
        if args.key_file:
            log.info("Writing %s", args.key_file)
            args.key_file.write_text(builder.key)

    return 0
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: clean help Makefile + +clean: Makefile + rm -rf api + @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/tests/_docs/conf.py b/tests/_docs/conf.py new file mode 100644 index 0000000..5fb111f --- /dev/null +++ b/tests/_docs/conf.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +project = "pystandalone" + +extensions = [ + "autoapi.extension", + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx.ext.doctest", + "sphinx.ext.napoleon", + "sphinx_argparse_cli", +] + +exclude_patterns = [] + +html_theme = "furo" + +autoapi_type = "python" +autoapi_dirs = ["../../pystandalone/"] +autoapi_ignore = ["*tests*", "*.tox*", "*venv*", "*examples*"] +autoapi_python_use_implicit_namespaces = True +autoapi_add_toctree_entry = False +autoapi_root = "api" +autoapi_options = [ + "members", + "undoc-members", + "show-inheritance", + "show-module-summary", + "special-members", + "imported-members", +] +autoapi_keep_files = True +autoapi_template_dir = "_templates/autoapi" + +autodoc_typehints = "signature" +autodoc_member_order = "groupwise" + +autosectionlabel_prefix_document = True + +suppress_warnings = [ + # https://github.com/readthedocs/sphinx-autoapi/issues/285 + "autoapi.python_import_resolution", + "ref.python", +] diff --git a/tests/_docs/index.rst b/tests/_docs/index.rst new file mode 100644 index 0000000..67dd97d --- /dev/null +++ b/tests/_docs/index.rst @@ -0,0 +1,8 @@ +API Reference +============= + +.. 
toctree:: + :maxdepth: 1 + :glob: + + /api/*/index diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0b92051 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import urllib.error +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest + +from pystandalone.distribution import Architecture, Target, get_distribution_map + +if TYPE_CHECKING: + from pystandalone.distribution import Distribution + +DISTRIBUTIONS = get_distribution_map() +VERSIONS = sorted(DISTRIBUTIONS.keys()) + + +def absolute_path(path: str) -> Path: + return Path(__file__).parent / path + + +def _get_distribution(version: str, target: Target, arch: Architecture) -> Distribution: + try: + info = DISTRIBUTIONS[version][target][arch] + except KeyError: + pytest.skip(f"No distribution available for {version} {target.value} {arch.value}") + + try: + return info.get() + except urllib.error.URLError as e: + pytest.skip(f"Unable to download distribution: {e}") + + +@pytest.fixture(scope="session") +def linux_distribution() -> Distribution: + return _get_distribution("3.12", Target.LINUX, Architecture.X86_64) + + +@pytest.fixture(scope="session") +def windows_distribution() -> Distribution: + return _get_distribution("3.12", Target.WINDOWS, Architecture.X86_64) + + +@pytest.fixture(scope="session") +def macos_distribution() -> Distribution: + return _get_distribution("3.12", Target.MACOS, Architecture.AARCH64) + + +@pytest.fixture(scope="session", params=VERSIONS) +def native_distribution(request: pytest.FixtureRequest) -> Distribution: + try: + target = Target.from_current() + arch = Architecture.from_current() + except RuntimeError as e: + pytest.skip(str(e)) + + return _get_distribution(request.param, target, arch) diff --git a/tests/redist/__init__.py b/tests/redist/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/redist/test_monkey.py b/tests/redist/test_monkey.py new file mode 
100644 index 0000000..aeb8357 --- /dev/null +++ b/tests/redist/test_monkey.py @@ -0,0 +1,984 @@ +from __future__ import annotations + +import os +import stat +import subprocess +import textwrap +from typing import TYPE_CHECKING + +from pystandalone.builder import Builder + +if TYPE_CHECKING: + from pathlib import Path + + from pystandalone.distribution import Distribution + + +def _build_and_run( + distribution: Distribution, + tmp_path: Path, + code: str, + *, + extra_files: dict[str, str] | None = None, +) -> subprocess.CompletedProcess: + """Build a native binary with the given run.py code and optional extra payload files.""" + run = tmp_path / "run.py" + run.write_text(code) + + builder = Builder( + distribution, + run, + encrypt=False, + compile=False, + strict=False, + ) + + if extra_files: + for path, content in extra_files.items(): + builder.add_source_str(path, content) + + exe = builder.build(build_path=None) + + out_path = tmp_path / "binary" + out_path.write_bytes(exe) + out_path.chmod(out_path.stat().st_mode | stat.S_IEXEC) + + env = os.environ.copy() + env["PYTHONUNBUFFERED"] = "1" + + return subprocess.run([str(out_path)], capture_output=True, timeout=30, env=env) + + +def test_monkey_open_payload_file(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that builtins.open can read files from the payload zip.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + data = open("/data/hello.txt").read() + print(data, flush=True) + """), + extra_files={"data/hello.txt": "hello from payload"}, + ) + + assert result.returncode == 0, result.stderr + assert b"hello from payload" in result.stdout + + +def test_monkey_io_open_payload_file(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that io.open can read files from the payload zip.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import io + data = io.open("/data/hello.txt").read() + 
print(data, flush=True) + """), + extra_files={"data/hello.txt": "io open works"}, + ) + + assert result.returncode == 0, result.stderr + assert b"io open works" in result.stdout + + +def test_monkey_io_fileio(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that io.FileIO can read files from the payload zip.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import io + f = io.FileIO("/data/file.txt") + data = f.read() + f.close() + print(data.decode(), flush=True) + """), + extra_files={"data/file.txt": "fileio works"}, + ) + + assert result.returncode == 0, result.stderr + assert b"fileio works" in result.stdout + + +def test_monkey_io_fileio_binary(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that io.FileIO reads payload files as bytes.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import io + f = io.FileIO("/data/file.bin") + data = f.read() + f.close() + print(f"type={type(data).__name__}", flush=True) + print(repr(data), flush=True) + """), + extra_files={"data/file.bin": "raw bytes here"}, + ) + + assert result.returncode == 0, result.stderr + assert b"type=bytes" in result.stdout + assert b"raw bytes here" in result.stdout + + +def test_monkey_open_binary_mode(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that builtins.open can read payload files in binary mode.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + data = open("/data/bin.dat", "rb").read() + print(repr(data), flush=True) + """), + extra_files={"data/bin.dat": "binary content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"binary content" in result.stdout + + +def test_monkey_open_real_file(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that builtins.open still works for real filesystem files.""" + real_file = tmp_path / "real.txt" + real_file.write_text("real 
file content") + + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent(f"""\ + data = open({str(real_file)!r}).read() + print(data, flush=True) + """), + ) + + assert result.returncode == 0, result.stderr + assert b"real file content" in result.stdout + + +def test_monkey_stat_file(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.stat works for files in the payload zip.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + import stat + st = os.stat("/data/file.txt") + print(f"isreg={stat.S_ISREG(st.st_mode)}", flush=True) + print(f"size={st.st_size}", flush=True) + """), + extra_files={"data/file.txt": "x" * 42}, + ) + + assert result.returncode == 0, result.stderr + assert b"isreg=True" in result.stdout + + +def test_monkey_stat_dir(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.stat works for directories in the payload zip.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + import stat + st = os.stat("/data") + print(f"isdir={stat.S_ISDIR(st.st_mode)}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"isdir=True" in result.stdout + + +def test_monkey_lstat(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.lstat works for payload files.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + import stat + st = os.lstat("/data/file.txt") + print(f"isreg={stat.S_ISREG(st.st_mode)}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"isreg=True" in result.stdout + + +def test_monkey_listdir(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.listdir works for directories in the payload zip.""" + result = _build_and_run( + 
native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + entries = sorted(os.listdir("/pkg")) + print(",".join(entries), flush=True) + """), + extra_files={ + "pkg/__init__.py": "", + "pkg/a.py": "a = 1", + "pkg/b.py": "b = 2", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"__init__.py" in result.stdout + assert b"a.py" in result.stdout + assert b"b.py" in result.stdout + + +def test_monkey_scandir(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.scandir works for directories in the payload zip.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + with os.scandir("/pkg") as entries: + for entry in sorted(entries, key=lambda e: e.name): + print(f"{entry.name} is_file={entry.is_file()} is_dir={entry.is_dir()}", flush=True) + """), + extra_files={ + "pkg/__init__.py": "", + "pkg/mod.py": "x = 1", + "pkg/sub/nested.py": "y = 2", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"__init__.py is_file=True is_dir=False" in result.stdout + assert b"mod.py is_file=True is_dir=False" in result.stdout + assert b"sub is_file=False is_dir=True" in result.stdout + + +def test_monkey_import_payload_module(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that modules in the payload zip can be imported.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import mymod + print(mymod.VALUE, flush=True) + """), + extra_files={"mymod.py": "VALUE = 'imported ok'"}, + ) + + assert result.returncode == 0, result.stderr + assert b"imported ok" in result.stdout + + +def test_monkey_import_payload_package(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that packages in the payload zip can be imported.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from mypkg.core import VALUE + print(VALUE, flush=True) + """), + extra_files={ + 
"mypkg/__init__.py": "", + "mypkg/core.py": "VALUE = 'package ok'", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"package ok" in result.stdout + + +def test_monkey_dataclass(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that dataclasses work in the standalone binary.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import dataclasses + + @dataclasses.dataclass + class Point: + x: int + y: int + + p = Point(1, 2) + print(f"{p.x},{p.y}", flush=True) + """), + ) + + assert result.returncode == 0, result.stderr + assert b"1,2" in result.stdout + + +def test_monkey_stat_nonexistent(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.stat raises OSError for nonexistent payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + try: + os.stat("/does/not/exist.txt") + print("NO_ERROR", flush=True) + except OSError: + print("GOT_OSERROR", flush=True) + """), + ) + + assert result.returncode == 0, result.stderr + assert b"GOT_OSERROR" in result.stdout + + +def test_monkey_open_is_a_directory(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that opening a payload directory raises IsADirectoryError.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + try: + open("/pkg") + print("NO_ERROR", flush=True) + except IsADirectoryError: + print("GOT_ISADIRECTORYERROR", flush=True) + """), + extra_files={"pkg/__init__.py": ""}, + ) + + assert result.returncode == 0, result.stderr + assert b"GOT_ISADIRECTORYERROR" in result.stdout + + +def test_monkey_listdir_not_a_directory(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.listdir on a payload file raises NotADirectoryError.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + try: + os.listdir("/data.txt") + print("NO_ERROR", 
flush=True) + except NotADirectoryError: + print("GOT_NOTADIRECTORYERROR", flush=True) + """), + extra_files={"data.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"GOT_NOTADIRECTORYERROR" in result.stdout + + +def test_monkey_pathlib_read_text(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.read_text works for payload files.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import PurePosixPath + data = PurePosixPath("/data/hello.txt") + # Use open() which goes through the monkey-patched builtins.open + with open(str(data)) as f: + print(f.read(), flush=True) + """), + extra_files={"data/hello.txt": "pathlib read works"}, + ) + + assert result.returncode == 0, result.stderr + assert b"pathlib read works" in result.stdout + + +def test_monkey_pathlib_read_bytes(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.read_bytes works for payload files.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import Path + data = Path("/data/file.bin").read_bytes() + print(repr(data), flush=True) + """), + extra_files={"data/file.bin": "binary pathlib"}, + ) + + assert result.returncode == 0, result.stderr + assert b"binary pathlib" in result.stdout + + +def test_monkey_pathlib_exists(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.exists works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import Path + print(f"file_exists={Path('/data/file.txt').exists()}", flush=True) + print(f"dir_exists={Path('/data').exists()}", flush=True) + print(f"missing={Path('/nope.txt').exists()}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"file_exists=True" in result.stdout + assert 
b"dir_exists=True" in result.stdout + assert b"missing=False" in result.stdout + + +def test_monkey_pathlib_is_file(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.is_file works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import Path + print(f"file={Path('/data/file.txt').is_file()}", flush=True) + print(f"dir={Path('/data').is_file()}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"file=True" in result.stdout + assert b"dir=False" in result.stdout + + +def test_monkey_pathlib_is_dir(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.is_dir works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import Path + print(f"dir={Path('/data').is_dir()}", flush=True) + print(f"file={Path('/data/file.txt').is_dir()}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"dir=True" in result.stdout + assert b"file=False" in result.stdout + + +def test_monkey_pathlib_stat(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.stat works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import stat + from pathlib import Path + st = Path("/data/file.txt").stat() + print(f"isreg={stat.S_ISREG(st.st_mode)}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"isreg=True" in result.stdout + + +def test_monkey_pathlib_iterdir(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.iterdir works for payload directories.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + 
from pathlib import Path + entries = sorted(p.name for p in Path("/pkg").iterdir()) + print(",".join(entries), flush=True) + """), + extra_files={ + "pkg/__init__.py": "", + "pkg/a.py": "a = 1", + "pkg/b.py": "b = 2", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"__init__.py" in result.stdout + assert b"a.py" in result.stdout + assert b"b.py" in result.stdout + + +def test_monkey_pathlib_open(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.open works for payload files.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import Path + with Path("/data/file.txt").open() as f: + print(f.read(), flush=True) + """), + extra_files={"data/file.txt": "pathlib open works"}, + ) + + assert result.returncode == 0, result.stderr + assert b"pathlib open works" in result.stdout + + +def test_monkey_pathlib_glob(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.glob works for payload directories.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import Path + matches = sorted(p.name for p in Path("/pkg").glob("*.py")) + print(",".join(matches), flush=True) + """), + extra_files={ + "pkg/__init__.py": "", + "pkg/a.py": "a = 1", + "pkg/b.py": "b = 2", + "pkg/data.txt": "not python", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"__init__.py" in result.stdout + assert b"a.py" in result.stdout + assert b"b.py" in result.stdout + assert b"data.txt" not in result.stdout + + +def test_monkey_pathlib_rglob(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib.Path.rglob works for payload directories.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + from pathlib import Path + matches = sorted(str(p).replace("/", "") for p in Path("/pkg").rglob("*.py")) + print(",".join(matches), flush=True) + 
"""), + extra_files={ + "pkg/__init__.py": "", + "pkg/a.py": "a = 1", + "pkg/sub/__init__.py": "", + "pkg/sub/nested.py": "n = 1", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"__init__.py" in result.stdout + assert b"a.py" in result.stdout + assert b"nested.py" in result.stdout + + +def test_monkey_pathlib_real_file(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pathlib operations still work for real filesystem files.""" + real_file = tmp_path / "real.txt" + real_file.write_text("real pathlib content") + + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent(f"""\ + from pathlib import Path + p = Path({str(real_file)!r}) + print(f"exists={{p.exists()}}", flush=True) + print(f"is_file={{p.is_file()}}", flush=True) + print(p.read_text(), flush=True) + """), + ) + + assert result.returncode == 0, result.stderr + assert b"exists=True" in result.stdout + assert b"is_file=True" in result.stdout + assert b"real pathlib content" in result.stdout + + +def test_monkey_ssl_load_verify_locations_pem(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that SSLContext.load_verify_locations works for .pem files in the payload zip.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import ssl + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + try: + ctx.load_verify_locations("/certs/ca.pem") + print("LOADED_OK", flush=True) + except ssl.SSLError as e: + # Expected if the cert content is not a real certificate, + # but the monkey patch itself worked (it read the file and called the original) + print(f"SSL_ERROR={e}", flush=True) + except Exception as e: + print(f"OTHER_ERROR={type(e).__name__}:{e}", flush=True) + """), + extra_files={"certs/ca.pem": "-----BEGIN CERTIFICATE-----\nZm9v\n-----END CERTIFICATE-----\n"}, + ) + + assert result.returncode == 0, result.stderr + # The monkey patch should have read the file and passed it as cadata. 
+ # With a fake cert, we expect either LOADED_OK or SSL_ERROR (cert parse failure). + # Either way, no OTHER_ERROR means the patch worked. + assert b"OTHER_ERROR" not in result.stdout + + +def test_monkey_ssl_load_verify_locations_real_file(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that SSLContext.load_verify_locations still works for real filesystem files.""" + cert_file = tmp_path / "ca.pem" + cert_file.write_text("-----BEGIN CERTIFICATE-----\nZm9v\n-----END CERTIFICATE-----\n") + + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent(f"""\ + import ssl + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + try: + ctx.load_verify_locations({str(cert_file)!r}) + print("LOADED_OK", flush=True) + except ssl.SSLError as e: + print(f"SSL_ERROR={{e}}", flush=True) + except Exception as e: + print(f"OTHER_ERROR={{type(e).__name__}}:{{e}}", flush=True) + """), + ) + + assert result.returncode == 0, result.stderr + assert b"OTHER_ERROR" not in result.stdout + + +def test_monkey_ssl_load_verify_locations_der(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that SSLContext.load_verify_locations reads non-.pem payload files in binary mode.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import ssl + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + try: + ctx.load_verify_locations("/certs/ca.der") + print("LOADED_OK", flush=True) + except ssl.SSLError as e: + print(f"SSL_ERROR={e}", flush=True) + except Exception as e: + print(f"OTHER_ERROR={type(e).__name__}:{e}", flush=True) + """), + extra_files={"certs/ca.der": "\x30\x82\x01\x00"}, + ) + + assert result.returncode == 0, result.stderr + assert b"OTHER_ERROR" not in result.stdout + + +def test_monkey_os_path_exists(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.path.exists works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + 
import os.path + print(f"file={os.path.exists('/data/file.txt')}", flush=True) + print(f"dir={os.path.exists('/data')}", flush=True) + print(f"missing={os.path.exists('/nope.txt')}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"file=True" in result.stdout + assert b"dir=True" in result.stdout + assert b"missing=False" in result.stdout + + +def test_monkey_os_path_lexists(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.path.lexists works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os.path + print(f"file={os.path.lexists('/data/file.txt')}", flush=True) + print(f"dir={os.path.lexists('/data')}", flush=True) + print(f"missing={os.path.lexists('/nope.txt')}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"file=True" in result.stdout + assert b"dir=True" in result.stdout + assert b"missing=False" in result.stdout + + +def test_monkey_os_path_isfile(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.path.isfile works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os.path + print(f"file={os.path.isfile('/data/file.txt')}", flush=True) + print(f"dir={os.path.isfile('/data')}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"file=True" in result.stdout + assert b"dir=False" in result.stdout + + +def test_monkey_os_path_isdir(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.path.isdir works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os.path + print(f"dir={os.path.isdir('/data')}", flush=True) + 
print(f"file={os.path.isdir('/data/file.txt')}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"dir=True" in result.stdout + assert b"file=False" in result.stdout + + +def test_monkey_os_path_getsize(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.path.getsize works for payload files.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os.path + size = os.path.getsize("/data/file.txt") + print(f"size={size}", flush=True) + """), + extra_files={"data/file.txt": "x" * 42}, + ) + + assert result.returncode == 0, result.stderr + assert b"size=42" in result.stdout + + +def test_monkey_os_path_islink(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.path.islink returns False for payload paths (no symlinks in zips).""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os.path + print(f"islink={os.path.islink('/data/file.txt')}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"islink=False" in result.stdout + + +def test_monkey_os_access(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.access works for payload paths.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + print(f"exists={os.access('/data/file.txt', os.F_OK)}", flush=True) + print(f"read={os.access('/data/file.txt', os.R_OK)}", flush=True) + print(f"missing={os.access('/nope.txt', os.F_OK)}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"exists=True" in result.stdout + assert b"read=True" in result.stdout + assert b"missing=False" in result.stdout + + +def test_monkey_os_path_realpath(native_distribution: Distribution, tmp_path: Path) -> None: + 
"""Test that os.path.realpath returns the payload path as-is.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os.path + p = os.path.realpath("/data/file.txt").replace(os.path.sep, "/") + print(f"path={p}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"/data/file.txt" in result.stdout + + +def test_monkey_os_path_abspath(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.path.abspath returns the payload path as-is.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os.path + p = os.path.abspath("/data/file.txt").replace(os.path.sep, "/") + print(f"path={p}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"/data/file.txt" in result.stdout + + +def test_monkey_os_fspath(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that os.fspath works with payload paths (pathlib and string).""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import os + from pathlib import Path + # String path + s = os.fspath("/data/file.txt").replace(os.path.sep, "/") + print(f"str={s}", flush=True) + # Path object + p = os.fspath(Path("/data/file.txt")).replace(os.path.sep, "/") + print(f"path={p}", flush=True) + """), + extra_files={"data/file.txt": "content"}, + ) + + assert result.returncode == 0, result.stderr + assert b"str=/data/file.txt" in result.stdout + assert b"path=/data/file.txt" in result.stdout + + +def test_monkey_shutil_copyfile(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that shutil.copyfile can copy a payload file to the real filesystem.""" + dest = tmp_path / "copied.txt" + + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent(f"""\ + import shutil + shutil.copyfile("/data/file.txt", 
{str(dest)!r}) + with open({str(dest)!r}) as f: + print(f.read(), flush=True) + """), + extra_files={"data/file.txt": "copy me"}, + ) + + assert result.returncode == 0, result.stderr + assert b"copy me" in result.stdout + + +def test_monkey_shutil_copy(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that shutil.copy can copy a payload file to the real filesystem.""" + dest = tmp_path / "copied.txt" + + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent(f"""\ + import shutil + shutil.copy("/data/file.txt", {str(dest)!r}) + with open({str(dest)!r}) as f: + print(f.read(), flush=True) + """), + extra_files={"data/file.txt": "copy me too"}, + ) + + assert result.returncode == 0, result.stderr + assert b"copy me too" in result.stdout + + +def test_monkey_importlib_resources_files(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that importlib.resources.files works for payload packages.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import importlib.resources + files = importlib.resources.files("mypkg") + data = (files / "data.txt").read_text() + print(data, flush=True) + """), + extra_files={ + "mypkg/__init__.py": "", + "mypkg/data.txt": "resources files works", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"resources files works" in result.stdout + + +def test_monkey_importlib_resources_read_text(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that importlib.resources.read_text works for payload packages.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import importlib.resources + try: + data = importlib.resources.read_text("mypkg", "data.txt") + print(data, flush=True) + except Exception as e: + print(f"ERROR={type(e).__name__}:{e}", flush=True) + """), + extra_files={ + "mypkg/__init__.py": "", + "mypkg/data.txt": "read_text works", + }, + ) + + assert result.returncode 
== 0, result.stderr + assert b"read_text works" in result.stdout or b"ERROR=" in result.stdout + + +def test_monkey_importlib_resources_read_binary(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that importlib.resources.read_binary works for payload packages.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import importlib.resources + try: + data = importlib.resources.read_binary("mypkg", "data.bin") + print(repr(data), flush=True) + except Exception as e: + print(f"ERROR={type(e).__name__}:{e}", flush=True) + """), + extra_files={ + "mypkg/__init__.py": "", + "mypkg/data.bin": "binary resource", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"binary resource" in result.stdout or b"ERROR=" in result.stdout + + +def test_monkey_pkgutil_get_data(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that pkgutil.get_data works for payload packages.""" + result = _build_and_run( + native_distribution, + tmp_path, + textwrap.dedent("""\ + import pkgutil + data = pkgutil.get_data("mypkg", "data.txt") + print(data.decode(), flush=True) + """), + extra_files={ + "mypkg/__init__.py": "", + "mypkg/data.txt": "pkgutil works", + }, + ) + + assert result.returncode == 0, result.stderr + assert b"pkgutil works" in result.stdout diff --git a/tests/test_binary.py b/tests/test_binary.py new file mode 100644 index 0000000..418ebe5 --- /dev/null +++ b/tests/test_binary.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import struct +from typing import TYPE_CHECKING + +import lief + +from pystandalone.binary import pack_zip, patch + +if TYPE_CHECKING: + from pystandalone.distribution import Distribution + + +def test_pack_zip_single() -> None: + """Test that pack_zip packs a single zip with a size prefix.""" + data = b"PK\x03\x04fake_zip_data" + result = pack_zip([data]) + + size = struct.unpack(" None: + """Test that pack_zip packs multiple zips sequentially with size 
prefixes.""" + zip1 = b"zip_one" + zip2 = b"zip_two_longer" + result = pack_zip([zip1, zip2]) + + offset = 0 + for expected in [zip1, zip2]: + size = struct.unpack(" None: + """Test that pack_zip with no zips returns empty bytes.""" + assert pack_zip([]) == b"" + + +def test_patch_elf(linux_distribution: Distribution) -> None: + """Test that patching an ELF binary writes the payload into the .pystandalone section.""" + payload = b"\x01\x02\x03\x04" * 16 + + result = patch(linux_distribution, payload) + elf = lief.ELF.parse(result) + section = elf.get_section(".pystandalone") + section_data = bytes(section.content) + + assert section_data[: len(payload)] == payload + + +def test_patch_pe(windows_distribution: Distribution) -> None: + """Test that patching a PE binary writes the payload into the RCDATA resource.""" + payload = b"\x01\x02\x03\x04" * 16 + + result = patch(windows_distribution, payload) + pe = lief.PE.parse(result) + + found = False + for node in pe.resources.childs: + if node.id == lief.PE.ResourcesManager.TYPE.RCDATA: + data = bytes(next(next(node.childs).childs).content) + assert data == payload + found = True + break + + assert found + + +def test_patch_macho(macos_distribution: Distribution) -> None: + """Test that patching a Mach-O binary writes the payload into the __pystandalone section.""" + payload = b"\x01\x02\x03\x04" * 16 + + result = patch(macos_distribution, payload) + fat = lief.MachO.parse(result) + + for macho in fat: + section = macho.get_section("__pystandalone") + section_data = bytes(section.content) + assert section_data[: len(payload)] == payload diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py new file mode 100644 index 0000000..dfdf787 --- /dev/null +++ b/tests/test_bootstrap.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import os +import subprocess +import sys +import textwrap + +from pystandalone.bootstrap import DECRYPT_TEMPLATE, Bootstrap + + +def test_bootstrap_pack_no_encrypt() -> None: + 
"""Test that bootstrap without encryption uses the no-crypt template.""" + b = Bootstrap(digest=b"\x00" * 32, encrypt=False) + packed = dict(b.pack()) + + assert "bootstrap.py" in packed + assert len(packed["bootstrap.py"]) > 0 + assert "monkey.py" in packed + assert len(packed["monkey.py"]) > 0 + assert "wingui.py" in packed + assert len(packed["wingui.py"]) > 0 + assert "DIGEST" in packed["bootstrap.py"] + assert "cipher" not in packed["bootstrap.py"] + + +def test_bootstrap_pack_encrypt() -> None: + """Test that bootstrap with encryption uses the decrypt template.""" + b = Bootstrap(digest=b"\x00" * 32, encrypt=True, iv=b"\x01" * 8) + packed = dict(b.pack()) + + assert "chacha20" in packed["bootstrap.py"] + assert "DIGEST" in packed["bootstrap.py"] + assert repr(b"\x01" * 8) in packed["bootstrap.py"] + + +def test_bootstrap_filesystem_importer() -> None: + """Test that filesystem_importer=True inserts before the filesystem importer.""" + b = Bootstrap(digest=b"\x00" * 32, encrypt=False, filesystem_importer=True) + packed = dict(b.pack()) + + assert "Insert before the filesystem importer" in packed["bootstrap.py"] + assert "Remove the filesystem importer" not in packed["bootstrap.py"] + + +def test_bootstrap_no_filesystem_importer() -> None: + """Test that filesystem_importer=False removes the filesystem importer.""" + b = Bootstrap(digest=b"\x00" * 32, encrypt=False, filesystem_importer=False) + packed = dict(b.pack()) + + assert "Remove the filesystem importer" in packed["bootstrap.py"] + assert "Insert before the filesystem importer" not in packed["bootstrap.py"] + + +def test_decrypt_stub_contains_iv() -> None: + """Test that the decrypt stub contains the IV and digest.""" + b = Bootstrap(digest=b"\xaa" * 32, encrypt=True, iv=b"\xbb" * 8) + stub = b._decrypt_stub() + + assert repr(b"\xaa" * 32) in stub + assert repr(b"\xbb" * 8) in stub + assert "chacha20" in stub + + +def test_no_crypt_stub_contains_digest() -> None: + """Test that the no-crypt stub contains 
the digest but no decryption."""
+    b = Bootstrap(digest=b"\xcc" * 32, encrypt=False)
+    stub = b._decrypt_stub()
+
+    assert repr(b"\xcc" * 32) in stub
+    assert "chacha20" not in stub
+
+
+def test_generated_bootstrap_compiles() -> None:
+    """Test that all generated bootstrap variants produce valid Python syntax."""
+    for encrypt in (True, False):
+        for fs_importer in (True, False):
+            b = Bootstrap(
+                digest=b"\x00" * 32,
+                encrypt=encrypt,
+                iv=b"\x00" * 8 if encrypt else None,
+                filesystem_importer=fs_importer,
+            )
+            packed = dict(b.pack())
+            for name, code in packed.items():
+                compile(code, name, "exec")
+
+
+def _run_key_script(args: list[str], env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
+    """Run a script that exercises the key parsing logic from DECRYPT_TEMPLATE."""
+    # Build a minimal script that sets up the required globals, runs the key parsing
+    # portion of the decrypt template, and prints the result.
+    decrypt_code = DECRYPT_TEMPLATE.format(digest=b"\x00" * 32, iv=b"\x00" * 8)
+
+    # Extract just the key parsing logic (everything up to the cipher usage)
+    lines = decrypt_code.strip().splitlines()
+    key_lines = []
+    for line in lines:
+        if "cipher" in line or "_pystandalone" in line:
+            break
+        key_lines.append(line)
+
+    script = textwrap.dedent("""\
+        import sys
+        sys.argv = ["test"] + {args!r}
+        {key_code}
+        print(repr(key))
+    """).format(args=args, key_code="\n".join(key_lines))
+
+    return subprocess.run(
+        [sys.executable, "-c", script],
+        capture_output=True,
+        text=True,
+        env=env,
+    )
+
+
+def test_key_from_args_hex() -> None:
+    """Test that hex keys are handled correctly."""
+    hex_key = "aa" * 32
+    result = _run_key_script([":key", hex_key])
+
+    assert result.returncode == 0
+    assert repr(hex_key) in result.stdout
+
+
+def test_key_from_args_raw() -> None:
+    """Test that non-hex keys are handled correctly."""
+    result = _run_key_script([":key", "mypassword"])
+
+    assert result.returncode == 0
+    assert repr(b"mypassword") in
result.stdout + + +def test_key_from_args_equals() -> None: + """Test that :key=VALUE syntax works.""" + result = _run_key_script([":key=secretkey"]) + + assert result.returncode == 0 + assert repr(b"secretkey") in result.stdout + + +def test_key_from_env() -> None: + """Test that the key is read from PYSTANDALONE_KEY environment variable.""" + env = {**os.environ, "PYSTANDALONE_KEY": "envkey"} + result = _run_key_script([], env=env) + + assert result.returncode == 0 + assert repr(b"envkey") in result.stdout diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..1e8b078 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,533 @@ +from __future__ import annotations + +import hashlib +import os +import stat +import subprocess +import sys +from typing import TYPE_CHECKING + +import pytest + +from pystandalone import chacha20 +from pystandalone.builder import Builder, main, select_distribution +from pystandalone.compiler import NoCompiler, PycCompiler +from pystandalone.distribution import Architecture, Target, get_distribution_map + +if TYPE_CHECKING: + from pathlib import Path + + from pystandalone.distribution import Distribution + + +def test_builder_init_defaults(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that a Builder with code sets up encryption by default.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(linux_distribution, code, compile=False) + + assert builder.encrypt is True + assert builder.key is not None + assert builder.iv is not None + assert len(builder.iv) == 16 + + +def test_builder_init_compile(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that a Builder with compile=True uses PycCompiler.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(native_distribution, code, strict=False) + + assert isinstance(builder.packer.compiler, PycCompiler) + + +def test_builder_init_no_code(linux_distribution: 
Distribution) -> None: + """Test that a Builder without code disables encryption.""" + builder = Builder(linux_distribution, compile=False) + + assert builder.encrypt is False + assert builder.key is None + assert builder.iv is None + assert builder.source.run is None + + +def test_builder_init_no_encrypt(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that encryption can be disabled.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(linux_distribution, code, encrypt=False, compile=False) + + assert builder.encrypt is False + assert builder.key is None + assert builder.iv is None + + +def test_builder_init_custom_key(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that a custom encryption key is used when provided.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(linux_distribution, code, key="mysecretkey", compile=False) + + assert builder.key == "mysecretkey" + + +def test_builder_init_no_compile(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that compilation can be disabled.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(linux_distribution, code, compile=False) + + assert isinstance(builder.packer.compiler, NoCompiler) + + +def test_add_library(linux_distribution: Distribution) -> None: + """Test that add_library adds to the source library set.""" + builder = Builder(linux_distribution, compile=False) + builder.add_library("json") + builder.add_library("csv") + + assert "json" in builder.source.library + assert "csv" in builder.source.library + + +def test_add_module(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that add_module adds to the source modules set.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(linux_distribution, code, compile=False) + builder.add_module("requests") + + assert "requests" in builder.source.modules + + +def 
test_add_source_str(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that add_source_str inserts code into the source.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(linux_distribution, code, compile=False) + builder.add_source_str("helper.py", "x = 1") + + packed = dict(builder.source.pack()) + assert packed["helper.py"] == "x = 1" + + +def test_add_source_file(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that add_source_file inserts a file into the source.""" + code = tmp_path / "run.py" + code.write_text("pass") + data = tmp_path / "data.txt" + data.write_text("hello") + + builder = Builder(linux_distribution, code, compile=False) + builder.add_source_file("extra/data.txt", data) + + packed = dict(builder.source.pack()) + assert packed["extra/data.txt"] == "hello" + + +def test_encrypt_payload_roundtrip(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that encrypting and decrypting payload produces original data.""" + code = tmp_path / "run.py" + code.write_text("pass") + + builder = Builder(linux_distribution, code, compile=False) + original = b"test payload data " * 100 + + encrypted = builder.encrypt_payload(original) + assert encrypted != original + + decrypted = chacha20.decrypt(encrypted, hashlib.sha256(builder.key.encode()).digest(), builder.iv) + assert decrypted == original + + +def test_build_library_zip(linux_distribution: Distribution) -> None: + """Test that build_library_zip produces a non-empty zip.""" + builder = Builder(linux_distribution, compile=False) + result = builder.build_library_zip() + + assert len(result) > 0 + # Should be a valid zip (starts with PK magic) + assert result[:2] == b"PK" + + +def test_dump_artefact(linux_distribution: Distribution, tmp_path: Path) -> None: + """Test that _dump_artefact writes files to the given path.""" + builder = Builder(linux_distribution, compile=False) + + dump_dir = tmp_path / "dump" + 
builder._dump_artefact(dump_dir, "test.bin", b"hello") + + assert (dump_dir / "test.bin").read_bytes() == b"hello" + + +def test_dump_artefact_none(linux_distribution: Distribution) -> None: + """Test that _dump_artefact with None path is a no-op.""" + builder = Builder(linux_distribution, compile=False) + builder._dump_artefact(None, "test.bin", b"hello") + + +def test_select_distribution_valid() -> None: + """Test that select_distribution returns a distribution for valid inputs.""" + distributions = get_distribution_map() + + # Pick any available version/target/arch combo + version = next(iter(distributions)) + target = next(iter(distributions[version])) + arch = next(iter(distributions[version][target])) + + dist = select_distribution(version, target, arch) + assert dist is not None + + +def test_select_distribution_invalid_version() -> None: + """Test that select_distribution raises KeyError for a non-existent version.""" + assert select_distribution("1.0", Target.LINUX, Architecture.X86_64) is None + + +def test_select_distribution_invalid_arch() -> None: + """Test that select_distribution returns None for a non-existent arch combo.""" + distributions = get_distribution_map() + version = next(iter(distributions)) + + assert select_distribution(version, Target.MACOS, Architecture.I686) is None + + +def _build_and_run( + distribution: Distribution, + tmp_path: Path, + code: str, + *, + compile: bool = False, + encrypt: bool = True, + encrypt_key: str | None = None, + decrypt_key: str | None = None, + decrypt_key_env: bool = False, + args: list[str] | None = None, +) -> subprocess.CompletedProcess: + run = tmp_path / "run.py" + run.write_text(code) + + builder = Builder( + distribution, + run, + encrypt=encrypt, + key=encrypt_key, + compile=compile, + strict=True, + ) + + exe = builder.build(build_path=None) + key = decrypt_key if decrypt_key is not None else builder.key + + out_path = tmp_path / "binary" + out_path.write_bytes(exe) + 
out_path.chmod(out_path.stat().st_mode | stat.S_IEXEC) + + cmd = [str(out_path)] + if encrypt and not decrypt_key_env: + cmd.append(f":key={key}") + + if args: + cmd.extend(args) + + env = os.environ.copy() + env["PYTHONUNBUFFERED"] = "1" + + if encrypt and decrypt_key_env: + env["PYSTANDALONE_KEY"] = key + + return subprocess.run(cmd, capture_output=True, timeout=30, env=env) + + +def test_exe_hello_world(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that a simple hello world script runs and produces expected output.""" + result = _build_and_run( + native_distribution, + tmp_path, + "print('hello from pystandalone', flush=True)", + encrypt=False, + ) + + assert result.returncode == 0 + assert b"hello from pystandalone" in result.stdout + + +def test_exe_compile(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that compiling the code produces a working executable.""" + if native_distribution.major_minor_version != f"{sys.version_info.major}.{sys.version_info.minor}": + pytest.skip("Can't test compile on a different Python version") + + result = _build_and_run( + native_distribution, + tmp_path, + "print('compiled code works', flush=True)", + compile=True, + encrypt=False, + ) + + assert result.returncode == 0 + assert b"compiled code works" in result.stdout + + +def test_exe_encrypted(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that an encrypted binary runs with the correct key.""" + result = _build_and_run( + native_distribution, + tmp_path, + "print('encrypted hello', flush=True)", + encrypt=True, + encrypt_key="testkey123", + ) + + assert result.returncode == 0 + assert b"encrypted hello" in result.stdout + + +def test_exe_encrypted_wrong_key(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that an encrypted binary fails with a wrong key.""" + result = _build_and_run( + native_distribution, + tmp_path, + "print('should not see this')", + encrypt=True, + 
encrypt_key="correctkey", + decrypt_key="wrongkey", + ) + + assert result.returncode != 0 + assert b"Wrong key" in result.stderr + + +def test_exe_exit_code(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that the exit code from the script is propagated.""" + result = _build_and_run(native_distribution, tmp_path, "import sys; sys.exit(42)", encrypt=False) + assert result.returncode == 42 + + +def test_exe_argv_passthrough(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that command-line arguments are passed through to the script.""" + result = _build_and_run( + native_distribution, + tmp_path, + "import sys; print(' '.join(sys.argv[1:]), flush=True)", + encrypt=False, + args=["foo", "bar"], + ) + + assert result.returncode == 0 + assert b"foo bar" in result.stdout + + +def test_exe_stderr(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that stderr output from the script is captured.""" + result = _build_and_run( + native_distribution, + tmp_path, + "import sys; print('error msg', file=sys.stderr)", + encrypt=False, + ) + + assert result.returncode == 0 + assert b"error msg" in result.stderr + + +def test_exe_import_stdlib(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that stdlib modules can be imported and used.""" + result = _build_and_run( + native_distribution, + tmp_path, + "import json; print(json.dumps({'key': 'value'}), flush=True)", + encrypt=False, + ) + + assert result.returncode == 0 + assert b'{"key": "value"}' in result.stdout + + +def test_exe_key_from_env(native_distribution: Distribution, tmp_path: Path) -> None: + """Test that the encryption key can be passed via environment variable.""" + result = _build_and_run( + native_distribution, + tmp_path, + "print('env key works', flush=True)", + encrypt=True, + encrypt_key="envkey123", + decrypt_key_env=True, + ) + + assert result.returncode == 0 + assert b"env key works" in result.stdout + + +def 
test_main_encrypted(native_distribution: Distribution, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that main produces an encrypted binary that runs.""" + code = tmp_path / "run.py" + code.write_text("print('main encrypted', flush=True)") + + output = tmp_path / "binary" + + monkeypatch.setattr( + "sys.argv", + [ + "pystandalone", + "-c", + str(code), + "-o", + str(output), + "-d", + str(native_distribution.path), + "-k", + "mainkey123", + "--no-strict", + ], + ) + + assert main() == 0 + assert output.exists() + + output.chmod(output.stat().st_mode | stat.S_IEXEC) + result = subprocess.run( + [str(output), ":key=mainkey123"], + capture_output=True, + timeout=30, + env={**os.environ, "PYTHONUNBUFFERED": "1"}, + ) + + assert result.returncode == 0 + assert b"main encrypted" in result.stdout + + +def test_main_no_crypt(native_distribution: Distribution, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that main produces an unencrypted binary that runs.""" + code = tmp_path / "run.py" + code.write_text("print('main no crypt', flush=True)") + + output = tmp_path / "binary" + + monkeypatch.setattr( + "sys.argv", + [ + "pystandalone", + "-c", + str(code), + "-o", + str(output), + "-d", + str(native_distribution.path), + "--no-crypt", + "--no-strict", + ], + ) + + assert main() == 0 + assert output.exists() + + output.chmod(output.stat().st_mode | stat.S_IEXEC) + result = subprocess.run( + [str(output)], + capture_output=True, + timeout=30, + env={**os.environ, "PYTHONUNBUFFERED": "1"}, + ) + + assert result.returncode == 0 + assert b"main no crypt" in result.stdout + + +def test_main_key_file(native_distribution: Distribution, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that main writes the key to a file when --key-file is used.""" + code = tmp_path / "run.py" + code.write_text("pass") + + output = tmp_path / "binary" + key_file = tmp_path / "key.txt" + + monkeypatch.setattr( + "sys.argv", + [ + 
"pystandalone", + "-c", + str(code), + "-o", + str(output), + "-d", + str(native_distribution.path), + "-k", + "savedkey", + "--key-file", + str(key_file), + "--no-strict", + ], + ) + + assert main() == 0 + assert key_file.exists() + assert key_file.read_text() == "savedkey" + + +def test_main_default_output( + native_distribution: Distribution, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Test that main uses a default output path when -o is not specified.""" + code = tmp_path / "run.py" + code.write_text("pass") + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr( + "sys.argv", + [ + "pystandalone", + "-c", + str(code), + "-d", + str(native_distribution.path), + "--no-crypt", + "--no-strict", + ], + ) + + assert main() == 0 + + # Default output name includes version, target and arch + outputs = list(tmp_path.glob("pystandalone-*")) + assert len(outputs) == 1 + assert outputs[0].stat().st_size > 0 + + +def test_main_invalid_code_dir( + native_distribution: Distribution, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Test that main returns 1 for an empty directory with no run.py.""" + empty_dir = tmp_path / "empty" + empty_dir.mkdir() + + monkeypatch.setattr( + "sys.argv", + [ + "pystandalone", + "-c", + str(empty_dir), + "-d", + str(native_distribution.path), + "--no-strict", + ], + ) + + assert main() == 1 + + +def test_main_list_available(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture) -> None: + """Test that --list-available lists distributions and exits successfully.""" + monkeypatch.setattr("sys.argv", ["pystandalone", "--list-available"]) + + assert main() == 0 + + captured = capsys.readouterr() + assert "Python" in captured.out diff --git a/tests/test_chacha20.py b/tests/test_chacha20.py new file mode 100644 index 0000000..87f02c2 --- /dev/null +++ b/tests/test_chacha20.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +import io + +import pytest + +from pystandalone.chacha20 import Chacha20, 
decrypt +from tests.conftest import absolute_path + + +@pytest.mark.parametrize( + ("path", "key", "iv"), + [ + ("_data/chacha20_zero_iv.bin", b"\x00" * 32, b"\x00" * 8), + ("_data/chacha20_unique_iv.bin", b"\x00" * 32, b"ABLAFLAFLADEADBE"), + ], +) +@pytest.mark.parametrize( + ("pos", "whence", "n", "expected"), + [ + (1, io.SEEK_SET, -1, b"ello_world"), + (64, io.SEEK_SET, 64, b""), + (-10, io.SEEK_END, 10, b"ello_world"), + (2, io.SEEK_CUR, 2, b"ll"), + ], +) +def test_stream_seek_read(path: str, key: bytes, iv: bytes, pos: int, whence: int, n: int, expected: bytes) -> None: + """Test that seeking and reading from the Chacha20 stream works as expected.""" + with absolute_path(path).open("rb") as fh: + stream = Chacha20(fh, key=key, iv=iv) + stream.seek(pos, whence) + assert stream.read(n) == expected + + +@pytest.mark.parametrize( + ("path", "key", "iv"), + [ + ("_data/chacha20_zero_iv.bin", b"\x00" * 32, b"\x00" * 8), + ("_data/chacha20_unique_iv.bin", b"\x00" * 32, b"ABLAFLAFLADEADBE"), + ], +) +@pytest.mark.parametrize( + ("pos", "whence", "n"), + [ + (1, io.SEEK_SET, -1), + (64, io.SEEK_SET, 64), + (-10, io.SEEK_END, 10), + (20, io.SEEK_CUR, 20), + ], +) +def test_compare_stream_and_oneshot(path: str, key: bytes, iv: bytes, pos: int, whence: int, n: int) -> None: + buf = absolute_path(path).read_bytes() + + stream = Chacha20(io.BytesIO(buf), key=key, iv=iv) + oneshot = io.BytesIO(decrypt(buf, key=key, iv=iv)) + + stream.seek(pos, whence) + oneshot.seek(pos, whence) + assert stream.read(n) == oneshot.read(n) diff --git a/tests/test_compiler.py b/tests/test_compiler.py new file mode 100644 index 0000000..546de47 --- /dev/null +++ b/tests/test_compiler.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import importlib +import io +import marshal +from typing import TYPE_CHECKING + +import pytest + +from pystandalone.compiler import NoCompiler, PycCompiler + +if TYPE_CHECKING: + from pathlib import Path + +MAGIC = importlib.util.MAGIC_NUMBER + + +def 
test_passthrough_compiler() -> None: + """Test that NoCompiler returns .py files as-is without compilation.""" + c = NoCompiler() + assert c.compile("x = 1") == b"x = 1" + + +def test_strict_rejects_wrong_magic() -> None: + """Test that we reject compiling with a different Python version by default.""" + with pytest.raises(RuntimeError, match="different Python version"): + PycCompiler(b"\x00\x00\x00\x00") + + +def test_strict_false_accepts_wrong_magic() -> None: + """Test that we can disable strict mode to allow compiling with a different Python version.""" + c = PycCompiler(b"\x00\x00\x00\x00", strict=False) + assert c.magic == b"\x00\x00\x00\x00" + + +def test_compile_str_produces_valid_pyc() -> None: + """Test that compiling a string produces a valid .pyc file with the correct magic number and structure.""" + c = PycCompiler(MAGIC) + result = c.compile("x = 1 + 2") + + assert result[:4] == MAGIC + # flags, timestamp, source size = 12 bytes of zeros + assert result[4:16] == b"\x00" * 12 + # remainder is a marshalled code object + code = marshal.loads(result[16:]) + assert code.co_filename == "" + + +def test_compile_str_custom_name() -> None: + """Test that we can specify a custom filename for the code object when compiling a string.""" + c = PycCompiler(MAGIC) + code = marshal.loads(c.compile("pass", name="custom.py")[16:]) + assert code.co_filename == "custom.py" + + +def test_compile_fileobj() -> None: + """Test that compiling from a file-like object works.""" + c = PycCompiler(MAGIC) + result = c.compile_fileobj(io.StringIO("y = 42")) + code = marshal.loads(result[16:]) + assert code.co_filename == "" + + +def test_compile_file(tmp_path: Path) -> None: + """Test that compiling from a file path works and uses the correct filename in the code object.""" + src = tmp_path / "script.py" + src.write_text("z = 99") + + c = PycCompiler(MAGIC) + result = c.compile_file(src) + code = marshal.loads(result[16:]) + assert code.co_filename == "" diff --git 
a/tests/test_distribution.py b/tests/test_distribution.py new file mode 100644 index 0000000..d539e88 --- /dev/null +++ b/tests/test_distribution.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +from pystandalone.distribution import Architecture, Target + +if TYPE_CHECKING: + from pystandalone.distribution import Distribution + + +def test_distribution_metadata(linux_distribution: Distribution) -> None: + """Test that distribution metadata contains expected keys.""" + metadata = linux_distribution.metadata + assert "python_version" in metadata + assert "target_triple" in metadata + assert "python_bytecode_magic_number" in metadata + assert "python_exe" in metadata + + +def test_distribution_version(linux_distribution: Distribution) -> None: + """Test that distribution version matches expected major.minor.patch format.""" + assert re.match(r"\d+\.\d+\.\d+", linux_distribution.version) + + +def test_distribution_target_arch(linux_distribution: Distribution) -> None: + """Test that the distribution target and architecture match the expected values.""" + assert linux_distribution.target == Target.LINUX + assert linux_distribution.arch == Architecture.X86_64 + + +def test_distribution_bytecode_magic(linux_distribution: Distribution) -> None: + """Test that distribution bytecode magic is 4 bytes.""" + magic = linux_distribution.bytecode_magic + assert isinstance(magic, bytes) + assert len(magic) == 4 + + +def test_distribution_read_python_exe(linux_distribution: Distribution) -> None: + """Test that we can read the Python executable from the distribution.""" + exe = linux_distribution.read_python_exe() + assert len(exe) > 0 + + +def test_distribution_pack_library(linux_distribution: Distribution) -> None: + """Test that pack_library yields stdlib modules.""" + packed = dict(linux_distribution.pack_library()) + + assert "os.py" in packed + assert "json/__init__.py" in packed + assert isinstance(packed["os.py"], 
bytes) + + +def test_distribution_pack_library_exclude(linux_distribution: Distribution) -> None: + """Test that pack_library exclude filtering works.""" + packed = dict(linux_distribution.pack_library(exclude=["json"])) + + assert "os.py" in packed + filenames = set(packed.keys()) + assert not any(f.startswith("json/") or f == "json.py" for f in filenames) + + +def test_distribution_pack_library_include(linux_distribution: Distribution) -> None: + """Test that pack_library include overrides default excludes.""" + packed_without = dict(linux_distribution.pack_library()) + packed_with = dict(linux_distribution.pack_library(include=["sqlite3"])) + + assert "sqlite3" not in {f.split("/")[0] for f in packed_without} + assert any(f.startswith("sqlite3/") for f in packed_with) diff --git a/tests/test_packer.py b/tests/test_packer.py new file mode 100644 index 0000000..1192378 --- /dev/null +++ b/tests/test_packer.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import importlib +import io +import zipfile + +from pystandalone.compiler import NoCompiler, PycCompiler +from pystandalone.packer import Packer, mkzip, zinfo, zwrite + + +def test_pack_compiles_py_to_pyc() -> None: + """Test that .py files are compiled to .pyc in the output zip.""" + compiler = PycCompiler(importlib.util.MAGIC_NUMBER) + packer = Packer(compiler) + + content = [("run.py", "x = 1"), ("lib/util.py", "y = 2")] + result = packer.pack(iter(content)) + + zf = zipfile.ZipFile(io.BytesIO(result)) + names = set(zf.namelist()) + assert "run.pyc" in names + assert "lib/util.pyc" in names + assert "run.py" not in names + + +def test_pack_non_py_passthrough() -> None: + """Test that non-.py files are packed as-is without compilation.""" + compiler = PycCompiler(importlib.util.MAGIC_NUMBER) + packer = Packer(compiler) + + content = [("data.json", b'{"key": "value"}'), ("run.py", "pass")] + result = packer.pack(iter(content)) + + zf = zipfile.ZipFile(io.BytesIO(result)) + assert zf.read("data.json") == 
b'{"key": "value"}' + + +def test_pack_no_compiler() -> None: + """Test that NoCompiler keeps .py files as-is.""" + packer = Packer(NoCompiler()) + + content = [("run.py", "x = 1"), ("lib.py", b"y = 2")] + result = packer.pack(iter(content)) + + zf = zipfile.ZipFile(io.BytesIO(result)) + names = set(zf.namelist()) + assert "run.py" in names + assert "lib.py" in names + assert zf.read("run.py") == b"x = 1" + assert zf.read("lib.py") == b"y = 2" + + +def test_pack_skips_invalid_py() -> None: + """Test that files that fail compilation are skipped entirely.""" + compiler = PycCompiler(importlib.util.MAGIC_NUMBER) + packer = Packer(compiler) + + content = [("good.py", "x = 1"), ("bad.py", "def !!!")] + result = packer.pack(iter(content)) + + zf = zipfile.ZipFile(io.BytesIO(result)) + names = set(zf.namelist()) + assert "good.pyc" in names + assert "bad.py" not in names + assert "bad.pyc" not in names + + +def test_mkzip_creates_deflated_zip() -> None: + """Test that mkzip creates a writable deflated zip archive.""" + buf, zf = mkzip() + assert zf.compression == zipfile.ZIP_DEFLATED + + with zf: + zwrite(zf, zinfo("test.txt"), b"hello") + + result = zipfile.ZipFile(buf) + assert result.read("test.txt") == b"hello" + + +def test_zinfo_fixed_timestamp() -> None: + """Test that zinfo creates entries with a fixed timestamp.""" + info = zinfo("file.py") + assert info.date_time == (1980, 0, 0, 0, 0, 0) diff --git a/tests/test_source.py b/tests/test_source.py new file mode 100644 index 0000000..cb36f7b --- /dev/null +++ b/tests/test_source.py @@ -0,0 +1,335 @@ +from __future__ import annotations + +import sys +import zipfile +from typing import TYPE_CHECKING + +import pytest + +from pystandalone.source import Source + +if TYPE_CHECKING: + from pathlib import Path + + +MOCK_SPEC = """ +[run] +entry = myapp:main + +[run:other] +entry = other:main + +[library] +uu + +[modules] +dissect +special + +[include] +foo.* + +[exclude] +bar.* + +[build:special] +special/file.py = 
some-tool-to-run +""" + + +def test_source_from_file(tmp_path: Path) -> None: + """Test that we can create a Source object from a file path.""" + src_file = tmp_path / "script.py" + src_file.touch() + + source = Source.from_path(src_file) + assert source.run == str(src_file) + assert source.base == tmp_path + assert not source.include_base + + +def test_source_from_zip(tmp_path: Path) -> None: + """Test that we can create a Source object from a zip file path.""" + src_zip = tmp_path / "archive.zip" + src_zip.touch() + + source = Source.from_path(src_zip) + assert source.run == str(src_zip) + assert source.base == tmp_path + assert not source.include_base + + +def test_source_from_dir_with_run(tmp_path: Path) -> None: + """Test that we can create a Source object from a directory containing a run.py file.""" + src_file = tmp_path / "run.py" + src_file.touch() + + source = Source.from_path(tmp_path) + assert source.run == "run.py" + assert source.base == tmp_path + assert source.include_base + + +def test_source_from_dir_with_spec(tmp_path: Path) -> None: + """Test that we can create a Source object from a directory containing a .pystandalone spec file.""" + spec_file = tmp_path / ".pystandalone" + spec_file.write_text(MOCK_SPEC) + + source = Source.from_path(tmp_path) + assert source.run == "myapp:main" + assert source.base == tmp_path + assert not source.include_base + assert source.library == {"uu"} + assert source.modules == {"dissect", "myapp", "special"} + assert source.include == {"foo.*"} + assert source.exclude == {"bar.*"} + + +def test_source_from_spec_with_entry(tmp_path: Path) -> None: + """Test that we can create a Source object from a .pystandalone spec file with an entry selection.""" + spec_file = tmp_path / ".pystandalone" + spec_file.write_text(MOCK_SPEC) + + source = Source.from_path(tmp_path / ".pystandalone:other") + assert source.run == "other:main" + assert source.base == tmp_path + assert not source.include_base + + +def 
test_pack_run_file(tmp_path: Path) -> None: + """Test that packing a source with a run file yields the run file contents.""" + run_file = tmp_path / "script.py" + run_file.write_text("print('hello')") + + source = Source.from_path(run_file) + packed = dict(source.pack()) + + assert packed["run.py"] == "print('hello')" + + +def test_pack_entrypoint_generates_run(tmp_path: Path) -> None: + """Test that an entrypoint string generates a synthetic run.py.""" + spec_file = tmp_path / ".pystandalone" + spec_file.write_text("[run]\nentry = myapp:main\n") + + source = Source.from_path(spec_file) + # Don't try to pack modules, just test run.py generation + source.modules.clear() + packed = dict(source.pack()) + + assert "import sys" in packed["run.py"] + assert "from myapp import main" in packed["run.py"] + assert "sys.exit(main())" in packed["run.py"] + + +def test_pack_base_includes_all_files(tmp_path: Path) -> None: + """Test that packing a directory with include_base yields all files except run.py.""" + run_file = tmp_path / "run.py" + run_file.write_text("pass") + (tmp_path / "lib.py").write_text("x = 1") + sub = tmp_path / "pkg" + sub.mkdir() + (sub / "mod.py").write_text("y = 2") + + source = Source.from_path(tmp_path) + packed = dict(source.pack()) + + assert "run.py" in packed + assert packed["lib.py"] == b"x = 1" + assert packed["pkg/mod.py"] == b"y = 2" + + +def test_pack_insert_str(tmp_path: Path) -> None: + """Test that inserted strings are included in the packed output.""" + run_file = tmp_path / "script.py" + run_file.write_text("pass") + + source = Source.from_path(run_file) + source.insert_str("extra/config.py", "CFG = True") + packed = dict(source.pack()) + + assert packed["extra/config.py"] == "CFG = True" + + +def test_pack_insert_file(tmp_path: Path) -> None: + """Test that inserted files are included in the packed output.""" + run_file = tmp_path / "script.py" + run_file.write_text("pass") + data_file = tmp_path / "data.txt" + data_file.write_text("some 
data") + + source = Source.from_path(run_file) + source.insert_file("injected/data.txt", data_file) + packed = dict(source.pack()) + + assert packed["injected/data.txt"] == "some data" + + +def test_pack_exclude_filter(tmp_path: Path) -> None: + """Test that exclude patterns filter out matching files during pack.""" + run_file = tmp_path / "run.py" + run_file.write_text("pass") + (tmp_path / "keep.py").write_text("keep") + (tmp_path / "skip.txt").write_text("skip") + + source = Source.from_path(tmp_path) + source.exclude.add("*.txt") + packed = dict(source.pack()) + + assert "keep.py" in packed + assert "skip.txt" not in packed + + +def test_pack_include_overrides_exclude(tmp_path: Path) -> None: + """Test that include patterns override exclude patterns during pack.""" + run_file = tmp_path / "run.py" + run_file.write_text("pass") + (tmp_path / "important.log").write_text("important") + (tmp_path / "debug.log").write_text("debug") + + source = Source.from_path(tmp_path) + source.exclude.add("*.log") + source.include.add("important.log") + packed = dict(source.pack()) + + assert "important.log" in packed + assert "debug.log" not in packed + + +def test_pack_namespace_package_generates_init(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that packing a namespace package generates missing __init__.py files.""" + # Create a namespace package (no __init__.py) + pkg = tmp_path / "nspkg" / "sub" + pkg.mkdir(parents=True) + (pkg / "__init__.py").touch() + (pkg / "mod.py").write_text("x = 1") + + spec_file = tmp_path / ".pystandalone" + spec_file.write_text("[run]\nentry = nspkg.sub:main\n") + + monkeypatch.syspath_prepend(str(tmp_path)) + source = Source.from_path(spec_file) + packed = dict(source.pack()) + + # Top-level module gets a generated __init__.py + assert packed["nspkg/__init__.py"] == b"" + assert packed["nspkg/sub/__init__.py"] == b"" + assert packed["nspkg/sub/mod.py"] == b"x = 1" + + +def test_pack_regular_package_generates_init(tmp_path: 
Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that packing a regular package with __init__.py still generates __init__.py entries.""" + pkg = tmp_path / "mypkg" + pkg.mkdir() + (pkg / "__init__.py").write_text("# init") + (pkg / "core.py").write_text("y = 2") + + spec_file = tmp_path / ".pystandalone" + spec_file.write_text("[run]\nentry = mypkg:main\n") + + monkeypatch.syspath_prepend(str(tmp_path)) + source = Source.from_path(spec_file) + packed = dict(source.pack()) + + assert "mypkg/__init__.py" in packed + assert packed["mypkg/core.py"] == b"y = 2" + + +def test_pack_build_output(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that build commands are executed and their stdout is packed.""" + pkg = tmp_path / "buildpkg" + pkg.mkdir() + (pkg / "__init__.py").touch() + + spec_file = tmp_path / ".pystandalone" + spec_file.write_text( + "[run]\nentry = buildpkg:main\n\n" + "[build:buildpkg]\n" + f"buildpkg/generated.py = {sys.executable} -c \"import sys; sys.stdout.buffer.write(b'GENERATED = True')\"\n" + ) + + monkeypatch.syspath_prepend(str(tmp_path)) + source = Source.from_path(spec_file) + packed = dict(source.pack()) + + assert packed["buildpkg/generated.py"] == b"GENERATED = True" + + +def test_pack_build_output_failure(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that a failed build command raises a RuntimeError.""" + pkg = tmp_path / "failpkg" + pkg.mkdir() + (pkg / "__init__.py").touch() + + spec_file = tmp_path / ".pystandalone" + spec_file.write_text( + "[run]\n" + "entry = failpkg:main\n\n" + "[build:failpkg]\n" + f'failpkg/out.py = {sys.executable} -c "import sys; sys.exit(1)"\n' + ) + + monkeypatch.syspath_prepend(str(tmp_path)) + source = Source.from_path(spec_file) + with pytest.raises(RuntimeError, match="failed with exit code"): + dict(source.pack()) + + +def _make_zip(path: Path, files: dict[str, str]) -> None: + """Helper to create a zip file with the given files.""" + with 
zipfile.ZipFile(path, "w") as zf: + for name, content in files.items(): + zf.writestr(name, content) + + +def test_pack_zip_run(tmp_path: Path) -> None: + """Test that packing a zip run file yields all files from the zip.""" + zip_path = tmp_path / "app.zip" + _make_zip(zip_path, {"run.py": "print('hi')", "lib/util.py": "x = 1"}) + + source = Source.from_path(zip_path) + packed = dict(source.pack()) + + assert packed["run.py"] == b"print('hi')" + assert packed["lib/util.py"] == b"x = 1" + + +def test_pack_zip_with_insert(tmp_path: Path) -> None: + """Test that inserts are included on top of zip contents.""" + zip_path = tmp_path / "app.zip" + _make_zip(zip_path, {"run.py": "pass"}) + + source = Source.from_path(zip_path) + source.insert_str("extra.py", "EXTRA = True") + packed = dict(source.pack()) + + assert packed["run.py"] == b"pass" + assert packed["extra.py"] == "EXTRA = True" + + +def test_pack_zip_with_exclude(tmp_path: Path) -> None: + """Test that exclude patterns filter zip contents.""" + zip_path = tmp_path / "app.zip" + _make_zip(zip_path, {"run.py": "pass", "tests/test_foo.py": "test", "lib/core.py": "core"}) + + source = Source.from_path(zip_path) + source.exclude.add("tests/*") + packed = dict(source.pack()) + + assert "run.py" in packed + assert "lib/core.py" in packed + assert "tests/test_foo.py" not in packed + + +def test_pack_zip_with_include_overrides_exclude(tmp_path: Path) -> None: + """Test that include patterns override exclude patterns for zip contents.""" + zip_path = tmp_path / "app.zip" + _make_zip(zip_path, {"data/keep.dat": "keep", "data/skip.dat": "skip"}) + + source = Source.from_path(zip_path) + source.exclude.add("data/*") + source.include.add("data/keep.dat") + packed = dict(source.pack()) + + assert "data/keep.dat" in packed + assert "data/skip.dat" not in packed diff --git a/tests/test_zipapp.py b/tests/test_zipapp.py new file mode 100644 index 0000000..a386755 --- /dev/null +++ b/tests/test_zipapp.py @@ -0,0 +1,225 @@ +from 
__future__ import annotations + +import os +import subprocess +import sys +import zipfile +from typing import TYPE_CHECKING + +from pystandalone.zipapp import Builder, main + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +def _build_and_run( + tmp_path: Path, + code: str, + *, + encrypt: bool = True, + encrypt_key: str | None = None, + decrypt_key: str | None = None, + decrypt_key_env: bool = False, + args: list[str] | None = None, +) -> subprocess.CompletedProcess: + run = tmp_path / "run.py" + run.write_text(code) + + builder = Builder(run, encrypt=encrypt, key=encrypt_key) + pyz = tmp_path / "app.pyz" + pyz.write_bytes(builder.build()) + + key = decrypt_key if decrypt_key is not None else builder.key + + cmd = [sys.executable, str(pyz)] + if encrypt and not decrypt_key_env: + cmd.append(f":key={key}") + + if args: + cmd.extend(args) + + env = os.environ.copy() + env["PYTHONUNBUFFERED"] = "1" + + if encrypt and decrypt_key_env: + env["PYSTANDALONE_KEY"] = key + + return subprocess.run(cmd, capture_output=True, timeout=30, env=env) + + +def test_zipapp_hello_world(tmp_path: Path) -> None: + """Test that a simple unencrypted zipapp runs and produces expected output.""" + result = _build_and_run( + tmp_path, + "print('hello from zipapp', flush=True)", + encrypt=False, + ) + + assert result.returncode == 0 + assert b"hello from zipapp" in result.stdout + + +def test_zipapp_encrypted(tmp_path: Path) -> None: + """Test that an encrypted zipapp runs with the correct key.""" + result = _build_and_run( + tmp_path, + "print('encrypted zipapp', flush=True)", + encrypt=True, + encrypt_key="testkey123", + ) + + assert result.returncode == 0 + assert b"encrypted zipapp" in result.stdout + + +def test_zipapp_encrypted_wrong_key(tmp_path: Path) -> None: + """Test that an encrypted zipapp fails with a wrong key.""" + result = _build_and_run( + tmp_path, + "print('should not see this')", + encrypt=True, + encrypt_key="correctkey", + decrypt_key="wrongkey", + 
) + + assert result.returncode != 0 + assert b"Wrong key" in result.stderr + + +def test_zipapp_encrypted_key_from_env(tmp_path: Path) -> None: + """Test that the encryption key can be passed via environment variable.""" + result = _build_and_run( + tmp_path, + "print('env key works', flush=True)", + encrypt=True, + encrypt_key="envkey123", + decrypt_key_env=True, + ) + + assert result.returncode == 0 + assert b"env key works" in result.stdout + + +def test_zipapp_exit_code(tmp_path: Path) -> None: + """Test that the exit code from the script is propagated.""" + result = _build_and_run(tmp_path, "import sys; sys.exit(42)", encrypt=False) + assert result.returncode == 42 + + +def test_zipapp_argv_passthrough(tmp_path: Path) -> None: + """Test that command-line arguments are passed through to the script.""" + result = _build_and_run( + tmp_path, + "import sys; print(' '.join(sys.argv[1:]), flush=True)", + encrypt=False, + args=["foo", "bar"], + ) + + assert result.returncode == 0 + assert b"foo bar" in result.stdout + + +def test_zipapp_argv_passthrough_encrypted(tmp_path: Path) -> None: + """Test that command-line arguments are passed through and :key is stripped.""" + result = _build_and_run( + tmp_path, + "import sys; print(' '.join(sys.argv[1:]), flush=True)", + encrypt=True, + encrypt_key="argkey", + args=["--flag", "value"], + ) + + assert result.returncode == 0 + assert b"--flag value" in result.stdout + assert b":key" not in result.stdout + + +def test_zipapp_stderr(tmp_path: Path) -> None: + """Test that stderr output from the script is captured.""" + result = _build_and_run( + tmp_path, + "import sys; print('error msg', file=sys.stderr)", + encrypt=False, + ) + + assert result.returncode == 0 + assert b"error msg" in result.stderr + + +def test_zipapp_import_stdlib(tmp_path: Path) -> None: + """Test that stdlib modules can be imported and used.""" + result = _build_and_run( + tmp_path, + "import json; print(json.dumps({'key': 'value'}), flush=True)", + 
encrypt=False, + ) + + assert result.returncode == 0 + assert b'{"key": "value"}' in result.stdout + + +def test_main_with_key(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that main with a key produces an encrypted output.""" + code = tmp_path / "run.py" + code.write_text("pass") + + output = tmp_path / "out.pyz" + + monkeypatch.setattr("sys.argv", ["pystandalone-zipapp", "-c", str(code), "-o", str(output), "-k", "mykey"]) + + result = main() + + assert result == 0 + assert output.exists() + assert output.stat().st_size > 0 + + zf = zipfile.ZipFile(output) + assert "__main__.py" in zf.namelist() + + +def test_main_with_key_file(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that main writes the key to a file when --key-file is used.""" + code = tmp_path / "run.py" + code.write_text("pass") + + output = tmp_path / "out.pyz" + key_file = tmp_path / "key.txt" + + monkeypatch.setattr( + "sys.argv", + ["pystandalone-zipapp", "-c", str(code), "-o", str(output), "-k", "savedkey", "--key-file", str(key_file)], + ) + + result = main() + + assert result == 0 + assert key_file.exists() + assert key_file.read_text() == "savedkey" + + +def test_main_default_output(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that main uses default output path when -o is not specified.""" + code = tmp_path / "run.py" + code.write_text("pass") + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr("sys.argv", ["pystandalone-zipapp", "-c", str(code), "-k", "testkey"]) + + result = main() + + assert result == 0 + assert (tmp_path / "pystandalone-zipapp.pyz").exists() + + +def test_main_invalid_code_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that main returns 1 for an empty directory with no run.py.""" + empty_dir = tmp_path / "empty" + empty_dir.mkdir() + + monkeypatch.setattr("sys.argv", ["pystandalone-zipapp", "-c", str(empty_dir)]) + + result = main() + + assert result == 1 diff --git a/tox.ini 
b/tox.ini
new file mode 100644
index 0000000..1a47b1c
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,75 @@
[tox]
envlist = lint, py3, pypy3
# This version of tox will autoprovision itself and the requirements defined in
# requires if they are not available on the host system. This requires the
# locally installed tox to have a minimum version of 3.3.0. This means the
# names of the configuration options still follow the tox 3.x syntax.
minversion = 4.27.0
# This version of virtualenv will install setuptools version 68.2.2 and pip
# 23.3.1. These versions fully support Python projects defined only through a
# pyproject.toml file (PEP-517/PEP-518/PEP-621). This pip version also supports
# proper version resolving with (sub-)dependencies defining dev extras.
requires = virtualenv>=20.24.6

[testenv]
extras = dev
deps =
    pytest-cov
    pytest-xdist
    coverage
dependency_groups = test
commands =
    pytest --basetemp="{envtmpdir}" --import-mode="append" {posargs:--color=yes --cov=pystandalone --cov-report=term-missing -n auto -v tests}
    coverage report
    coverage xml

[testenv:benchmark]
deps =
    pytest-benchmark
    pytest-codspeed
dependency_groups = test
passenv =
    CODSPEED_ENV
commands =
    pytest --basetemp="{envtmpdir}" --import-mode="append" -m benchmark {posargs:--color=yes -v tests}

[testenv:build]
package = skip
dependency_groups = build
commands =
    pyproject-build

[testenv:fix]
package = skip
dependency_groups = lint
commands =
    ruff check --fix pystandalone tests
    ruff format pystandalone tests

[testenv:lint]
package = skip
dependency_groups = lint
commands =
    ruff check pystandalone tests
    ruff format --check pystandalone tests
    vermin -t=3.10- --no-tips --lint pystandalone tests

[testenv:docs-build]
allowlist_externals = make
deps =
    sphinx
    sphinx-autoapi
    sphinx_argparse_cli
    sphinx-copybutton
    sphinx-design
    furo
commands =
    make -C tests/_docs clean
    make -C tests/_docs html

[testenv:docs-linkcheck]
allowlist_externals = make
deps = {[testenv:docs-build]deps}
commands =
    make -C tests/_docs clean
    make -C tests/_docs linkcheck