diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index 0d99dfa..0000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: Build Documentation - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - workflow_dispatch: - -permissions: - contents: read - pages: write - id-token: write - -concurrency: - group: "pages" - cancel-in-progress: false - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e ".[docs]" - - - name: Build HTML documentation - run: | - cd docs - make html - - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - path: ./docs/_build/html - - deploy: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - needs: build - if: >- - github.repository == 'Comfy-Org/pyisolate' && - github.event_name == 'push' && - github.ref == 'refs/heads/main' - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.github/workflows/pytorch.yml b/.github/workflows/pytorch.yml index 124da3b..c6d80cd 100644 --- a/.github/workflows/pytorch.yml +++ b/.github/workflows/pytorch.yml @@ -46,8 +46,8 @@ jobs: - name: Test example with PyTorch run: | source .venv/bin/activate - cd example - python main.py -v + cd example/torch_share + python host.py test-pytorch-cuda: name: Test with PyTorch CUDA @@ -105,5 +105,5 @@ jobs: - name: Test example with PyTorch run: | source .venv/bin/activate - cd example - python main.py -v + cd example/torch_share + python host.py diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 9c7c974..9c6dfa8 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -41,8 +41,8 @@ jobs: - name: 
Test example run: | .venv\Scripts\activate - cd example - python main.py -v + cd example\torch_share + python host.py test-windows-pytorch: name: Test on Windows with PyTorch diff --git a/CLAUDE.md b/CLAUDE.md index 8a5edd4..c12b1f6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,130 +1,124 @@ # CLAUDE.md -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. +Guidance for AI agents maintaining the pyisolate codebase. -## Project Overview +## Identity -**pyisolate** is a Python library for running extensions across multiple isolated virtual environments with RPC communication. It solves dependency conflicts by isolating extensions in separate venvs while maintaining seamless host-extension communication through AsyncRPC. +**pyisolate** is a Python library (PyPI: `pyisolate`, v0.10.1) for running extensions in isolated virtual environments with seamless inter-process communication. It provides dependency isolation, zero-copy tensor transfer, and bubblewrap sandboxing for GPU-heavy workloads. 
-## Development Commands - -### Environment Setup -```bash -# Preferred (using uv - much faster) -uv venv && source .venv/bin/activate && uv pip install -e ".[dev,docs]" -pre-commit install - -# With benchmarking dependencies -uv pip install -e ".[dev,docs,bench]" - -# Alternative (using pip) -python -m venv venv && source venv/bin/activate && pip install -e ".[dev,docs]" -``` +License: MIT | Python: >=3.10 -### Testing -```bash -# Run all tests -pytest - -# Run with coverage -pytest --cov=pyisolate --cov-report=html --cov-report=term-missing +## Architecture -# Run integration tests -pytest tests/test_integration.py -v +### Public API (`pyisolate/`) + +| File | Purpose | +|------|---------| +| `__init__.py` | Package exports: `ExtensionBase`, `ExtensionManager`, `ExtensionConfig`, `SandboxMode`, `ProxiedSingleton`, `SealedNodeExtension`, adapter registration | +| `host.py` | `ExtensionManager` — creates/manages isolated extensions and their venvs | +| `shared.py` | `ExtensionBase` / `ExtensionLocal` — base classes with lifecycle hooks (`before_module_loaded`, `on_module_loaded`) | +| `sealed.py` | `SealedNodeExtension` — minimal extension for sealed workers (no host framework imports) | +| `config.py` | TypedDicts: `ExtensionManagerConfig`, `ExtensionConfig`, `SandboxConfig`, `CUDAWheelConfig`. Enum: `SandboxMode` | +| `interfaces.py` | `IsolationAdapter` and `SerializerRegistryProtocol` — structural typing protocols for application adapters | +| `path_helpers.py` | Host `sys.path` serialization and child-side reconstruction | + +### Internal (`pyisolate/_internal/`) + +| File | Purpose | +|------|---------| +| `rpc_protocol.py` | `AsyncRPC` engine, `ProxiedSingleton` metaclass, `LocalMethodRegistry` | +| `rpc_transports.py` | `JSONSocketTransport` (primary) — length-prefixed JSON over UDS. No pickle. 
`QueueTransport` (legacy) | +| `rpc_serialization.py` | Message structures: `RPCRequest`, `RPCResponse`, `RPCCallback` | +| `serialization_registry.py` | `SerializerRegistry` — O(1) type lookup, MRO chain, `data_type` flag | +| `tensor_serializer.py` | Zero-copy tensors via `/dev/shm` (CPU) and CUDA IPC (GPU). `TensorKeeper` (5.0s default retention) | +| `model_serialization.py` | Generic `serialize_for_isolation()` / `deserialize_from_isolation()` | +| `host.py` | `Extension` class — process lifecycle (venv → deps → launch → RPC → shutdown) | +| `bootstrap.py` | Child-side init: sys.path reconstruction, adapter rehydration | +| `uds_client.py` | Child entrypoint (`python -m pyisolate._internal.uds_client`) | +| `environment.py` | uv venv creation, dependency installation, torch package exclusion | +| `environment_conda.py` | pixi/conda environment creation, fingerprint caching | +| `cuda_wheels.py` | CUDA wheel resolver — probes host torch/CUDA, fetches matching wheels | +| `sandbox.py` | `build_bwrap_command()` — deny-by-default filesystem, GPU passthrough, sealed-worker `--clearenv` | +| `sandbox_detect.py` | Multi-distro detection: RHEL, Ubuntu AppArmor, SELinux, Arch | +| `event_bridge.py` | Child-to-host event dispatch | +| `perf_trace.py` | Structured event logging (`PYISOLATE_TRACE_FILE`) | + +### Other Directories + +| Directory | Purpose | +|-----------|---------| +| `tests/` | Unit and integration tests (pytest, ~50 test functions) | +| `example/` | Working 3-extension demo showing dependency isolation (numpy 1.x vs 2.x) | +| `benchmarks/` | RPC overhead and memory benchmarks with cross-platform runner scripts | +| `docs/` | Reference docs: RPC protocol, debugging, edge cases, platform compatibility | -# Test the working example -cd example && python main.py -v -``` +## Development Commands -### Benchmarking ```bash -# Install benchmark dependencies -uv pip install -e ".[bench]" - -# Run full benchmark suite -python benchmark.py - -# Quick benchmarks 
(fewer iterations) -python benchmark.py --quick - -# Skip torch benchmarks -python benchmark.py --no-torch +# Environment setup +uv venv && source .venv/bin/activate && uv pip install -e ".[dev,test]" +pre-commit install -# Run benchmarks via pytest -pytest tests/test_benchmarks.py -v -s -``` +# Testing +pytest # all tests with coverage +pytest tests/test_rpc_contract.py -v # specific test file +pytest -k "test_sandbox" -v # pattern match -### Code Quality -```bash -# Lint and format +# Code quality ruff check pyisolate tests ruff format pyisolate tests -# All quality checks -tox -e lint -``` - -### Build -```bash -# Build package +# Build python -m build - -# Build docs -cd docs && make html ``` -## Architecture - -### Core Components -- **ExtensionManager**: Manages multiple extensions and their isolated venvs -- **ExtensionBase**: Base class for extensions with lifecycle hooks (`before_module_loaded`, `on_module_loaded`) -- **AsyncRPC**: Inter-process communication system with context-aware call tracking -- **ProxiedSingleton**: Enables shared APIs across processes (like `DatabaseSingleton` in example) - -Note: The example uses a two-level pattern where `ExampleExtensionBase` handles the lifecycle hooks and creates actual extension instances that implement `initialize()`, `prepare_shutdown()`, and custom methods like `do_stuff()`. +## Public API Surface -### Key Directories -- `pyisolate/`: Main package with public API in `__init__.py` -- `pyisolate/_internal/`: Core implementation (RPC, process management) -- `example/`: Working demo with 3 extensions showcasing dependency conflicts -- `tests/`: Integration and edge case tests +Exported from `pyisolate/__init__.py`: -### Extension Workflow -1. ExtensionManager creates isolated venv per extension -2. Installs extension-specific dependencies -3. Launches extension in separate process via `_internal/client.py` -4. Establishes bidirectional RPC communication -5. 
Extensions can call host methods and shared singletons transparently +```text +ExtensionBase ExtensionManager ExtensionManagerConfig +ExtensionConfig SandboxMode SealedNodeExtension +ProxiedSingleton local_execution singleton_scope +flush_tensor_keeper purge_orphan_sender_shm_files +register_adapter get_adapter +``` -## Configuration +Everything in `_internal/` is private implementation. Do not expose internal types in public API changes. -### Python Support -3.9 - 3.12 (tested in CI) +## Isolation Modes -### Dependencies -- **Runtime**: None (pure Python) -- **Development**: pytest, ruff, pre-commit -- **Testing**: torch>=2.0.0, numpy (for `share_torch` and tensor tests) -- **Benchmarking**: torch, numpy, psutil, tabulate (for performance measurement) +| Mode | Provisioner | share_torch | Tensor Transport | +|:-----|:------------|:------------|:-----------------| +| cuda_share | uv | yes | CUDA IPC + `/dev/shm` | +| torch_share | uv | yes | `/dev/shm` only | +| json_share | uv | no | JSON serialization | +| sealed_worker | uv or pixi | no | JSON serialization | -### Key Config Files -- `pyproject.toml`: Project metadata, dependencies, tool configuration -- `tox.ini`: Multi-Python testing environments -- `.pre-commit-config.yaml`: Git hooks for code quality +Invalid: pixi + `share_torch=True`. Invalid: `share_cuda_ipc=True` without `share_torch=True`. -## Special Features +## Testing Conventions -### Dependency Isolation -Each extension gets its own venv - handles conflicting packages like numpy 1.x vs 2.x (demonstrated in example). 
+- Tests live in `tests/` with `test_` prefix +- Integration tests that create real venvs are slow (~30s each) +- Tests marked `@pytest.mark.network` require external network access +- Run `pytest --cov=pyisolate` for coverage (configured in pyproject.toml) +- Tests must not import from `_internal/` unless they are testing internal behavior -### PyTorch Sharing -Use `share_torch: true` in extension config to share PyTorch models across processes for memory efficiency. +## Environment Variables -### RPC Patterns -- Extensions can call host methods recursively (host→extension→host) -- Shared singletons work transparently via RPC proxying -- Context tracking prevents circular calls +| Variable | Purpose | +|----------|---------| +| `PYISOLATE_CHILD` | `"1"` in child processes | +| `PYISOLATE_DEBUG_RPC` | `"1"` for verbose RPC logging | +| `PYISOLATE_TRACE_FILE` | Path for structured perf trace JSONL output | +| `PYISOLATE_ENABLE_CUDA_IPC` | `"1"` to enable CUDA IPC tensor transport | +| `PYISOLATE_PATH_DEBUG` | `"1"` for sys.path logging during child init | +| `PYISOLATE_ENFORCE_SANDBOX` | Force bwrap sandboxing | -## Testing Notes +## Key Invariants -The test suite covers real venv creation, dependency conflicts, and RPC edge cases. The `example/` directory provides a working demonstration with 3 extensions that showcase the core functionality. +- No pickle anywhere in the transport layer — JSON-RPC only +- Library is application-agnostic — no references to specific integrations in library code +- Fail loud — surface failures immediately, no silent degradation +- `_internal/` is private — public API goes through `__init__.py` exports diff --git a/README.md b/README.md index 8bbd39b..9bf0919 100644 --- a/README.md +++ b/README.md @@ -2,94 +2,22 @@ **Run Python extensions in isolated virtual environments with seamless inter-process communication.** -> 🚨 **Fail Loud Policy**: pyisolate assumes the rest of ComfyUI core is correct. 
Missing prerequisites or runtime failures immediately raise descriptive exceptions instead of being silently ignored. - -pyisolate enables you to run Python extensions with conflicting dependencies in the same application by automatically creating isolated environments for each extension. The default provisioner uses `uv`, and ComfyUI integrations can also provision a conda environment through `pixi` when an extension needs conda-first packages. Extensions communicate with the host process through a transparent RPC system, making the isolation invisible to your code while keeping the host environment dependency-free. +pyisolate enables you to run Python extensions with conflicting dependencies in the same application by automatically creating isolated environments for each extension. Extensions communicate with the host process through a transparent JSON-RPC system, making the isolation invisible to your code while keeping the host environment dependency-free. ## Requirements -- Python 3.9+ +- Python 3.10+ - The [`uv`](https://github.com/astral-sh/uv) CLI available on your `PATH` -- `pip`/`venv` for bootstrapping the development environment -- PyTorch is optional and only required for tensor-sharing features (for example, `share_torch=True`) - -If you want tensor-sharing features, install PyTorch separately (for example: `pip install torch`). - -## Environment Variables - -PyIsolate uses several environment variables for configuration and debugging: - -### Core Variables (Set by PyIsolate automatically) -- **`PYISOLATE_CHILD`**: Set to `"1"` in isolated child processes. Used to detect if code is running in host or child. -- **`PYISOLATE_HOST_SNAPSHOT`**: Path to JSON file containing the host's `sys.path` and environment variables. Used during child process initialization. -- **`PYISOLATE_MODULE_PATH`**: Path to the extension module being loaded. Used to detect ComfyUI root directory. 
- -### Debug Variables (Set by user) -- **`PYISOLATE_PATH_DEBUG`**: Set to `"1"` to enable detailed sys.path logging during child process initialization. Useful for debugging import issues. - -Example usage: -```bash -# Enable detailed path logging -export PYISOLATE_PATH_DEBUG=1 -python main.py - -# Disable path logging (default) -unset PYISOLATE_PATH_DEBUG -python main.py -``` - -## Quick Start - -### Option A – run everything for me - -```bash -cd /path/to/pyisolate -./quickstart.sh -``` - -The script installs `uv`, creates the dev venv, installs pyisolate in editable mode, runs the multi-extension example, and executes the Comfy Hello World demo. - -### Option B – manual setup (5 minutes) - -1. **Create the dev environment** - ```bash - cd /path/to/pyisolate - uv venv - source .venv/bin/activate # Windows: .venv\\Scripts\\activate - uv pip install -e ".[dev]" - ``` -2. **Run the example extensions** - ```bash - cd example - python main.py - cd .. - ``` - Expected output: - ``` - Extension1 | ✓ PASSED | Data processing with pandas/numpy 1.x - Extension2 | ✓ PASSED | Array processing with numpy 2.x - Extension3 | ✓ PASSED | HTML parsing with BeautifulSoup/scipy - ``` -3. **Run the Comfy Hello World** - ```bash - cd comfy_hello_world - python main.py - ``` - You should see the isolated custom node load, execute, and fetch data from the shared singleton service. 
- -## Documentation - -- Project site: https://comfy-org.github.io/pyisolate/ -- Walkthroughs & architecture notes: see `mysolate/HELLO_WORLD.md` and `mysolate/GETTING_STARTED.md` +- PyTorch is optional and only required for tensor-sharing features (`share_torch=True`) ## Key Benefits -- 🔒 **Dependency Isolation**: Run extensions with incompatible dependencies (e.g., numpy 1.x and 2.x) in the same application -- 🚀 **Zero-Copy PyTorch Tensor Sharing**: Share PyTorch tensors between processes without serialization overhead -- 📦 **Multiple Environment Backends**: Use `uv` by default or a conda/pixi environment when the extension needs conda-native dependencies -- 🔄 **Transparent Communication**: Call async methods across process boundaries as if they were local -- 🎯 **Simple API**: Clean, intuitive interface with minimal boilerplate -- ⚡ **Fast**: Uses `uv` for blazing-fast virtual environment creation +- **Dependency Isolation**: Run extensions with incompatible dependencies (e.g., numpy 1.x and 2.x) in the same application +- **Zero-Copy PyTorch Tensor Sharing**: Share PyTorch tensors between processes without serialization overhead +- **Multiple Environment Backends**: Use `uv` by default or a conda/pixi environment for conda-native dependencies +- **Bubblewrap Sandboxing**: Deny-by-default filesystem isolation on Linux with GPU passthrough +- **Transparent Communication**: Call async methods across process boundaries as if they were local +- **Fast**: Uses `uv` for blazing-fast virtual environment creation ## Installation @@ -130,13 +58,11 @@ import pyisolate import asyncio async def main(): - # Configure the extension manager config = pyisolate.ExtensionManagerConfig( venv_root_path="./venvs" ) manager = pyisolate.ExtensionManager(pyisolate.ExtensionBase, config) - # Load an extension with specific dependencies extension = manager.load_extension( pyisolate.ExtensionConfig( name="data_processor", @@ -146,11 +72,9 @@ async def main(): ) ) - # Use the extension 
result = await extension.process_data([1, 2, 3, 4, 5]) print(f"Mean: {result}") # Mean: 3.0 - # Cleanup await extension.stop() asyncio.run(main()) @@ -161,97 +85,25 @@ asyncio.run(main()) Share PyTorch tensors between processes without serialization: ```python -# extensions/ml_extension/__init__.py -from pyisolate import ExtensionBase -import torch - -class MLExtension(ExtensionBase): - async def process_tensor(self, tensor: torch.Tensor): - # Tensor is shared, not copied! - return tensor.mean() -``` - -```python -# main.py extension = manager.load_extension( pyisolate.ExtensionConfig( name="ml_processor", module_path="./extensions/ml_extension", - share_torch=True # Enable zero-copy tensor sharing + share_torch=True, # Enable zero-copy tensor sharing + share_cuda_ipc=True, # CUDA IPC for GPU tensors (Linux) ) ) -# Large tensor is shared, not serialized +# Large tensor is shared via /dev/shm, not serialized large_tensor = torch.randn(1000, 1000) mean = await extension.process_tensor(large_tensor) ``` -### Execution Model Axis - -ComfyUI integrations now treat environment provisioning and runtime boundary as separate choices: - -- `package_manager = "uv"` or `package_manager = "conda"` chooses how the child environment is built -- `execution_model = "host-coupled"` or `execution_model = "sealed_worker"` chooses how much host runtime state the child may inherit - -`host-coupled` remains the default for the classic `uv` path. `sealed_worker` is the foreign-interpreter path: no host `sys.path` reconstruction, no host framework runtime imports as a crutch, JSON-RPC tensor transport, and no sandbox in this phase. 
- -### UV Backend for Sealed Workers - -ComfyUI extensions can also request a sealed `uv` worker explicitly: - -```toml -[project] -name = "uv-sealed-node" -version = "0.1.0" -dependencies = ["boltons"] - -[tool.comfy.isolation] -can_isolate = true -package_manager = "uv" -execution_model = "sealed_worker" -share_torch = false -``` - -Trade-offs for `package_manager = "uv"` with `execution_model = "sealed_worker"`: - -- `share_torch` must be `False` -- tensors cross the boundary through JSON-compatible RPC values instead of shared-memory tensor handles -- host `sys.path` reconstruction is disabled -- host framework runtime imports such as `comfy.isolation.extension_wrapper` must not be required in the child -- `bwrap` sandboxing is intentionally disabled in this phase - -### Conda Backend for Sealed Workers - -ComfyUI extensions can declare a conda-backed isolated environment in `pyproject.toml`: - -```toml -[project] -name = "weather-node" -version = "0.1.0" -dependencies = ["xarray", "cfgrib"] - -[tool.comfy.isolation] -can_isolate = true -package_manager = "conda" -share_torch = false -conda_channels = ["conda-forge"] -conda_dependencies = ["eccodes", "cfgrib"] -``` - -Trade-offs for `package_manager = "conda"`: - -- `share_torch` is forced `False` -- `bwrap` sandboxing is skipped -- the child uses its own interpreter instead of the host Python -- the child is treated as a sealed foreign runtime and must not import host framework runtime code through leaked `sys.path` -- tensor transfer crosses the RPC boundary as JSON-compatible values instead of shared-memory tensor handles - ### Shared State with Singletons Share state across all extensions using ProxiedSingleton: ```python -# shared.py from pyisolate import ProxiedSingleton class DatabaseAPI(ProxiedSingleton): @@ -266,151 +118,57 @@ class DatabaseAPI(ProxiedSingleton): ``` ```python -# extensions/extension_a/__init__.py -class ExtensionA(ExtensionBase): - async def save_result(self, result): - db = 
DatabaseAPI() # Returns proxy to host's instance - await db.set("result", result) - -# extensions/extension_b/__init__.py -class ExtensionB(ExtensionBase): - async def get_result(self): - db = DatabaseAPI() # Returns proxy to host's instance - return await db.get("result") +# In any extension — returns proxy to host's real instance +db = DatabaseAPI() +await db.set("result", result) ``` -### Complete Application Structure +### Execution Models -A complete pyisolate application requires a special `main.py` entry point to handle virtual environment activation: +pyisolate provides two execution models: -```python -# main.py -if __name__ == "__main__": - # When running as the main script, import and run your host application - from host import main - main() -else: - # When imported by extension processes, ensure venv is properly activated - import os - import site - import sys - - if os.name == "nt": # Windows-specific venv activation - venv = os.environ.get("VIRTUAL_ENV", "") - if venv != "": - sys.path.insert(0, os.path.join(venv, "Lib", "site-packages")) - site.addsitedir(os.path.join(venv, "Lib", "site-packages")) -``` +- **`host-coupled`** (default): Child process shares the host's torch runtime and can use zero-copy tensor transfer via `/dev/shm` and CUDA IPC. +- **`sealed_worker`**: Fully isolated child with its own interpreter. No host `sys.path` reconstruction, JSON-RPC tensor transport only. 
-```python -# host.py - Your main application logic -import pyisolate -import asyncio +And two environment backends: -async def async_main(): - # Create extension manager - config = pyisolate.ExtensionManagerConfig( - venv_root_path="./extension-venvs" - ) - manager = pyisolate.ExtensionManager(ExtensionBase, config) - - # Load extensions (e.g., from a directory or configuration file) - extensions = [] - for extension_path in discover_extensions(): - extension_config = pyisolate.ExtensionConfig( - name=extension_name, - module_path=extension_path, - isolated=True, - dependencies=load_dependencies(extension_path), - apis=[SharedAPI] # Optional shared singletons - ) - extension = manager.load_extension(extension_config) - extensions.append(extension) - - # Use extensions - for extension in extensions: - result = await extension.process() - print(f"Result: {result}") - - # Clean shutdown - for extension in extensions: - await extension.stop() - -def main(): - asyncio.run(async_main()) -``` - -This structure ensures that: -- The host application runs normally when executed directly -- Extension processes properly activate their virtual environments when spawned -- Windows-specific path handling is properly managed - -## Features - -### Core Features -- **Automatic Virtual Environment Management**: Creates and manages isolated environments automatically -- **Bidirectional RPC**: Extensions can call host methods and vice versa -- **Async/Await Support**: Full support for asynchronous programming -- **Lifecycle Hooks**: `before_module_loaded()`, `on_module_loaded()`, and `stop()` for setup/teardown -- **Error Propagation**: Exceptions are properly propagated across process boundaries - -### Advanced Features -- **Dependency Resolution**: Automatically installs extension-specific dependencies -- **Platform Support**: Works on Windows, Linux, and soon to be tested on macOS -- **Context Tracking**: Ensures callbacks happen on the same asyncio loop as the original call -- 
**Fast Installation**: Uses `uv` for 10-100x faster package installation without every extension having its own copy of libraries - -## Architecture +- **`uv`** (default): Fast pip-compatible virtual environments. +- **`conda`**: pixi-backed conda environments for packages that need conda-forge. +```python +# Sealed worker with conda environment +config = pyisolate.ExtensionConfig( + name="weather_processor", + module_path="./extensions/weather", + isolated=True, + execution_model="sealed_worker", + package_manager="conda", + share_torch=False, + conda_channels=["conda-forge"], + conda_dependencies=["eccodes", "cfgrib"], + dependencies=["xarray", "cfgrib"], +) ``` -┌─────────────────────┐ RPC ┌─────────────┐ -│ Host Process │◄────────────►│ Extension A │ -│ │ │ (venv A) │ -│ ┌──────────────┐ │ └─────────────┘ -│ │ Shared │ │ RPC ┌─────────────┐ -│ │ Singletons │ │◄────────────►│ Extension B │ -│ └──────────────┘ │ │ (venv B) │ -└─────────────────────┘ └─────────────┘ -``` - -## Implementing a Host Adapter (IsolationAdapter) -When integrating pyisolate with your application (like ComfyUI), you implement the `IsolationAdapter` protocol. This tells pyisolate how to configure isolated processes for your environment. +### Implementing an Adapter -### Reference Implementation - -The canonical example is in `tests/fixtures/test_adapter.py`: +Applications integrate via the `IsolationAdapter` protocol: ```python from pyisolate.interfaces import IsolationAdapter -from pyisolate._internal.shared import ProxiedSingleton - -class MockHostAdapter(IsolationAdapter): - """Reference adapter showing all protocol methods.""" +class MyAppAdapter(IsolationAdapter): @property def identifier(self) -> str: - """Return unique adapter identifier (e.g., 'comfyui').""" return "myapp" def get_path_config(self, module_path: str) -> dict: - """Configure sys.path for isolated extensions. 
- - Returns: - - preferred_root: Your app's root directory - - additional_paths: Extra paths for imports - """ return { "preferred_root": "/path/to/myapp", "additional_paths": ["/path/to/myapp/extensions"], } - def setup_child_environment(self, snapshot: dict) -> None: - """Configure child process after sys.path reconstruction.""" - pass # Set up logging, environment, etc. - def register_serializers(self, registry) -> None: - """Register custom type serializers for RPC transport.""" registry.register( "MyCustomType", serializer=lambda obj: {"data": obj.data}, @@ -418,66 +176,49 @@ class MockHostAdapter(IsolationAdapter): ) def provide_rpc_services(self) -> list: - """Return ProxiedSingleton classes to expose via RPC.""" return [MyRegistry, MyProgressReporter] - - def handle_api_registration(self, api, rpc) -> None: - """Post-registration hook for API-specific setup.""" - pass ``` -### Testing Your Adapter - -Run the contract tests to verify your adapter implements the protocol correctly: +## Architecture -```bash -# The test suite verifies all protocol methods -pytest tests/test_adapter_contract.py -v +``` +┌─────────────────────┐ RPC ┌─────────────┐ +│ Host Process │◄────────────►│ Extension A │ +│ │ │ (venv A) │ +│ ┌──────────────┐ │ └─────────────┘ +│ │ Shared │ │ RPC ┌─────────────┐ +│ │ Singletons │ │◄────────────►│ Extension B │ +│ └──────────────┘ │ │ (venv B) │ +└─────────────────────┘ └─────────────┘ ``` -## Roadmap - -### ✅ Completed -- [x] Core isolation and RPC system -- [x] Automatic virtual environment creation -- [x] Bidirectional communication -- [x] PyTorch tensor sharing -- [x] Shared singleton pattern -- [x] Comprehensive test suite -- [x] Windows, Linux support -- [x] Security features (path normalization) -- [x] Fast installation with `uv` -- [x] Context tracking for RPC calls -- [x] Async/await support -- [x] Performance benchmarking suite -- [x] Memory usage tracking and benchmarking -- [x] Network access restrictions -- [x] Filesystem access 
sandboxing - -### 🚧 In Progress -- [ ] Documentation site -- [ ] macOS testing -- [ ] Wrapper for non-async calls between processes - -### 🔮 Future Plans -- [ ] CPU/Memory usage limits -- [ ] Hot-reloading of extensions -- [ ] Distributed RPC (across machines) -- [ ] Profiling and debugging tools +## Features -## Use Cases +### Core +- Automatic virtual environment management +- Bidirectional JSON-RPC over Unix Domain Sockets (no pickle) +- Full async/await support +- Lifecycle hooks: `before_module_loaded()`, `on_module_loaded()`, `stop()` +- Error propagation across process boundaries -pyisolate is perfect for: +### Advanced +- Bubblewrap sandbox with deny-by-default filesystem (Linux) +- CUDA wheel resolution for custom GPU package builds +- Zero-copy tensor transfer via CUDA IPC and `/dev/shm` +- Performance tracing (`PYISOLATE_TRACE_FILE`) +- Multi-distro sandbox detection (RHEL, Ubuntu, Arch, SELinux) -- **Plugin Systems**: When plugins may require conflicting dependencies -- **ML Pipelines**: Different models requiring different library versions -- **Microservices in a Box**: Multiple services with different dependencies in one app -- **Testing**: Running tests with different dependency versions in parallel -- **Legacy Code Integration**: Wrapping legacy code with specific dependency requirements +## Environment Variables -## Development +| Variable | Description | +|----------|-------------| +| `PYISOLATE_CHILD` | Set to `"1"` in isolated child processes | +| `PYISOLATE_DEBUG_RPC` | `"1"` for verbose RPC message logging | +| `PYISOLATE_TRACE_FILE` | Path for structured performance trace output | +| `PYISOLATE_ENABLE_CUDA_IPC` | `"1"` to enable CUDA IPC tensor transport | +| `PYISOLATE_PATH_DEBUG` | `"1"` for detailed sys.path logging during child init | -We welcome contributions! 
+## Development ```bash # Setup development environment @@ -495,69 +236,15 @@ ruff check pyisolate tests python benchmarks/simple_benchmark.py ``` -### Benchmarking - -pyisolate includes a comprehensive benchmarking suite to measure RPC call overhead: - -```bash -# Install benchmark dependencies -uv pip install -e ".[bench]" - -# Quick benchmark using existing example extensions -python benchmarks/simple_benchmark.py - -# Full benchmark suite with statistical analysis -python benchmarks/benchmark.py - -# Quick mode with fewer iterations for faster results -python benchmarks/benchmark.py --quick - -# Skip torch benchmarks (if torch not available) -python benchmarks/benchmark.py --no-torch - -# Skip GPU benchmarks -python benchmarks/benchmark.py --no-gpu -``` - -#### Example Benchmark Output - -``` -================================================== -BENCHMARK RESULTS -================================================== -Test Mean (ms) Std Dev (ms) Runs --------------------------------------------------- -small_int 0.63 0.05 1000 -small_string 0.64 0.06 1000 -medium_string 0.65 0.07 1000 -tiny_tensor 0.79 0.08 1000 -small_tensor 0.80 0.11 1000 -medium_tensor 0.81 0.06 1000 -large_tensor 0.78 0.08 1000 -model_tensor 0.88 0.29 1000 - -Fastest result: 0.63ms -``` - -The benchmarks measure: - -1. **Small Data RPC Overhead**: ~0.6ms for basic data types (integers, strings) -2. **Tensor Overhead**: Minimal overhead (~0.2ms) for sharing tensors up to 6GB via zero-copy shared memory -3. **Scaling**: Performance remains O(1) regardless of tensor size +## Use Cases -> ⚠️ **Note for CPU Tensors**: When checking out or running benchmarks with `share_torch=True`, ensuring `TMPDIR=/dev/shm` is recommended to guarantee that shared memory files are visible to sandboxed child processes. 
+pyisolate is designed for: +- **Plugin Systems**: When plugins may require conflicting dependencies +- **ML Pipelines**: Different models requiring different library versions +- **Microservices in a Box**: Multiple services with different dependencies in one app +- **Legacy Code Integration**: Wrapping legacy code with specific dependency requirements ## License pyisolate is licensed under the MIT License. See [LICENSE](LICENSE) for details. - -## Acknowledgments - -- Built on Python's `multiprocessing` and `asyncio` -- Uses [`uv`](https://github.com/astral-sh/uv) for fast package management -- Inspired by plugin systems like Chrome Extensions and VS Code Extensions - ---- - -**Star this repo** if you find it useful! ⭐ diff --git a/README_COMFYUI.md b/README_COMFYUI.md deleted file mode 100644 index 51789e2..0000000 --- a/README_COMFYUI.md +++ /dev/null @@ -1,311 +0,0 @@ -# PyIsolate for ComfyUI Custom Nodes - -**Process isolation for ComfyUI custom nodes - solve dependency conflicts without breaking your workflow.** - -> 🎯 **Quick Start**: Get your custom node isolated in under 5 minutes. See [Installation](#installation) and [Converting Your Node](#converting-your-custom-node). - -## What Problem Does This Solve? - -ComfyUI custom nodes often require conflicting dependencies: -- Node A needs `numpy==1.24.0` -- Node B needs `numpy==2.0.0` -- Both can't coexist in the same environment - -**PyIsolate solution**: Each custom node runs in its own isolated process with its own dependencies, while sharing PyTorch tensors with zero-copy performance. 
- -## Installation - -### Prerequisites -- Python 3.9+ -- ComfyUI installed -- The [`uv`](https://github.com/astral-sh/uv) package manager - -### Install uv (if not already installed) -```bash -# Linux/macOS -curl -LsSf https://astral.sh/uv/install.sh | sh - -# Windows -powershell -c "irm https://astral.sh/uv/install.ps1 | iex" -``` - -### Install PyIsolate in ComfyUI - -```bash -cd ComfyUI -source .venv/bin/activate # Windows: .venv\Scripts\activate - -Clone from pollockjj's repo: -git clone https://github.com/pollockjj/pyisolate -cd pyisolate -git install . - -``` - -### Enable Isolation in ComfyUI - -Add the `--use-process-isolation` flag when launching ComfyUI: - -```bash -python main.py --use-process-isolation -``` - -**That's it.** ComfyUI will now automatically detect and isolate any custom nodes with a `pyisolate.yaml` manifest. - ---- - -## Converting a Custom Node - -### Step 1: Create `pyisolate.yaml` - -In the custom node directory, create a `pyisolate.yaml` file: - -```yaml -# custom_nodes/MyAwesomeNode/pyisolate.yaml -isolated: true -share_torch: true # Enable `zero-copy` PyTorch tensor sharing - Allows fast copy of tensors, but at a higher memory and filespace footprint - -dependencies: - - numpy==2.0.0 # Node specific numpy version - - pillow==10.0.0 # Node specific dependencies - - my-special-lib>=1.5 -``` - -### Step 2: Test It - -```bash -cd ComfyUI -python main.py --use-process-isolation -``` - -**Expected logs - Loading:** -PyIsolate and internal functions that use it use a "][" as log prefix. 
-``` -][ ComfyUI-IsolationTest cache miss, spawning process for metadata # First run or cache invalidation -][ ComfyUI-PyIsolatedV3 loaded from cache # Subsequent runs where nodes and environment is unchanged so cache is reused -][ ComfyUI-APIsolated loaded from cache -][ ComfyUI-DepthAnythingV2 loaded from cache - -][ ComfyUI-IsolationTest metadata cached -][ ComfyUI-IsolationTest ejecting after metadata extraction -``` - - -**Expected logs - Reporting:** -``` -Import times for custom nodes: - 0.0 seconds: /path/to/ComfyUI/custom_nodes/websocket_image_save.py - 0.0 seconds: /path/to/ComfyUI/custom_nodes/comfyui-florence2 - 0.0 seconds: /path/to/ComfyUI/custom_nodes/comfyui-videohelpersuite - 0.0 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-GGUF - 0.0 seconds: /path/to/ComfyUI/custom_nodes/comfyui-kjnodes - 0.0 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-Manager - 0.1 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-Crystools - 0.3 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-WanVideoWrapper - 0.4 seconds: /path/to/ComfyUI/custom_nodes/RES4LYF - - -Import times for isolated custom nodes: - 0.0 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-DepthAnythingV2 - 0.0 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-PyIsolatedV3 - 0.0 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-APIsolated - 3.2 seconds: /path/to/ComfyUI/custom_nodes/ComfyUI-IsolationTest #First-time cost -``` - - -**Expected logs - during workflow usage:** -``` -got prompt # A new workflow where isolated nodes are used -][ ComfyUI-PyIsolatedV3 - just-in-time spawning of isolated custom_node -][ ComfyUI-APIsolated - just-in-time spawning of isolated custom_node -Prompt executed in 68.34 seconds - -got prompt # same workflow -Prompt executed in 61.68 seconds - -got prompt # different workflow, same two custom_nodes used -Prompt executed in 72.29 seconds - -got prompt # same 2nd workflow as above -Prompt executed in 66.17 seconds - -got prompt # new workflow, no isolated nodes used -][ 
ComfyUI-APIsolated isolated custom_node not in execution graph, evicting -][ ComfyUI-PyIsolatedV3 isolated custom_node not in execution graph, evicting -Prompt executed in 8.49 seconds - -``` - -## What Works - -✅ **Standard Python code execution:** -- Any standard Python code inside node functions using Comfy standard imports and each custom_node's pysiolate.yaml's dependencies -- Custom dependencies and conflicting library versions in isolated custom_nodes - -✅ **Zero-copy tensor sharing (linux only):** -- PyTorch tensors pass between processes without serialization -- ~1ms overhead per RPC call -- No memory duplication - -✅ **ComfyUI V3 API support - at least one node tested with (`comfy_api.latest`):** - ### Core - - io.ComfyNode - - io.NodeOutput - - io.Schema - - ### Numeric & Combo - - io.Int, io.Int.Input - - io.Float, io.Float.Input, io.Float.Output - - io.Combo, io.Combo.Input - - ### Text & Flags - - io.String, io.String.Input - - io.Boolean, io.Boolean.Input - - ### Images, Latents, Conditioning - - io.Image, io.Image.Input, io.Image.Type - - io.Latent, io.Latent.Input, io.Latent.Output - - io.Conditioning, io.Conditioning.Input, io.Conditioning.Output - - io.Sigmas, io.Sigmas.Input - - ### Models & Samplers - - io.Model, io.Model.Input, io.Model.Output - - io.Vae, io.Vae.Input - - io.Sampler.Input - - io.UpscaleModel, io.UpscaleModel.Input, io.UpscaleModel.Output - - io.LatentUpscaleModel.Input, io.LatentUpscaleModel.Output - - io.ControlNet.Output - - io.Guider.Input - - io.WanCameraEmbedding, io.WanCameraEmbedding.Input - - ### CLIP / Vision - - io.ClipVisionOutput, io.ClipVisionOutput.Input - - ### Media - - io.Video, io.Video.Input, io.Video.Output - - io.Audio, io.Audio.Output - - io.AudioEncoder.Input, io.AudioEncoder.Output, io.AudioEncoderOutput.Output - - ### Geometry / Voxel - - io.Mesh, io.Mesh.Input, io.Mesh.Output - - io.Voxel, io.Voxel.Input, io.Voxel.Output - - ### Misc - - io.Hidden, io.Hidden.prompt, io.Hidden.extra_pnginfo - - 
io.FolderType, io.FolderType.output - - io.MatchType, io.MatchType.Input, io.MatchType.Output, io.MatchType.Template - - io.Photomaker.Input, io.Photomaker.Output - - io.UploadType, io.UploadType.video - - io.AnyType, io.AnyType.Output - - -See [Appendix: Supported APIs](#appendix-supported-apis) for complete function lists. - -✅ **ComfyUI core proxies (fully supported):** -- `model_management.py` - Device management, memory operations, interrupt handling -- `folder_paths.py` - Path resolution, model discovery, file operations -- All functions callable from isolated nodes via transparent RPC - -✅ **ComfyUI standard V1 types that work across isolation:** - -| Input/Output Type | Status | Notes | -|-------------------|--------|-------| -| `IMAGE` | ✅ Works | PyTorch tensor, zero-copy | -| `MASK` | ✅ Works | PyTorch tensor, zero-copy | -| `LATENT` | ✅ Works | Dict with tensor, serializes cleanly | -| `INT` | ✅ Works | Primitive type | -| `FLOAT` | ✅ Works | Primitive type | -| `STRING` | ✅ Works | Primitive type | -| `BOOLEAN` | ✅ Works | Primitive type | -| `CONDITIONING` | ✅ Works | List of tuples with tensors | -| `CONTROL_NET` | unknown | Not tested | -| `MODEL` | ⚠️ Basic | ModelPatcher object, standard inference | -| `CLIP` | ⚠️ Basic | standard CLIP decoding tested isolated | -| `VAE` | ⚠️ Basic | standard VAE decoding tested isolated | - -**Key insight:** Any ComfyUI type that is fundamentally a **tensor, dict, list, or primitive** will work. Complex stateful objects like `MODEL`, `CLIP`, `VAE` cannot cross the isolation boundary (yet). - -✅ **Dependency conflicts of isolated custom_nodes** -- Different numpy versions, diffusers, etc. - ---- - -## What Doesn't Work - -❌ **PromptServer route decoration:** -```python -# This pattern does NOT work -from server import PromptServer -@PromptServer.instance.routes.get("/my_route") -def my_handler(request): - pass -``` -**Why**: Route decorators execute at module import time, before isolation is ready. 
-**Workaround**: Use `route_manifest.json` (see [Advanced: Web Routes](#advanced-web-routes)). - -❌ **Monkey patching ComfyUI core:** -```python -# This will NEVER work in isolation -import comfy.model_management -comfy.model_management.some_function = my_patched_version -``` -**Why**: Each isolated process has its own copy of ComfyUI code. Patches don't propagate. -**Solution**: Don't monkey patch. Use proper extension patterns instead. - ---- - -## Live Examples - -Three working isolated custom node packs are available for reference: - -| Node Pack | What It Does | Isolation Benefit | -|-----------|--------------|-------------------| -| [ComfyUI-PyIsolatedV3](https://github.com/pollockjj/ComfyUI-PyIsolated) | Demo node using `deepdiff` | Shows basic isolation setup | -| [ComfyUI-APIsolated](https://github.com/pollockjj/ComfyUI-APIsolated) | API nodes (OpenAI, Gemini, etc.) | Isolated API dependencies | -| [ComfyUI-IsolationTest](https://github.com/pollockjj/ComfyUI-IsolationTest) | 70+ ComfyUI core nodes | Proves isolation doesn't break functionality | - - - - -## Performance Characteristics - -### Startup Time -| Scenario | Time | Notes | -|----------|------|-------| -| **First run (cache miss)** | speed dependent environment | Creates venv, installs deps, caches metadata | -| **Subsequent runs (cache hit)** | almost instantaneous | Loads cached metadata, no spawn | -| **Process spawn on first execution** | 1-3 seconds (background) | Only when node first executes in workflow | - -### Runtime Overhead -| Operation | Overhead | Impact | -|-----------|----------|--------| -| **RPC call (simple data)** | ~0.3ms | Negligible | -| **Tensor passing (share_torch)** | ~1ms | Zero-copy, minimal | -| **Large model loading** | Same as non-isolated | No overhead | - -### Memory Footprint -- **Per isolated node:** ~50-300MB -- **Tensors:** Shared memory (no duplication) -- **Models:** Can be shared via ProxiedSingleton - -**Bottom line:** Isolation adds ~1-2ms per node 
execution. For typical workflows (seconds per generation), this is <0.1% overhead. - ---- - -## Troubleshooting - -### "Cache miss, spawning process" on every startup -**Cause:** Cache invalidated (code changed, manifest changed, or Python version changed). -**Fix:** Normal behavior on first run or after updates. Subsequent runs will be fast. - -### "Module not found" errors in isolated node -**Cause:** Dependency not listed in `pyisolate.yaml`. -**Fix:** Add the missing package to the `dependencies` list. - -### Node works non-isolated but fails isolated -**Cause:** Likely using a pattern that doesn't work with isolation (see [What Doesn't Work](#what-doesnt-work-yet)). -**Fix:** Check logs for specific error, review the node's `__init__.py` for module-level side effects. - -### "Torch already imported" warning spam -**Cause:** Isolated processes reload torch, triggering ComfyUI's warning. -**Fix:** Known issue diff --git a/BENCHMARK_INSTRUCTIONS.md b/benchmarks/INSTRUCTIONS.md similarity index 100% rename from BENCHMARK_INSTRUCTIONS.md rename to benchmarks/INSTRUCTIONS.md diff --git a/benchmarks/README.md b/benchmarks/README.md index 1680774..bb32889 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -2,6 +2,16 @@ This document describes the benchmarking suite for measuring RPC call overhead in pyisolate. +## Runner Scripts + +Platform-specific benchmark runner scripts are included in this directory: + +- `run_benchmarks_linux.sh` — Linux/macOS benchmark runner +- `run_benchmarks_windows.bat` — Windows CMD benchmark runner +- `run_benchmarks_windows.ps1` — Windows PowerShell benchmark runner +- `run_benchmarks_powershell_launcher.bat` — PowerShell execution policy wrapper +- `INSTRUCTIONS.md` — Step-by-step instructions for running benchmarks + ## Overview The benchmark suite measures the performance overhead of proxied calls compared to local execution, specifically excluding setup costs like virtual environment creation and process startup. 
diff --git a/run_benchmarks_linux.sh b/benchmarks/run_benchmarks_linux.sh similarity index 100% rename from run_benchmarks_linux.sh rename to benchmarks/run_benchmarks_linux.sh diff --git a/run_benchmarks_powershell_launcher.bat b/benchmarks/run_benchmarks_powershell_launcher.bat similarity index 100% rename from run_benchmarks_powershell_launcher.bat rename to benchmarks/run_benchmarks_powershell_launcher.bat diff --git a/run_benchmarks_windows.bat b/benchmarks/run_benchmarks_windows.bat similarity index 100% rename from run_benchmarks_windows.bat rename to benchmarks/run_benchmarks_windows.bat diff --git a/run_benchmarks_windows.ps1 b/benchmarks/run_benchmarks_windows.ps1 similarity index 100% rename from run_benchmarks_windows.ps1 rename to benchmarks/run_benchmarks_windows.ps1 diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 98654b3..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Clean build directory -clean: - rm -rf $(BUILDDIR) - -# Build markdown documentation -markdown: - @$(SPHINXBUILD) -b markdown "$(SOURCEDIR)" "$(BUILDDIR)/markdown" $(SPHINXOPTS) $(O) - @echo "Markdown build finished. The markdown files are in $(BUILDDIR)/markdown." - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api.rst b/docs/api.rst deleted file mode 100644 index bc4512e..0000000 --- a/docs/api.rst +++ /dev/null @@ -1,36 +0,0 @@ -API Reference -============= - -This page contains the public API reference for pyisolate. - -.. currentmodule:: pyisolate - -Main Classes ------------- - -.. autoclass:: ExtensionBase - :members: - :inherited-members: - :show-inheritance: - -.. autoclass:: ExtensionManager - :members: - :show-inheritance: - -Configuration -------------- - -.. autoclass:: ExtensionManagerConfig - :members: - :show-inheritance: - -.. autoclass:: ExtensionConfig - :members: - :show-inheritance: - -Utilities ---------- - -.. autoclass:: ProxiedSingleton - :members: - :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index d507614..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,70 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# For the full list of built-in configuration values, see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -import os -import sys - -sys.path.insert(0, os.path.abspath("..")) - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = "pyisolate" -copyright = "2026, Jacob Segal" -author = "Jacob Segal" - -version = "0.9.1" -release = "0.9.1" - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.viewcode", - "sphinx.ext.githubpages", - "sphinx.ext.napoleon", # Support for Google/NumPy style docstrings - "sphinx.ext.intersphinx", # Link to other project's documentation - "myst_parser", # Markdown support -] - -# MyST parser configuration -myst_enable_extensions = 
[ - "colon_fence", - "deflist", -] -source_suffix = { - ".rst": "restructuredtext", - ".md": "markdown", -} - -templates_path = ["_templates"] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -# -- Autodoc configuration --------------------------------------------------- -autodoc_default_options = { - "members": True, - "member-order": "bysource", - "special-members": "__init__", - "undoc-members": False, - "exclude-members": "__weakref__", - "imported-members": False, -} - -# Napoleon settings -napoleon_google_docstring = True -napoleon_numpy_docstring = True - - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -html_theme = "sphinx_rtd_theme" -html_static_path = ["_static"] - -# -- Intersphinx configuration ----------------------------------------------- -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), -} diff --git a/docs/debugging.md b/docs/debugging.md index f7ae068..75d303b 100644 --- a/docs/debugging.md +++ b/docs/debugging.md @@ -42,7 +42,7 @@ if os.environ.get("PYISOLATE_CHILD") == "1": **Cause**: Tensor was garbage collected before remote process could access shared memory. -**Solution**: The `TensorKeeper` class holds references for 30 seconds by default. If you see this error: +**Solution**: The `TensorKeeper` class holds references for 5.0 seconds by default. If you see this error: 1. Increase `TensorKeeper.retention_seconds` for slow networks 2. Ensure tensors aren't being explicitly deleted too early 3. Check that `/dev/shm` has sufficient space diff --git a/docs/edge_cases.md b/docs/edge_cases.md index f8a8b75..13aa334 100644 --- a/docs/edge_cases.md +++ b/docs/edge_cases.md @@ -28,10 +28,10 @@ if "received from another process" in str(e): **Behavior**: `FileNotFoundError` on deserialization. 
-**Handling**: `TensorKeeper` holds tensor references for 30 seconds: +**Handling**: `TensorKeeper` holds tensor references for 5.0 seconds: ```python class TensorKeeper: - def __init__(self, retention_seconds: float = 30.0): + def __init__(self, retention_seconds: float = 5.0): # Keeps strong references to prevent GC ``` diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 8a3272a..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. pyisolate documentation master file - -pyisolate Documentation -======================= - -**pyisolate** is a Python library for running extensions across multiple isolated virtual environments with RPC communication. - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - api - rpc_protocol - debugging - edge_cases - platform_compatibility - -Overview --------- - -pyisolate solves dependency conflicts by isolating extensions in separate venvs while maintaining seamless host-extension communication through AsyncRPC. - -Key Features -~~~~~~~~~~~~ - -* **Dependency Isolation**: Each extension gets its own virtual environment -* **Transparent RPC**: Seamless communication between host and extensions -* **PyTorch Sharing**: Optionally share PyTorch models across processes for memory efficiency -* **Simple API**: Easy to use with minimal configuration - -Quick Example -~~~~~~~~~~~~~ - -.. code-block:: python - - from pyisolate import ExtensionManager - - async def main(): - manager = ExtensionManager("./extensions") - await manager.start() - - # Extensions can be called transparently - result = await manager.extensions['my_extension'].process_data(data) - - await manager.stop() - -Installation ------------- - -.. 
code-block:: bash - - pip install pyisolate - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/rpc_protocol.md b/docs/rpc_protocol.md index 68c09c9..e745819 100644 --- a/docs/rpc_protocol.md +++ b/docs/rpc_protocol.md @@ -218,15 +218,15 @@ PyTorch tensors are not serialized directly. Instead, they are converted to `Ten ## Transport Layer -The protocol supports multiple transport implementations: +The primary transport is `JSONSocketTransport` — length-prefixed JSON over Unix Domain Sockets (or TCP on Windows). All isolation modes use this transport. -### QueueTransport +### JSONSocketTransport (Primary) -Uses `multiprocessing.Queue` for communication. Used when subprocess isolation is via `multiprocessing.Process`. +Uses length-prefixed JSON-RPC over raw sockets. No pickle. This is the standard transport for all Linux isolation modes (sandboxed and non-sandboxed) and Windows TCP mode. -### UDSTransport +### QueueTransport (Legacy) -Uses Unix Domain Sockets for communication. Used when subprocess isolation is via `bubblewrap` sandbox. +Uses `multiprocessing.Queue` for communication. This is a legacy backward-compatibility path and is not used in current isolation modes. ### Transport Interface diff --git a/example/bwrap_torch_share/host.py b/example/bwrap_torch_share/host.py new file mode 100644 index 0000000..d95a9bd --- /dev/null +++ b/example/bwrap_torch_share/host.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +"""bwrap + torch_share example — sandbox-enforced host-coupled isolation. + +Same as torch_share but with sandbox_mode=REQUIRED. The child runs +inside a bubblewrap sandbox with deny-by-default filesystem. +Linux only. Exit 0 + "PASS" on success. 
+""" + +import asyncio +import contextlib +import logging +import os +import sys +import tempfile +from pathlib import Path + +repo_root = Path(__file__).resolve().parents[2] +sys.path.insert(0, str(repo_root)) + +from pyisolate._internal.adapter_registry import AdapterRegistry +from pyisolate._internal.rpc_protocol import AsyncRPC, ProxiedSingleton +from pyisolate._internal.sandbox_detect import detect_sandbox_capability +from pyisolate.config import ExtensionConfig, SandboxMode +from pyisolate.host import Extension +from pyisolate.interfaces import SerializerRegistryProtocol +from example.torch_share.extension import TorchShareExtension + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s") +logger = logging.getLogger(__name__) + + +class MinimalAdapter: + @property + def identifier(self) -> str: + return "bwrap_torch_share_example" + + def get_path_config(self, module_path: str) -> dict | None: + return {"preferred_root": os.getcwd(), "additional_paths": []} + + def setup_child_environment(self, snapshot: dict) -> None: + pass + + def register_serializers(self, registry: SerializerRegistryProtocol) -> None: + try: + from pyisolate._internal.tensor_serializer import deserialize_tensor, serialize_tensor + registry.register("torch.Tensor", serialize_tensor, deserialize_tensor) + except Exception: + pass + + def provide_rpc_services(self) -> list[type[ProxiedSingleton]]: + return [] + + def handle_api_registration(self, api: ProxiedSingleton, rpc: AsyncRPC) -> None: + pass + + +async def main() -> int: + if sys.platform != "linux": + logger.error("bwrap examples are Linux-only") + return 1 + + cap = detect_sandbox_capability() + if not cap.available: + logger.error(f"bwrap not available: {cap.restriction_model}") + return 1 + + pyisolate_root = str(repo_root) + extension_module_path = str(Path(repo_root) / "example" / "torch_share" / "extension") + + tmp = tempfile.mkdtemp(prefix="bwrap_torch_share_") + venv_root = 
os.path.join(tmp, "venvs") + os.makedirs(venv_root, exist_ok=True) + shared_tmp = os.path.join(tmp, "ipc_shared") + os.makedirs(shared_tmp, exist_ok=True) + os.environ["TMPDIR"] = shared_tmp + + venv_bin = os.path.dirname(sys.executable) + path = os.environ.get("PATH", "") + if venv_bin not in path.split(os.pathsep): + os.environ["PATH"] = f"{venv_bin}{os.pathsep}{path}" + + AdapterRegistry.unregister() + AdapterRegistry.register(MinimalAdapter()) + + ext = None + try: + config = ExtensionConfig( + name="bwrap_torch_share_example", + module_path=extension_module_path, + isolated=True, + dependencies=[f"-e {pyisolate_root}"], + apis=[], + share_torch=True, + share_cuda_ipc=False, + sandbox_mode=SandboxMode.REQUIRED, + sandbox={"writable_paths": [shared_tmp, "/dev/shm"]}, + env={"PYISOLATE_SIGNAL_CLEANUP": "1"}, + ) + + logger.info("Loading bwrap + torch_share extension...") + ext = Extension( + module_path=extension_module_path, + extension_type=TorchShareExtension, + config=config, + venv_root_path=venv_root, + ) + ext.ensure_process_started() + + proxy = ext.get_proxy() + result = await proxy.ping() + logger.info(f"ping result: {result}") + + if result == "pong_torch_share": + logger.info("PASS — bwrap + torch_share example completed successfully") + return 0 + else: + logger.error(f"FAIL — unexpected ping result: {result}") + return 1 + finally: + if ext is not None: + with contextlib.suppress(Exception): + ext.stop() + AdapterRegistry.unregister() + + +if __name__ == "__main__": + sys.exit(asyncio.run(main())) diff --git a/example/extensions/extension1/__init__.py b/example/extensions/extension1/__init__.py deleted file mode 100644 index d8eca5d..0000000 --- a/example/extensions/extension1/__init__.py +++ /dev/null @@ -1,53 +0,0 @@ -import logging - -import numpy as np -import pandas as pd -from shared import DatabaseSingleton, ExampleExtension -from typing_extensions import override - -logger = logging.getLogger(__name__) -db = DatabaseSingleton() - - -class 
Extension1(ExampleExtension): - """Extension using pandas and numpy 1.x for data processing.""" - - @override - async def initialize(self): - logger.debug("Extension1 initialized.") - - @override - async def prepare_shutdown(self): - logger.debug("Extension1 preparing for shutdown.") - - @override - async def do_stuff(self, value: str) -> str: - logger.debug("Extension1 processing data with pandas and numpy") - - # Create a DataFrame with some data - data = {"name": ["Alice", "Bob", "Charlie"], "score": [95, 87, 92]} - df = pd.DataFrame(data) - - # Use numpy for calculations - mean_score = np.mean(df["score"]) - numpy_version = np.__version__ - pandas_version = pd.__version__ - - result = { - "extension": "extension1", - "data_rows": len(df), - "mean_score": float(mean_score), - "numpy_version": numpy_version, - "pandas_version": pandas_version, - "input_value": value, - } - - # Store result in shared database - await db.set_value("extension1_result", result) - - return f"Extension1 processed {len(df)} rows with mean score {mean_score}" - - -def example_entrypoint() -> ExampleExtension: - """Entrypoint function for the extension.""" - return Extension1() diff --git a/example/extensions/extension1/manifest.yaml b/example/extensions/extension1/manifest.yaml deleted file mode 100644 index 23a9161..0000000 --- a/example/extensions/extension1/manifest.yaml +++ /dev/null @@ -1,6 +0,0 @@ -enabled: true -isolated: true -share_torch: false -dependencies: - - numpy>=1.21.0,<2.0.0 - - pandas>=1.3.0 diff --git a/example/extensions/extension2/__init__.py b/example/extensions/extension2/__init__.py deleted file mode 100644 index 96abf35..0000000 --- a/example/extensions/extension2/__init__.py +++ /dev/null @@ -1,49 +0,0 @@ -import logging - -import numpy as np -from shared import DatabaseSingleton, ExampleExtension -from typing_extensions import override - -logger = logging.getLogger(__name__) -db = DatabaseSingleton() - - -class Extension2(ExampleExtension): - """Extension 
using numpy 2.x and requests for HTTP operations.""" - - @override - async def initialize(self): - logger.debug("Extension2 initialized.") - - @override - async def prepare_shutdown(self): - logger.debug("Extension2 preparing for shutdown.") - - @override - async def do_stuff(self, value: str) -> str: - logger.debug("Extension2 simulating HTTP request with numpy 2.x") - - # Simulate API response data - mock_response = {"status": "success", "data": [1.5, 2.3, 3.1, 4.7, 5.9]} - - # Use numpy 2.x for array operations - arr = np.array(mock_response["data"]) - stats = { - "extension": "extension2", - "numpy_version": np.__version__, - "array_sum": float(np.sum(arr)), - "array_mean": float(np.mean(arr)), - "array_std": float(np.std(arr)), - "input_value": value, - "simulated_request": True, - } - - # Store result in shared database - await db.set_value("extension2_result", stats) - - return f"Extension2 processed array with sum {stats['array_sum']}" - - -def example_entrypoint() -> ExampleExtension: - """Entrypoint function for the extension.""" - return Extension2() diff --git a/example/extensions/extension2/manifest.yaml b/example/extensions/extension2/manifest.yaml deleted file mode 100644 index d73c2b8..0000000 --- a/example/extensions/extension2/manifest.yaml +++ /dev/null @@ -1,6 +0,0 @@ -enabled: true -isolated: true -share_torch: false -dependencies: - - numpy>=2.0.0 - - requests>=2.25.0 diff --git a/example/extensions/extension3/__init__.py b/example/extensions/extension3/__init__.py deleted file mode 100644 index 937a8dd..0000000 --- a/example/extensions/extension3/__init__.py +++ /dev/null @@ -1,70 +0,0 @@ -import logging - -import numpy as np -import scipy.stats as stats -from bs4 import BeautifulSoup -from shared import DatabaseSingleton, ExampleExtension -from typing_extensions import override - -logger = logging.getLogger(__name__) -db = DatabaseSingleton() - - -class Extension3(ExampleExtension): - """Extension using beautifulsoup4 and scipy for web 
scraping and statistics.""" - - @override - async def initialize(self): - logger.debug("Extension3 initialized.") - - @override - async def prepare_shutdown(self): - logger.debug("Extension3 preparing for shutdown.") - - @override - async def do_stuff(self, value: str) -> str: - logger.debug("Extension3 parsing HTML and computing statistics") - - # Mock HTML content - html_content = """ - -
-