From 06fbe41e2f1fa96deceb1fc8ab9c3beb1efad793 Mon Sep 17 00:00:00 2001 From: hsteude Date: Wed, 30 Jul 2025 22:18:12 +0200 Subject: [PATCH 01/16] Initial version for pipe-fiction --- pipelines/pipe-fiction/.vscode/launch.json | 22 + pipelines/pipe-fiction/README.md | 438 ++++++++++ .../pipe-fiction-codebase/.dockerignore | 59 ++ .../pipe-fiction-codebase/.python-version | 1 + .../pipe-fiction-codebase/Dockerfile | 18 + .../pipe-fiction-codebase/README.md | 49 ++ .../pipe_fiction/__init__.py | 0 .../pipe_fiction/data_generator.py | 27 + .../pipe_fiction/data_processor.py | 61 ++ .../pipe-fiction-codebase/pyproject.toml | 15 + .../pipe-fiction-codebase/uv.lock | 45 + pipelines/pipe-fiction/pipelines/.gitignore | 207 +++++ .../pipe-fiction/pipelines/.python-version | 1 + pipelines/pipe-fiction/pipelines/README.md | 0 .../pipe-fiction/pipelines/components.py | 99 +++ pipelines/pipe-fiction/pipelines/pipeline.py | 8 + .../pipe-fiction/pipelines/pyproject.toml | 15 + .../pipelines/run_in_k8s_cluster.py | 26 + .../pipelines/run_locally_in_docker.py | 15 + .../pipelines/run_locally_in_subproc.py | 7 + .../pipe-fiction/pipelines/utils/__init__.py | 0 .../pipelines/utils/auth_session.py | 101 +++ .../utils/kfp_docker_monkey_patches.py | 180 ++++ pipelines/pipe-fiction/pipelines/uv.lock | 782 ++++++++++++++++++ 24 files changed, 2176 insertions(+) create mode 100644 pipelines/pipe-fiction/.vscode/launch.json create mode 100644 pipelines/pipe-fiction/README.md create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/.dockerignore create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/.python-version create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/Dockerfile create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/README.md create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/__init__.py create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py create mode 100644 
pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/pyproject.toml create mode 100644 pipelines/pipe-fiction/pipe-fiction-codebase/uv.lock create mode 100644 pipelines/pipe-fiction/pipelines/.gitignore create mode 100644 pipelines/pipe-fiction/pipelines/.python-version create mode 100644 pipelines/pipe-fiction/pipelines/README.md create mode 100644 pipelines/pipe-fiction/pipelines/components.py create mode 100644 pipelines/pipe-fiction/pipelines/pipeline.py create mode 100644 pipelines/pipe-fiction/pipelines/pyproject.toml create mode 100644 pipelines/pipe-fiction/pipelines/run_in_k8s_cluster.py create mode 100644 pipelines/pipe-fiction/pipelines/run_locally_in_docker.py create mode 100644 pipelines/pipe-fiction/pipelines/run_locally_in_subproc.py create mode 100644 pipelines/pipe-fiction/pipelines/utils/__init__.py create mode 100644 pipelines/pipe-fiction/pipelines/utils/auth_session.py create mode 100644 pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py create mode 100644 pipelines/pipe-fiction/pipelines/uv.lock diff --git a/pipelines/pipe-fiction/.vscode/launch.json b/pipelines/pipe-fiction/.vscode/launch.json new file mode 100644 index 0000000..36a8670 --- /dev/null +++ b/pipelines/pipe-fiction/.vscode/launch.json @@ -0,0 +1,22 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Remote Attach", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/pipe-fiction-codebase", + "remoteRoot": "/app" + } + ], + "justMyCode": false, + "subProcess": true + } + ] +} diff --git a/pipelines/pipe-fiction/README.md b/pipelines/pipe-fiction/README.md new file mode 100644 index 0000000..a29e7fe --- /dev/null +++ b/pipelines/pipe-fiction/README.md @@ -0,0 +1,438 @@ +# KFP Pipeline Development & Debugging Demo + +This repository 
demonstrates **advanced development and debugging techniques** for Kubeflow Pipelines (KFP), enabling developers to build, test, and debug ML pipelines efficiently across different environments. + +**Note:** This demo uses intentionally simple examples to clearly illustrate the core concepts and debugging workflows. The techniques shown here should also apply to complex ML workloads. + +## Overview + +As part of our MLOps platform, we support KFP for orchestrating machine learning workflows. This demo showcases: + +- **Local Development** with immediate feedback loops +- **Interactive Debugging** with full IDE integration +- **Multi-environment Support** (subprocess, Docker, cluster) +- **Best Practices** for pipeline development and code organization + +## Quick Start + +### Prerequisites + +- Python 3.12+ +- Docker (for Docker runner) +- VS Code (recommended) or any debugpy-compatible IDE +- Access to a Kubeflow cluster (for remote execution) + +### Setup + +1. **Navigate to the demo:** + ```bash + # After cloning the larger example repository + cd pipelines/pipe-fiction + ``` + +2. **Install dependencies for both virtual environments:** + + **Pipeline environment (KFP-specific packages):** + ```bash + cd pipelines + uv sync + source .venv/bin/activate # Activate when working on pipeline code + ``` + +3. **Build the base Docker image:** + ```bash + cd pipe-fiction-codebase + docker build -t /: . + ``` + More details on this in the `pipe-fiction-codebase` directory. + +## Repository Organization + +This demo is structured to demonstrate **separation** between standard Python code and KFP orchestration setup, while solving a key challenge with KFP Lightweight Components: + +### The KFP Lightweight Component Challenge + +KFP Lightweight Components are designed to be **self-contained** - meaning all code must be either: +- Defined inline within the component function +- Installed via `packages_to_install` parameter + +This creates a problem: code duplication. 
If you need the same utility function in multiple components, you typically have to copy-paste the code into each component, leading to maintenance nightmares, which is the reason most people use container components for heavy lifting. + +Alternative approaches like publishing packages to PyPI or private registries are possible, but create their own challenges - you'd need to publish and version your package for every code change during development, which significantly slows down the iteration cycle. + +### Our Solution: Base Image with Pre-installed Package + +We solve this by **pre-installing our ML package into the base Docker image**: + +```dockerfile +# In pipe-fiction-codebase/Dockerfile +FROM python:3.12-slim +WORKDIR /app + +# Install our package into the base image +COPY pyproject.toml README.md ./ +COPY pipe_fiction/ ./pipe_fiction/ +RUN uv pip install --system -e . +``` + +This allows us to **import** (not copy) our code in any component: + +```python +@component(base_image="/:") +def any_component(): + # Clean import - no code duplication! 
+ from pipe_fiction.data_generator import DataGenerator + from pipe_fiction.data_processor import DataProcessor + + # Use the classes normally + generator = DataGenerator() + processor = DataProcessor() +``` + +### Code Package (`pipe-fiction-codebase/`) + +Contains the core ML logic as a **standalone Python package**: + +``` +pipe-fiction-codebase/ +├── pipe_fiction/ +│ ├── data_generator.py # Generate sample data +│ └── data_processor.py # Data transformation logic +├── Dockerfile # Containerization with package installation +└── pyproject.toml # Package definition +``` + +**Key Benefits of This Approach:** + +- **No Code Duplication** - Import the same classes/functions across multiple components without copying code +- **Independent Development** - The `pipe_fiction` package can be developed, tested, and debugged completely independently of KFP +- **Data Scientists in Their Home Turf** - Familiar Python development environment without KFP complexity +- **Reusability** - The same code can be used in notebooks, scripts, web services, or other orchestration frameworks +- **Standard Testing** - Use pytest, unittest, or any testing framework without KFP complexity +- **IDE Support** - Full autocomplete, refactoring, and debugging support for your core logic +- **Version Management** - Package versioning independent of pipeline versions +- **Clean Components** - Pipeline components focus on orchestration, not business logic implementation + +### Pipeline Orchestration (`pipelines/`) + +Contains KFP-specific orchestration code: + +``` +pipelines/ +├── components.py # KFP component definitions (import from base image) +├── pipeline.py # Pipeline assembly +├── run_locally_*.py # Local execution scripts +├── run_in_k8s_cluster.py # Remote execution +├── .venv/ # Virtual environment with custom package +└── utils/ # KFP utilities and patches +``` + +**Local Package Installation for IDE Support:** + +The pipelines directory also contains a virtual environment where,
alongside KFP-specific packages, the custom package is installed in development mode: + +```bash +# Install the custom package locally for IDE support +uv pip install -e ../pipe-fiction-codebase/ +``` + +This enables full IDE integration: +- Autocomplete and IntelliSense for imported package code +- Type checking and error detection in component definitions +- "Go to definition" works across package imports +- Refactoring support across the entire codebase + +**This separation allows you to:** + +1. **Develop core logic** using standard Python development practices +2. **Test business logic** without spinning up KFP environments +3. **Debug algorithms** using familiar tools and workflows +4. **Reuse code** across multiple components without duplication +5. **Maintain clean abstractions** between ML code and infrastructure +6. **Scale development** - multiple developers can work on the package independently + +## Execution Environments + +There are (at least) three ways to execute the pipeline that uses logic from the custom package in tasks within the DAG: + +### 1. 
Subprocess Runner (Fastest Development) + +**Best for:** Quick iteration, algorithm development, initial testing + +```bash +cd pipelines +python run_locally_in_subproc.py +``` + +**Advantages:** +- Fastest execution - no container overhead +- Direct debugging - breakpoints work immediately +- Live code changes - no rebuilds needed +- Full IDE integration - all debugging features available +- Local Package Access - SubprocessRunner uses the package installed in the local .venv +- No Image Rebuilds - Code changes are immediately available without Docker builds +- Immediate Debugging - Set breakpoints in both pipeline and package code instantly +- Fast iteration - Modify algorithms and test immediately + +**Limitations:** +- Environment differences - may not match production environment exactly +- Dependency conflicts - uses local Python environment +- Limited isolation - no containerization benefits + +### 2. Docker Runner (Container-based Development) + +**Best for:** Pipelines with container components and multiple differing environments in the KFP tasks + +```bash +cd pipelines +python run_locally_in_docker.py +``` + +**Advantages:** +- Production environment - identical to cluster execution +- Full debugging support - step into containerized code +- Dependency isolation - no local conflicts +- Volume mounting - access local data files +- Port forwarding - debug server accessible from IDE + +**Limitations:** +- Slower iteration - container startup overhead +- Docker dependency - requires Docker runtime +- Limited resource control - basic Docker constraints only + +### 3. 
Cluster Execution (In-Cluster Debugging) + +**Best for:** In-cluster issues, cluster-specific debugging, resource-intensive workloads + +```bash +cd pipelines +python run_in_k8s_cluster.py +``` + +**Advantages:** +- Real production environment - actual cluster resources +- Remote debugging - debug running pods via port-forwarding +- Scalability testing - real resource constraints +- Integration testing - with actual cluster services + +**Limitations:** +- Slowest feedback - submission and scheduling overhead +- Resource constraints - limited by cluster quotas +- Complex setup - requires cluster access and networking + +## Development Workflows + +### Subprocess Runner Workflow +For rapid pipeline development and testing: +1. Implement changes in component or custom package code +2. Run `python run_locally_in_subproc.py` to validate immediately +3. Build and push Docker image when ready for cluster: `docker build -t /: . && docker push` +4. Update image reference in pipeline components if needed +5. Submit pipeline to cluster: `python run_in_k8s_cluster.py` + +### Docker Runner Workflow + +**For pipeline-only changes:** +1. Modify files in `pipelines/` directory (components, pipeline definitions) +2. Run `python run_locally_in_docker.py` - changes are immediately reflected +3. Submit to cluster when ready + +**For custom package changes:** +1. Modify code in `pipe-fiction-codebase/` +2. Rebuild Docker image locally: `docker build -t /: .` +3. Run `python run_locally_in_docker.py` to test with new image +4. Push image to registry: `docker push /:` +5. Update image reference in pipeline components if needed +6. Submit pipeline to cluster + +### Cluster Execution Workflow + +**For pipeline-only changes:** +1. Modify files in `pipelines/` directory +2. Submit directly to cluster: `python run_in_k8s_cluster.py` + +**For custom package changes:** +1. Modify code in `pipe-fiction-codebase/` +2. Rebuild and push Docker image: `docker build -t /: . && docker push` +3.
Update image reference in pipeline components +4. Submit pipeline to cluster + +## Debugging Setup + +### VS Code Configuration + +Create `.vscode/launch.json`: + +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Remote Attach", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/../pipe-fiction-codebase", + "remoteRoot": "/app" + } + ], + "justMyCode": false, + "subProcess": true + } + ] +} +``` + +### Other IDE Support + +**PyCharm:** +- Run → Edit Configurations → Python Remote Debug +- Host: `localhost`, Port: `5678` +- Path mappings: Local: `pipe-fiction-codebase` → Remote: `/app` + +**Any debugpy-compatible editor:** +- Connect to `localhost:5678` +- Configure path mappings as needed + +### Debugging Workflow + +1. **Enable debug mode:** + ```python + # In run_locally_in_docker.py + environment={'KFP_DEBUG': 'true'} + ``` + +2. **Start the pipeline:** + ```bash + python run_locally_in_docker.py + ``` + +3. **Connect debugger:** + - Pipeline will pause and wait for debugger connection + - Attach your IDE debugger to `localhost:5678` + +4. **Debug interactively:** + - Set breakpoints in your pipeline components + - Step through code execution + - Inspect variables and data structures + - Debug both pipeline logic and imported modules + +## Example: Debugging a Data Processing Pipeline + +This demo includes a simple data processing pipeline that demonstrates common debugging scenarios: + +### Components + +1. **DataGenerator Component** (`generate_data_comp`) + - Generates sample text data for processing + - Demonstrates data creation debugging + - Logs operations with structured logging + +2. 
**DataProcessor Component** (`process_data_comp`) + - Processes text data and extracts information + - Counts words and generates statistics + - Demonstrates data transformation debugging + +### Debugging Scenarios + +**Data Generation Logic:** +```python +generator = DataGenerator() +lines = generator.create_sample_data() # Set breakpoint here +``` + +**Data Processing Logic:** +```python +processor = DataProcessor() +processed_data = processor.process_lines(lines) # Debug transformations +summary = processor.get_summary(processed_data) # Inspect results +``` + +**Cross-Component Data Flow:** +- Debug how data flows between pipeline components +- Inspect intermediate outputs and transformations +- Validate data contracts between components + +## Advanced Features + +### Volume Mounting for Data Access + +```python +# Mount local data directory into container +local.init(runner=local.DockerRunner( + volumes={ + os.path.abspath('../data'): {'bind': '/app/data', 'mode': 'ro'} + } +)) + +# Access files in container +result = example_pipeline(file_path='/app/data/local-data-file.txt') +``` + +### Environment-Controlled Debugging + +```python +# Enable/disable debugging via environment variables +environment={ + 'KFP_DEBUG': 'true', # Enable debugging + 'KFP_DEBUG_PORT': '5678', # Custom debug port +} +``` + +### Cluster Debugging with Port Forwarding + +```bash +# Find your pipeline pod +kubectl get pods | grep your-pipeline + +# Forward debug port +kubectl port-forward pod/your-pod-name 5678:5678 + +# Connect local debugger to remote pod +# Use the same VS Code configuration +``` + +## Technical Implementation Notes + +### KFP Version Compatibility + +This demo includes monkey patches for older KFP versions (pre-2.14) to enable: +- Port forwarding for debugging +- Environment variable injection +- Volume mounting for data access + +These patches provide forward compatibility and will be obsolete when upgrading to KFP 2.14+. 
+ +### Debugging Architecture + +The debugging setup works by: +1. **Injecting debugpy** into pipeline components +2. **Port forwarding** from container to host +3. **Path mapping** between local IDE and remote container +4. **Environment control** for enabling/disabling debug mode + +## Contributing + +This demo represents best practices we've developed for KFP pipeline development. Contributions and improvements are welcome! + +### Future Enhancements + +- Support for KFP 2.14+ native features +- Additional debugging tools integration +- Performance profiling examples +- Multi-language component support + +## Additional Resources + +- [Kubeflow Pipelines Documentation](https://kubeflow-pipelines.readthedocs.io/) +- [debugpy Documentation](https://github.com/microsoft/debugpy) +- [VS Code Python Debugging](https://code.visualstudio.com/docs/python/debugging) + +--- + +For questions or support with KFP development on our MLOps platform, please reach out to our team. diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/.dockerignore b/pipelines/pipe-fiction/pipe-fiction-codebase/.dockerignore new file mode 100644 index 0000000..72a77d3 --- /dev/null +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/.dockerignore @@ -0,0 +1,59 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Git +.git/ +.gitignore + +# uv +.uv/ +uv.lock + +# Tests +.pytest_cache/ +.coverage +htmlcov/ + +# Other +*.log +.mypy_cache/ + diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/.python-version b/pipelines/pipe-fiction/pipe-fiction-codebase/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ 
b/pipelines/pipe-fiction/pipe-fiction-codebase/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/Dockerfile b/pipelines/pipe-fiction/pipe-fiction-codebase/Dockerfile new file mode 100644 index 0000000..d875fdd --- /dev/null +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/Dockerfile @@ -0,0 +1,18 @@ +# Use Python 3.12 as base image +FROM python:3.12-slim + +# Set working directory +WORKDIR /app + +# Install uv for package management +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/ + +# Copy project files +COPY pyproject.toml README.md ./ +COPY pipe_fiction/ ./pipe_fiction/ + +# Install the package with uv +RUN uv pip install --system -e . + +# Set Python path +ENV PYTHONPATH=/app diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/README.md b/pipelines/pipe-fiction/pipe-fiction-codebase/README.md new file mode 100644 index 0000000..336f0b9 --- /dev/null +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/README.md @@ -0,0 +1,49 @@ +# Pipe Fiction Codebase + +A simple Python package demonstrating ML code organization for KFP (Kubeflow Pipelines) development. 
+ +## Package Structure + +``` +pipe_fiction/ +├── data_generator.py # Generate sample text data +└── data_processor.py # Process and analyze text data +``` + +## Installation + +Install dependencies and the package in development mode: + +```bash +uv sync +source .venv/bin/activate +``` + +## Usage Example + +```python +from pipe_fiction.data_generator import DataGenerator +from pipe_fiction.data_processor import DataProcessor + +# Generate sample data +generator = DataGenerator() +lines = generator.create_sample_data() + +# Process the data +processor = DataProcessor() +processed_data = processor.process_lines(lines) +summary = processor.get_summary(processed_data) + +print(f"Processed {summary['total_lines']} lines with {summary['total_words']} words") +``` + +## Docker Image + +Build the Docker image with the package pre-installed: + +```bash +docker buildx build --platform linux/amd64 \ + -t /pipe-fiction:latest . +``` + +This image can then be used as the `base_image` in KFP components, allowing them to import from `pipe_fiction` without code duplication. diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/__init__.py b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py new file mode 100644 index 0000000..f1fd527 --- /dev/null +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py @@ -0,0 +1,27 @@ +""" +Simple DataGenerator for demonstrating KFP debugging. 
+""" + +from loguru import logger +from typing import List + + +class DataGenerator: + """Simple text data generator for debugging demos.""" + + def __init__(self): + logger.info("📝 Initializing DataGenerator") + + def create_sample_data(self) -> List[str]: + """Create sample text lines for processing.""" + logger.info("🔧 Creating sample text data") + + lines = [ + "Here are some random useless lines of text.", + "Line 1: MLOps is an important topic.", + "Line 2: Kubeflow Pipeline are hard to debug, sometimes.", + "Line 3: prokube.ai seams to be a nice company." + ] + + logger.success(f"✅ Created {len(lines)} sample lines") + return lines diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py new file mode 100644 index 0000000..34908af --- /dev/null +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py @@ -0,0 +1,61 @@ +""" +Simple DataProcessor for demonstrating KFP debugging. 
+""" + +from loguru import logger +from typing import List, Dict + + +class DataProcessor: + """Simple text processor with logging.""" + + def __init__(self): + logger.info("🔧 Initializing DataProcessor") + + def process_lines(self, lines: List[str]) -> List[Dict[str, str]]: + """Process lines and extract useful information.""" + logger.info(f"⚙️ Processing {len(lines)} lines") + + processed_data = [] + + for i, line in enumerate(lines, 1): + # Clean the line + clean_line = line.strip() + + if not clean_line: + logger.debug(f"⏭️ Skipping empty line {i}") + continue + + # Extract some info + processed_item = { + "line_number": i, + "original": clean_line, + "word_count": len(clean_line.split()), + "contains_mlops": "mlops" in clean_line.lower(), + "contains_kubeflow": "kubeflow" in clean_line.lower(), + "length": len(clean_line) + } + + processed_data.append(processed_item) + logger.debug(f"✨ Processed line {i}: {processed_item['word_count']} words") + + logger.success(f"🎉 Successfully processed {len(processed_data)} lines") + return processed_data + + def get_summary(self, processed_data: List[Dict[str, str]]) -> Dict[str, any]: + """Get summary statistics.""" + logger.info("📊 Generating summary statistics") + + if not processed_data: + return {"total_lines": 0} + + summary = { + "total_lines": len(processed_data), + "total_words": sum(item["word_count"] for item in processed_data), + "mlops_mentions": sum(1 for item in processed_data if item["contains_mlops"]), + "kubeflow_mentions": sum(1 for item in processed_data if item["contains_kubeflow"]), + "avg_line_length": sum(item["length"] for item in processed_data) / len(processed_data) + } + + logger.info(f"📈 Summary: {summary}") + return summary diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/pyproject.toml b/pipelines/pipe-fiction/pipe-fiction-codebase/pyproject.toml new file mode 100644 index 0000000..8f57dc9 --- /dev/null +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/pyproject.toml @@ -0,0 +1,15 
@@ +[project] +name = "pipe-fiction" +version = "0.1.0" +description = "Simple Python package for KFP demo" +requires-python = ">=3.12" +dependencies = [ + "loguru>=0.7.3", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["pipe_fiction"] diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/uv.lock b/pipelines/pipe-fiction/pipe-fiction-codebase/uv.lock new file mode 100644 index 0000000..57f1ebb --- /dev/null +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/uv.lock @@ -0,0 +1,45 @@ +version = 1 +revision = 2 +requires-python = ">=3.12" + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "loguru" +version = "0.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, +] + +[[package]] +name = "pipe-fiction" +version = "0.1.0" +source = { editable = "." } +dependencies = [ + { name = "loguru" }, +] + +[package.metadata] +requires-dist = [{ name = "loguru", specifier = ">=0.7.3" }] + +[[package]] +name = "win32-setctime" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, +] diff --git a/pipelines/pipe-fiction/pipelines/.gitignore b/pipelines/pipe-fiction/pipelines/.gitignore new file mode 100644 index 0000000..77e6235 --- /dev/null +++ b/pipelines/pipe-fiction/pipelines/.gitignore @@ -0,0 +1,207 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
from typing import List

from kfp.dsl import component


@component(base_image="hsteude/pipe-fiction:latest", packages_to_install=["debugpy"])
def generate_data_comp() -> List[str]:
    """Generate sample text lines with pipe_fiction's DataGenerator.

    If KFP_DEBUG=true in the container environment, start a debugpy
    server on KFP_DEBUG_PORT (default 5678) and block until a client
    attaches, so the component can be stepped through remotely.

    Imports live inside the body because KFP serializes only the
    function body into the component container.

    :return: the generated sample lines
    """
    import os

    if os.getenv("KFP_DEBUG") == "true":
        import debugpy

        debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678"))
        debugpy.listen(("0.0.0.0", debug_port))
        debugpy.wait_for_client()

    from pipe_fiction.data_generator import DataGenerator

    generator = DataGenerator()
    lines = generator.create_sample_data()
    return lines


@component(
    base_image="hsteude/pipe-fiction:latest",
    packages_to_install=["debugpy"],
)
def process_data_comp(lines: List[str]) -> List[str]:
    """Process the given lines with pipe_fiction's DataProcessor.

    Supports the same KFP_DEBUG / KFP_DEBUG_PORT remote-debug hook as
    generate_data_comp.

    :param lines: raw lines produced by generate_data_comp
    :return: the processed lines
    """
    import os

    if os.getenv("KFP_DEBUG") == "true":
        import debugpy

        debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678"))
        debugpy.listen(("0.0.0.0", debug_port))
        debugpy.wait_for_client()

    from pipe_fiction.data_processor import DataProcessor

    processor = DataProcessor()
    processed_lines = processor.process_lines(lines)  # Step into here!

    return processed_lines


@component(base_image="hsteude/kfp-hello-world:latest", packages_to_install=["debugpy"])
def greeter_component(names: List = ["Laura", "Malte", "Paula"]):
    """Greet each name via pipe_fiction's HelloWorld and print the results.

    NOTE: the mutable default is kept intentionally — KFP reads the
    default from the signature to build the component spec, and the
    list is never mutated at runtime.

    :param names: names to greet
    """
    import os

    # Check environment variable for debug mode
    if os.getenv("KFP_DEBUG") == "true":
        import debugpy

        debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678"))
        debugpy.listen(("0.0.0.0", debug_port))
        debugpy.wait_for_client()
        debugpy.breakpoint()

    # Your actual component logic
    from pipe_fiction.hello_world import HelloWorld

    greeter = HelloWorld("Python Developer")
    greetings = greeter.say_hello_multiple(names)
    for i, greeting in enumerate(greetings, 1):
        print(f" {i}. {greeting}")
    print()
@pipeline
def example_pipeline():
    """Two-step demo pipeline: generate sample lines, then process them."""
    generated = generate_data_comp()
    process_data_comp(lines=generated.output)
def get_istio_auth_session(url: str, username: str, password: str) -> dict:
    """
    Determine if the specified URL is secured by Dex and try to obtain a session cookie.

    WARNING: only Dex `staticPasswords` and `LDAP` authentication are currently supported
    (we default to using `staticPasswords` if both are enabled)

    :param url: Kubeflow server URL, including protocol
    :param username: Dex `staticPasswords` or `LDAP` username
    :param password: Dex `staticPasswords` or `LDAP` password
    :return: auth session information (endpoint/redirect/login URLs, secured flag,
             and the session cookie string to pass to the KFP client)
    """
    # define the default return object
    auth_session = {
        "endpoint_url": url,    # KF endpoint URL
        "redirect_url": None,   # KF redirect URL, if applicable
        "dex_login_url": None,  # Dex login URL (for POST of credentials)
        "is_secured": None,     # True if KF endpoint is secured
        "session_cookie": None  # Resulting session cookies in the form "key1=value1; key2=value2"
    }

    # use a persistent session (for cookies)
    # NOTE(review): TLS verification is deliberately disabled on every request
    # (verify=False) — presumably for self-signed cluster certs; confirm acceptable.
    with requests.Session() as s:

        ################
        # Determine if Endpoint is Secured
        ################
        resp = s.get(url, allow_redirects=True, verify=False)
        if resp.status_code != 200:
            raise RuntimeError(
                f"HTTP status code '{resp.status_code}' for GET against: {url}"
            )

        auth_session["redirect_url"] = resp.url

        # if we were NOT redirected, then the endpoint is UNSECURED
        if len(resp.history) == 0:
            auth_session["is_secured"] = False
            return auth_session
        else:
            auth_session["is_secured"] = True

        ################
        # Get Dex Login URL
        ################
        redirect_url_obj = urlsplit(auth_session["redirect_url"])

        # if we are at `/auth?=xxxx` path, we need to select an auth type
        if re.search(r"/auth$", redirect_url_obj.path):
            #######
            # TIP: choose the default auth type by including ONE of the following
            #######

            # OPTION 1: set "staticPasswords" as default auth type
            redirect_url_obj = redirect_url_obj._replace(
                path=re.sub(r"/auth$", "/auth/local", redirect_url_obj.path)
            )
            # OPTION 2: set "ldap" as default auth type
            # redirect_url_obj = redirect_url_obj._replace(
            #     path=re.sub(r"/auth$", "/auth/ldap", redirect_url_obj.path)
            # )

        # if we are at `/auth/xxxx/login` path, then no further action is needed (we can use it for login POST)
        if re.search(r"/auth/.*/login$", redirect_url_obj.path):
            auth_session["dex_login_url"] = redirect_url_obj.geturl()

        # else, we need to be redirected to the actual login page
        else:
            # this GET should redirect us to the `/auth/xxxx/login` path
            resp = s.get(redirect_url_obj.geturl(), allow_redirects=True, verify=False)
            if resp.status_code != 200:
                raise RuntimeError(
                    f"HTTP status code '{resp.status_code}' for GET against: {redirect_url_obj.geturl()}"
                )

            # set the login url
            auth_session["dex_login_url"] = resp.url

        ################
        # Attempt Dex Login
        ################
        resp = s.post(
            auth_session["dex_login_url"],
            data={"login": username, "password": password},
            verify=False,
            allow_redirects=True
        )
        # Dex redirects back to the app on success; no redirect means the POST failed
        if len(resp.history) == 0:
            raise RuntimeError(
                f"Login credentials were probably invalid - "
                f"No redirect after POST to: {auth_session['dex_login_url']}"
            )

        # store the session cookies in a "key1=value1; key2=value2" string
        auth_session["session_cookie"] = "; ".join([f"{c.name}={c.value}" for c in s.cookies])

    return auth_session
def apply_docker_port_patches():
    """Apply all necessary patches to enable port/environment support in DockerRunner."""

    # Patch 1: Enable ports argument in DockerRunner
    _patch_docker_runner_args()

    # Patch 2: Extend run_docker_container to accept additional arguments
    _patch_run_docker_container()

    # Patch 3: Modify DockerTaskHandler to pass through container arguments
    _patch_docker_task_handler()

    # Patch 4: Extend DockerRunner constructor
    _patch_docker_runner_init()


def _patch_docker_runner_args():
    """Add ports and environment to the set of allowed DockerRunner arguments."""
    if not hasattr(DockerRunner, 'DOCKER_CONTAINER_RUN_ARGS'):
        # Older KFP: create the allow-set from scratch, including ports/environment
        DockerRunner.DOCKER_CONTAINER_RUN_ARGS = {
            'ports', 'environment', 'volumes', 'network_mode', 'user',
            'working_dir', 'entrypoint', 'command', 'auto_remove', 'privileged'
        }
    else:
        # Newer KFP: extend the existing allow-set
        DockerRunner.DOCKER_CONTAINER_RUN_ARGS.add('ports')
        DockerRunner.DOCKER_CONTAINER_RUN_ARGS.add('environment')


def _patch_run_docker_container():
    """Patch run_docker_container to accept additional Docker arguments.

    Replaces kfp.local.docker_task_handler.run_docker_container with a
    version that forwards extra kwargs (ports, environment, ...) to
    client.containers.run().
    """

    def patched_run_docker_container(client, image, command, volumes, **kwargs):
        """Enhanced run_docker_container with support for additional Docker arguments."""

        # Append ':latest' only when the final path segment carries no tag.
        # Inspecting the segment after the last '/' (and splitting the tag
        # with rsplit) keeps registry references that contain a port, e.g.
        # 'localhost:5000/img' / 'localhost:5000/img:dev', working correctly.
        if ':' not in image.rsplit('/', 1)[-1]:
            image = f'{image}:latest'

        # Check if the image is already present locally
        image_exists = any(
            image in existing_image.tags
            for existing_image in client.images.list()
        )

        if image_exists:
            print(f'Found image {image!r}\n')
        else:
            print(f'Pulling image {image!r}')
            # rsplit: split only on the tag separator, never a registry port colon
            repository, tag = image.rsplit(':', 1)
            client.images.pull(repository=repository, tag=tag)
            print('Image pull complete\n')

        # Run container with all provided arguments
        container = client.containers.run(
            image=image,
            command=command,
            detach=True,
            stdout=True,
            stderr=True,
            volumes=volumes,
            **kwargs  # Pass through ports, environment and other arguments
        )

        # Stream logs
        for line in container.logs(stream=True):
            print(line.decode(), end='')

        return container.wait()['StatusCode']

    # Replace original function
    docker_task_handler.run_docker_container = patched_run_docker_container


def _patch_docker_task_handler():
    """Patch DockerTaskHandler.run to pass container arguments to run_docker_container."""

    def patched_docker_task_handler_run(self):
        """Enhanced DockerTaskHandler.run method with container args support."""
        client = docker.from_env()
        try:
            volumes = self.get_volumes_to_mount()

            # Collect additional container arguments from the runner.
            # Copy them so the pop('volumes') below does not mutate the
            # runner's stored args — otherwise the second task of a run
            # would silently lose its volume mappings.
            extra_args = {}
            if hasattr(self.runner, 'container_run_args'):
                extra_args = dict(self.runner.container_run_args)
            elif hasattr(self.runner, '__dict__'):
                # Fallback: use all non-private attributes as container args
                extra_args = {k: v for k, v in self.runner.__dict__.items()
                              if not k.startswith('_') and k != 'container_run_args'}

            # Merge user-supplied volumes into the KFP-managed ones
            if 'volumes' in extra_args:
                user_volumes = extra_args.pop('volumes')
                volumes.update(user_volumes)
            return_code = docker_task_handler.run_docker_container(
                client=client,
                image=self.image,
                command=self.full_command,
                volumes=volumes,
                **extra_args
            )
        finally:
            client.close()

        from kfp.local import status
        return status.Status.SUCCESS if return_code == 0 else status.Status.FAILURE

    # Replace original method
    docker_task_handler.DockerTaskHandler.run = patched_docker_task_handler_run


def _patch_docker_runner_init():
    """Patch DockerRunner constructor to store container run arguments."""

    def patched_docker_runner_init(self, **kwargs):
        """Enhanced DockerRunner constructor that stores container run arguments."""
        import os

        # Auto-pass debug environment variables from the host to the container
        environment = kwargs.get('environment', {})
        if 'KFP_DEBUG' not in environment and 'KFP_DEBUG' in os.environ:
            environment['KFP_DEBUG'] = os.environ['KFP_DEBUG']
        if 'KFP_DEBUG_PORT' not in environment and 'KFP_DEBUG_PORT' in os.environ:
            environment['KFP_DEBUG_PORT'] = os.environ['KFP_DEBUG_PORT']

        if environment:
            kwargs['environment'] = environment

        # Store container run args for DockerTaskHandler to pick up later
        self.container_run_args = kwargs

        # Call __post_init__ if it exists (for dataclass compatibility)
        if hasattr(DockerRunner, '__post_init__'):
            self.__post_init__()

    # Replace constructor (the original __init__ is intentionally not chained:
    # this hack fully takes over argument handling for pre-2.14 DockerRunner)
    DockerRunner.__init__ = patched_docker_runner_init


# Apply patches immediately when module is imported
apply_docker_port_patches()

print("✅ KFP Docker port & environment patches applied successfully!")
print(" Usage (upstream 2.14+ compatible): DockerRunner(ports={'5678/tcp': 5678}, environment={'DEBUG': 'true'})")
print(" This patch will be obsolete once you upgrade to KFP 2.14+")
+[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978, upload-time = "2024-11-30T04:30:14.439Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, +] + +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, +] + +[[package]] +name = "certifi" +version = "2025.7.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/76/52c535bcebe74590f296d6c77c86dabf761c41980e1347a2422e4aa2ae41/certifi-2025.7.14.tar.gz", hash = "sha256:8ea99dbdfaaf2ba2f9bac77b9249ef62ec5218e7c2b2e903378ed5fccf765995", size = 163981, upload-time = "2025-07-14T03:29:28.449Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4f/52/34c6cf5bb9285074dc3531c437b3919e825d976fde097a7a73f79e726d03/certifi-2025.7.14-py3-none-any.whl", hash = 
"sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2", size = 162722, upload-time = "2025-07-14T03:29:26.863Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, + { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, + { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" }, + { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" }, + { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, upload-time = "2025-05-02T08:32:46.197Z" }, + { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" }, + { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" 
}, + { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" }, + { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload-time = "2025-05-02T08:32:54.573Z" }, + { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload-time = "2025-05-02T08:32:56.363Z" }, + { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload-time = "2025-05-02T08:32:58.551Z" }, + { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload-time = "2025-05-02T08:33:00.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload-time = "2025-05-02T08:33:02.081Z" }, + { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload-time = "2025-05-02T08:33:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload-time = "2025-05-02T08:33:06.418Z" }, + { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload-time = "2025-05-02T08:33:08.183Z" }, + { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload-time = "2025-05-02T08:33:09.986Z" }, + { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload-time = 
"2025-05-02T08:33:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload-time = "2025-05-02T08:33:13.707Z" }, + { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload-time = "2025-05-02T08:33:15.458Z" }, + { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064, upload-time = "2025-05-02T08:33:17.06Z" }, + { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, upload-time = "2025-05-02T08:33:18.753Z" }, + { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, +] + +[[package]] +name = "click" +version = "8.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = 
"sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "debugpy" +version = "1.8.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/3a9a28ddb750a76eaec445c7f4d3147ea2c579a97dbd9e25d39001b92b21/debugpy-1.8.15.tar.gz", hash = "sha256:58d7a20b7773ab5ee6bdfb2e6cf622fdf1e40c9d5aef2857d85391526719ac00", size = 1643279, upload-time = "2025-07-15T16:43:29.135Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/4a/4508d256e52897f5cdfee6a6d7580974811e911c6d01321df3264508a5ac/debugpy-1.8.15-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:3dcc7225cb317469721ab5136cda9ff9c8b6e6fb43e87c9e15d5b108b99d01ba", size = 2511197, upload-time = "2025-07-15T16:43:42.343Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/8d/7f6ef1097e7fecf26b4ef72338d08e41644a41b7ee958a19f494ffcffc29/debugpy-1.8.15-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:047a493ca93c85ccede1dbbaf4e66816794bdc214213dde41a9a61e42d27f8fc", size = 4229517, upload-time = "2025-07-15T16:43:44.14Z" }, + { url = "https://files.pythonhosted.org/packages/3f/e8/e8c6a9aa33a9c9c6dacbf31747384f6ed2adde4de2e9693c766bdf323aa3/debugpy-1.8.15-cp312-cp312-win32.whl", hash = "sha256:b08e9b0bc260cf324c890626961dad4ffd973f7568fbf57feb3c3a65ab6b6327", size = 5276132, upload-time = "2025-07-15T16:43:45.529Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ad/231050c6177b3476b85fcea01e565dac83607b5233d003ff067e2ee44d8f/debugpy-1.8.15-cp312-cp312-win_amd64.whl", hash = "sha256:e2a4fe357c92334272eb2845fcfcdbec3ef9f22c16cf613c388ac0887aed15fa", size = 5317645, upload-time = "2025-07-15T16:43:46.968Z" }, + { url = "https://files.pythonhosted.org/packages/28/70/2928aad2310726d5920b18ed9f54b9f06df5aa4c10cf9b45fa18ff0ab7e8/debugpy-1.8.15-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:f5e01291ad7d6649aed5773256c5bba7a1a556196300232de1474c3c372592bf", size = 2495538, upload-time = "2025-07-15T16:43:48.927Z" }, + { url = "https://files.pythonhosted.org/packages/9e/c6/9b8ffb4ca91fac8b2877eef63c9cc0e87dd2570b1120054c272815ec4cd0/debugpy-1.8.15-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94dc0f0d00e528d915e0ce1c78e771475b2335b376c49afcc7382ee0b146bab6", size = 4221874, upload-time = "2025-07-15T16:43:50.282Z" }, + { url = "https://files.pythonhosted.org/packages/55/8a/9b8d59674b4bf489318c7c46a1aab58e606e583651438084b7e029bf3c43/debugpy-1.8.15-cp313-cp313-win32.whl", hash = "sha256:fcf0748d4f6e25f89dc5e013d1129ca6f26ad4da405e0723a4f704583896a709", size = 5275949, upload-time = "2025-07-15T16:43:52.079Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/83/9e58e6fdfa8710a5e6ec06c2401241b9ad48b71c0a7eb99570a1f1edb1d3/debugpy-1.8.15-cp313-cp313-win_amd64.whl", hash = "sha256:73c943776cb83e36baf95e8f7f8da765896fd94b05991e7bc162456d25500683", size = 5317720, upload-time = "2025-07-15T16:43:53.703Z" }, + { url = "https://files.pythonhosted.org/packages/07/d5/98748d9860e767a1248b5e31ffa7ce8cb7006e97bf8abbf3d891d0a8ba4e/debugpy-1.8.15-py2.py3-none-any.whl", hash = "sha256:bce2e6c5ff4f2e00b98d45e7e01a49c7b489ff6df5f12d881c67d2f1ac635f3d", size = 5282697, upload-time = "2025-07-15T16:44:07.996Z" }, +] + +[[package]] +name = "decorator" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, +] + +[[package]] +name = "docker" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = 
"sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, +] + +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + +[[package]] +name = "executing" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693, upload-time = "2025-01-22T15:41:29.403Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702, upload-time = "2025-01-22T15:41:25.929Z" }, +] + +[[package]] +name = "fancycompleter" +version = "0.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyreadline3", marker = "python_full_version < '3.13' and sys_platform == 'win32'" }, + { name = "pyrepl", marker = "python_full_version < '3.13'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/4e/4c/d11187dee93eff89d082afda79b63c79320ae1347e49485a38f05ad359d0/fancycompleter-0.11.1.tar.gz", hash = "sha256:5b4ad65d76b32b1259251516d0f1cb2d82832b1ff8506697a707284780757f69", size = 341776, upload-time = "2025-05-26T12:59:11.045Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/c3/6f0e3896f193528bbd2b4d2122d4be8108a37efab0b8475855556a8c4afa/fancycompleter-0.11.1-py3-none-any.whl", hash = "sha256:44243d7fab37087208ca5acacf8f74c0aa4d733d04d593857873af7513cdf8a6", size = 11207, upload-time = "2025-05-26T12:59:09.857Z" }, +] + +[[package]] +name = "google-api-core" +version = "2.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/21/e9d043e88222317afdbdb567165fdbc3b0aad90064c7e0c9eb0ad9955ad8/google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8", size = 165443, upload-time = "2025-06-12T20:52:20.439Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/4b/ead00905132820b623732b175d66354e9d3e69fcf2a5dcdab780664e7896/google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7", size = 160807, upload-time = "2025-06-12T20:52:19.334Z" }, +] + +[[package]] +name = "google-auth" +version = "2.40.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "pyasn1-modules" }, + { name = "rsa" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/9b/e92ef23b84fa10a64ce4831390b7a4c2e53c0132568d99d4ae61d04c8855/google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77", size = 281029, upload-time = "2025-06-04T18:04:57.577Z" } +wheels = [ + 
{ url = "https://files.pythonhosted.org/packages/17/63/b19553b658a1692443c62bd07e5868adaa0ad746a0751ba62c59568cd45b/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca", size = 216137, upload-time = "2025-06-04T18:04:55.573Z" }, +] + +[[package]] +name = "google-cloud-core" +version = "2.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d6/b8/2b53838d2acd6ec6168fd284a990c76695e84c65deee79c9f3a4276f6b4f/google_cloud_core-2.4.3.tar.gz", hash = "sha256:1fab62d7102844b278fe6dead3af32408b1df3eb06f5c7e8634cbd40edc4da53", size = 35861, upload-time = "2025-03-10T21:05:38.948Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/86/bda7241a8da2d28a754aad2ba0f6776e35b67e37c36ae0c45d49370f1014/google_cloud_core-2.4.3-py2.py3-none-any.whl", hash = "sha256:5130f9f4c14b4fafdff75c79448f9495cfade0d8775facf1b09c3bf67e027f6e", size = 29348, upload-time = "2025-03-10T21:05:37.785Z" }, +] + +[[package]] +name = "google-cloud-storage" +version = "2.19.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-cloud-core" }, + { name = "google-crc32c" }, + { name = "google-resumable-media" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/76/4d965702e96bb67976e755bed9828fa50306dca003dbee08b67f41dd265e/google_cloud_storage-2.19.0.tar.gz", hash = "sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2", size = 5535488, upload-time = "2024-12-05T01:35:06.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/94/6db383d8ee1adf45dc6c73477152b82731fa4c4a46d9c1932cc8757e0fd4/google_cloud_storage-2.19.0-py2.py3-none-any.whl", hash = 
"sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba", size = 131787, upload-time = "2024-12-05T01:35:04.736Z" }, +] + +[[package]] +name = "google-crc32c" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/ae/87802e6d9f9d69adfaedfcfd599266bf386a54d0be058b532d04c794f76d/google_crc32c-1.7.1.tar.gz", hash = "sha256:2bff2305f98846f3e825dbeec9ee406f89da7962accdb29356e4eadc251bd472", size = 14495, upload-time = "2025-03-26T14:29:13.32Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/b7/787e2453cf8639c94b3d06c9d61f512234a82e1d12d13d18584bd3049904/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2d73a68a653c57281401871dd4aeebbb6af3191dcac751a76ce430df4d403194", size = 30470, upload-time = "2025-03-26T14:34:31.655Z" }, + { url = "https://files.pythonhosted.org/packages/ed/b4/6042c2b0cbac3ec3a69bb4c49b28d2f517b7a0f4a0232603c42c58e22b44/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:22beacf83baaf59f9d3ab2bbb4db0fb018da8e5aebdce07ef9f09fce8220285e", size = 30315, upload-time = "2025-03-26T15:01:54.634Z" }, + { url = "https://files.pythonhosted.org/packages/29/ad/01e7a61a5d059bc57b702d9ff6a18b2585ad97f720bd0a0dbe215df1ab0e/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19eafa0e4af11b0a4eb3974483d55d2d77ad1911e6cf6f832e1574f6781fd337", size = 33180, upload-time = "2025-03-26T14:41:32.168Z" }, + { url = "https://files.pythonhosted.org/packages/3b/a5/7279055cf004561894ed3a7bfdf5bf90a53f28fadd01af7cd166e88ddf16/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d86616faaea68101195c6bdc40c494e4d76f41e07a37ffdef270879c15fb65", size = 32794, upload-time = "2025-03-26T14:41:33.264Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/d6/77060dbd140c624e42ae3ece3df53b9d811000729a5c821b9fd671ceaac6/google_crc32c-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:b7491bdc0c7564fcf48c0179d2048ab2f7c7ba36b84ccd3a3e1c3f7a72d3bba6", size = 33477, upload-time = "2025-03-26T14:29:10.94Z" }, + { url = "https://files.pythonhosted.org/packages/8b/72/b8d785e9184ba6297a8620c8a37cf6e39b81a8ca01bb0796d7cbb28b3386/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:df8b38bdaf1629d62d51be8bdd04888f37c451564c2042d36e5812da9eff3c35", size = 30467, upload-time = "2025-03-26T14:36:06.909Z" }, + { url = "https://files.pythonhosted.org/packages/34/25/5f18076968212067c4e8ea95bf3b69669f9fc698476e5f5eb97d5b37999f/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:e42e20a83a29aa2709a0cf271c7f8aefaa23b7ab52e53b322585297bb94d4638", size = 30309, upload-time = "2025-03-26T15:06:15.318Z" }, + { url = "https://files.pythonhosted.org/packages/92/83/9228fe65bf70e93e419f38bdf6c5ca5083fc6d32886ee79b450ceefd1dbd/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:905a385140bf492ac300026717af339790921f411c0dfd9aa5a9e69a08ed32eb", size = 33133, upload-time = "2025-03-26T14:41:34.388Z" }, + { url = "https://files.pythonhosted.org/packages/c3/ca/1ea2fd13ff9f8955b85e7956872fdb7050c4ace8a2306a6d177edb9cf7fe/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b211ddaf20f7ebeec5c333448582c224a7c90a9d98826fbab82c0ddc11348e6", size = 32773, upload-time = "2025-03-26T14:41:35.19Z" }, + { url = "https://files.pythonhosted.org/packages/89/32/a22a281806e3ef21b72db16f948cad22ec68e4bdd384139291e00ff82fe2/google_crc32c-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:0f99eaa09a9a7e642a61e06742856eec8b19fc0037832e03f941fe7cf0c8e4db", size = 33475, upload-time = "2025-03-26T14:29:11.771Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/c5/002975aff514e57fc084ba155697a049b3f9b52225ec3bc0f542871dd524/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32d1da0d74ec5634a05f53ef7df18fc646666a25efaaca9fc7dcfd4caf1d98c3", size = 33243, upload-time = "2025-03-26T14:41:35.975Z" }, + { url = "https://files.pythonhosted.org/packages/61/cb/c585282a03a0cea70fcaa1bf55d5d702d0f2351094d663ec3be1c6c67c52/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e10554d4abc5238823112c2ad7e4560f96c7bf3820b202660373d769d9e6e4c9", size = 32870, upload-time = "2025-03-26T14:41:37.08Z" }, +] + +[[package]] +name = "google-resumable-media" +version = "2.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-crc32c" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/5a/0efdc02665dca14e0837b62c8a1a93132c264bd02054a15abb2218afe0ae/google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0", size = 2163099, upload-time = "2024-08-07T22:20:38.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/35/b8d3baf8c46695858cb9d8835a53baa1eeb9906ddaf2f728a5f5b640fd1e/google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa", size = 81251, upload-time = "2024-08-07T22:20:36.409Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.70.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/24/33db22342cf4a2ea27c9955e6713140fedd51e8b141b5ce5260897020f1a/googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257", size = 145903, upload-time = "2025-04-14T10:17:02.924Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, +] + +[[package]] +name = "ipdb" +version = "0.13.13" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "decorator" }, + { name = "ipython" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/1b/7e07e7b752017f7693a0f4d41c13e5ca29ce8cbcfdcc1fd6c4ad8c0a27a0/ipdb-0.13.13.tar.gz", hash = "sha256:e3ac6018ef05126d442af680aad863006ec19d02290561ac88b8b1c0b0cfc726", size = 17042, upload-time = "2023-03-09T15:40:57.487Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/4c/b075da0092003d9a55cf2ecc1cae9384a1ca4f650d51b00fc59875fe76f6/ipdb-0.13.13-py3-none-any.whl", hash = "sha256:45529994741c4ab6d2388bfa5d7b725c2cf7fe9deffabdb8a6113aa5ed449ed4", size = 12130, upload-time = "2023-03-09T15:40:55.021Z" }, +] + +[[package]] +name = "ipython" +version = "9.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "decorator" }, + { name = "ipython-pygments-lexers" }, 
+ { name = "jedi" }, + { name = "matplotlib-inline" }, + { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "prompt-toolkit" }, + { name = "pygments" }, + { name = "stack-data" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/54/80/406f9e3bde1c1fd9bf5a0be9d090f8ae623e401b7670d8f6fdf2ab679891/ipython-9.4.0.tar.gz", hash = "sha256:c033c6d4e7914c3d9768aabe76bbe87ba1dc66a92a05db6bfa1125d81f2ee270", size = 4385338, upload-time = "2025-07-01T11:11:30.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/f8/0031ee2b906a15a33d6bfc12dd09c3dfa966b3cb5b284ecfb7549e6ac3c4/ipython-9.4.0-py3-none-any.whl", hash = "sha256:25850f025a446d9b359e8d296ba175a36aedd32e83ca9b5060430fe16801f066", size = 611021, upload-time = "2025-07-01T11:11:27.85Z" }, +] + +[[package]] +name = "ipython-pygments-lexers" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393, upload-time = "2025-01-17T11:24:34.505Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074, upload-time = "2025-01-17T11:24:33.271Z" }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = 
"sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, +] + +[[package]] +name = "kfp" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "docstring-parser" }, + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-cloud-storage" }, + { name = "kfp-pipeline-spec" }, + { name = "kfp-server-api" }, + { name = "kubernetes" }, + { name = "protobuf" }, + { name = "pyyaml" }, + { name = "requests-toolbelt" }, + { name = "tabulate" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/f8/8f9a4e34c6222fa8808644aa14b3cbc0fdba95eb06eed7ce8aeb299c1dc2/kfp-2.7.0.tar.gz", hash = "sha256:8a2065527ec3d50617bd374c2b25cffeab16d93b34e4be08c1ca3e4bd8d2cc0c", size = 441776, upload-time = "2024-02-14T20:25:43.265Z" } + +[[package]] +name = "kfp-pipeline-spec" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/0a/269a792545cf8a87a30b84bebe69a2b07c483b2887690e8f48c9a91e8060/kfp_pipeline_spec-0.3.0-py3-none-any.whl", hash = "sha256:1db84524a0a2d6c9d36e7e87e6fa0e181bf1ba1513d29dcd54f7b8822e7a52a2", size = 12598, upload-time = "2024-01-10T00:24:34.83Z" }, +] + +[[package]] +name = "kfp-server-api" +version = "2.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "python-dateutil" }, + { name = "six" }, + { name = "urllib3" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/d9/4b/1b1c9286047e78ebc9de2a9d4d43921d6efb5e6550fdc38229127a03aa53/kfp-server-api-2.0.5.tar.gz", hash = "sha256:c9cfbf0e87271d3bfe96e5ecc9ffbdd6ab566bc1c9a9ddc2a39d7698a16e26ff", size = 63401, upload-time = "2023-12-08T19:21:48.908Z" } + +[[package]] +name = "kubernetes" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "google-auth" }, + { name = "python-dateutil" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "requests-oauthlib" }, + { name = "setuptools" }, + { name = "six" }, + { name = "urllib3" }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/19/2f351c0eaf05234dc33a6e0ffc7894e9dedab0ff341311c5b4ba44f2d8ac/kubernetes-26.1.0.tar.gz", hash = "sha256:5854b0c508e8d217ca205591384ab58389abdae608576f9c9afc35a3c76a366c", size = 736370, upload-time = "2023-02-16T01:04:37.088Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/21/ada0c5eedb678ab663f8e387734418fdd1a26be28fc919a0c32e52964047/kubernetes-26.1.0-py2.py3-none-any.whl", hash = "sha256:e3db6800abf7e36c38d2629b5cb6b74d10988ee0cba6fba45595a7cbe60c0042", size = 1446361, upload-time = "2023-02-16T01:04:34.33Z" }, +] + +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159, upload-time = "2024-04-15T13:44:44.803Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 
9899, upload-time = "2024-04-15T13:44:43.265Z" }, +] + +[[package]] +name = "oauthlib" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, +] + +[[package]] +name = "parso" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609, upload-time = "2024-04-05T09:43:55.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650, upload-time = "2024-04-05T09:43:53.299Z" }, +] + +[[package]] +name = "pdbpp" +version = "0.11.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fancycompleter" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/4c/118ef9534ac0632859b48c305d8c5dc9d6f963564fdfa66bc785c560247c/pdbpp-0.11.7.tar.gz", hash = "sha256:cb6604ac31a35ed0f2a29650a8c022b26284620be3e01cfd41b683b91da1ff14", size = 76026, upload-time = "2025-07-18T09:36:02.781Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/99/e9/704bbc08aace64fee536e4c2c20f63f64f6fdbad72938c5ed46c9723a9f1/pdbpp-0.11.7-py3-none-any.whl", hash = "sha256:51916b63693898cf4881b36b4501c83947758d73f582f1f84893662b163bdb75", size = 30545, upload-time = "2025-07-18T09:36:01.478Z" }, +] + +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, +] + +[[package]] +name = "pip" +version = "25.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/59/de/241caa0ca606f2ec5fe0c1f4261b0465df78d786a38da693864a116c37f4/pip-25.1.1.tar.gz", hash = "sha256:3de45d411d308d5054c2168185d8da7f9a2cd753dbac8acbfa88a8909ecd9077", size = 1940155, upload-time = "2025-05-02T15:14:02.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/a2/d40fb2460e883eca5199c62cfc2463fd261f760556ae6290f88488c362c0/pip-25.1.1-py3-none-any.whl", hash = "sha256:2913a38a2abf4ea6b64ab507bd9e967f3b53dc1ede74b01b0931e1ce548751af", size = 1825227, upload-time = "2025-05-02T15:13:59.102Z" }, +] + +[[package]] +name = "pipelines" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "debugpy" }, + { name = "docker" }, + { name = "ipdb" }, + { name = "kfp" }, + { name = "pdbpp" }, + { name = "pip" }, + { name = "truststore" }, +] + +[package.metadata] +requires-dist = [ + { name = "debugpy", specifier = ">=1.8.15" }, + { name = "docker", specifier = ">=7.1.0" }, + { name = "ipdb", specifier = ">=0.13.13" }, + { name = "kfp", specifier = "==2.7" }, + { name = "pdbpp", specifier = ">=0.11.7" }, + { name = "pip", specifier = ">=25.1.1" }, + { name = "truststore", specifier = ">=0.10.3" }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.51" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bb/6e/9d084c929dfe9e3bfe0c6a47e31f78a25c54627d64a66e884a8bf5474f1c/prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed", size = 428940, upload-time = "2025-04-15T09:18:47.731Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810, upload-time = "2025-04-15T09:18:44.753Z" }, +] + +[[package]] +name = "proto-plus" +version = "1.26.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f4/ac/87285f15f7cce6d4a008f33f1757fb5a13611ea8914eb58c3d0d26243468/proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012", size = 56142, upload-time = "2025-03-10T15:54:38.843Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/6d/280c4c2ce28b1593a19ad5239c8b826871fc6ec275c21afc8e1820108039/proto_plus-1.26.1-py3-none-any.whl", hash = 
"sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66", size = 50163, upload-time = "2025-03-10T15:54:37.335Z" }, +] + +[[package]] +name = "protobuf" +version = "4.25.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/01/34c8d2b6354906d728703cb9d546a0e534de479e25f1b581e4094c4a85cc/protobuf-4.25.8.tar.gz", hash = "sha256:6135cf8affe1fc6f76cced2641e4ea8d3e59518d1f24ae41ba97bcad82d397cd", size = 380920, upload-time = "2025-05-28T14:22:25.153Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/ff/05f34305fe6b85bbfbecbc559d423a5985605cad5eda4f47eae9e9c9c5c5/protobuf-4.25.8-cp310-abi3-win32.whl", hash = "sha256:504435d831565f7cfac9f0714440028907f1975e4bed228e58e72ecfff58a1e0", size = 392745, upload-time = "2025-05-28T14:22:10.524Z" }, + { url = "https://files.pythonhosted.org/packages/08/35/8b8a8405c564caf4ba835b1fdf554da869954712b26d8f2a98c0e434469b/protobuf-4.25.8-cp310-abi3-win_amd64.whl", hash = "sha256:bd551eb1fe1d7e92c1af1d75bdfa572eff1ab0e5bf1736716814cdccdb2360f9", size = 413736, upload-time = "2025-05-28T14:22:13.156Z" }, + { url = "https://files.pythonhosted.org/packages/28/d7/ab27049a035b258dab43445eb6ec84a26277b16105b277cbe0a7698bdc6c/protobuf-4.25.8-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:ca809b42f4444f144f2115c4c1a747b9a404d590f18f37e9402422033e464e0f", size = 394537, upload-time = "2025-05-28T14:22:14.768Z" }, + { url = "https://files.pythonhosted.org/packages/bd/6d/a4a198b61808dd3d1ee187082ccc21499bc949d639feb948961b48be9a7e/protobuf-4.25.8-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:9ad7ef62d92baf5a8654fbb88dac7fa5594cfa70fd3440488a5ca3bfc6d795a7", size = 294005, upload-time = "2025-05-28T14:22:16.052Z" }, + { url = "https://files.pythonhosted.org/packages/d6/c6/c9deaa6e789b6fc41b88ccbdfe7a42d2b82663248b715f55aa77fbc00724/protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl", hash = 
"sha256:83e6e54e93d2b696a92cad6e6efc924f3850f82b52e1563778dfab8b355101b0", size = 294924, upload-time = "2025-05-28T14:22:17.105Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c1/6aece0ab5209981a70cd186f164c133fdba2f51e124ff92b73de7fd24d78/protobuf-4.25.8-py3-none-any.whl", hash = "sha256:15a0af558aa3b13efef102ae6e4f3efac06f1eea11afb3a57db2901447d9fb59", size = 156757, upload-time = "2025-05-28T14:22:24.135Z" }, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" }, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752, upload-time = "2024-07-21T12:58:21.801Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, +] + +[[package]] +name = "pyasn1" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, 
upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pyreadline3" +version = "3.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839, upload-time = "2024-09-19T02:40:10.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, +] + +[[package]] +name = "pyrepl" +version = "0.11.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/08/4f/7088417e5465c53a30b918d30542aad89352ea0d635a5d077717c69a7d2b/pyrepl-0.11.4.tar.gz", hash = "sha256:efe988b4a6e5eed587e9769dc2269aeec2b6feec2f5d77995ee85b9ad7cf7063", size = 51089, upload-time = "2025-07-17T22:56:25.42Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/a5/ce97a778f096aaa27cfcb7ad09f1198cf73277dcab6c68a4b8f332d91e48/pyrepl-0.11.4-py3-none-any.whl", hash = "sha256:ac30d6340267a21c39e1b1934f92bca6b8735017d14b17e40f903b2d1563541d", size = 55596, upload-time = "2025-07-17T22:56:24.537Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" }, + { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" }, + { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, +] + +[[package]] +name = "requests" +version = "2.32.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = 
"sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258, upload-time = "2025-06-09T16:43:07.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" }, +] + +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, +] + +[[package]] +name = "requests-toolbelt" +version = "0.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0c/4c/07f01c6ac44f7784fa399137fbc8d0cdc1b5d35304e8c0f278ad82105b58/requests-toolbelt-0.10.1.tar.gz", hash = "sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d", size = 208956, upload-time = "2022-10-25T03:14:58.576Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/d3/bf87a36bff1cb88fd30a509fd366c70ec30676517ee791b2f77e0e29817a/requests_toolbelt-0.10.1-py2.py3-none-any.whl", hash = "sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7", size = 54525, upload-time = 
"2022-10-25T03:14:55.289Z" }, +] + +[[package]] +name = "rsa" +version = "4.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, +] + +[[package]] +name = "setuptools" +version = "80.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "stack-data" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pure-eval" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707, upload-time = "2023-09-30T13:58:05.479Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, +] + +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, +] + +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, +] + +[[package]] +name = "truststore" +version = "0.10.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/19/d90d35e584f58bac270beee003dd93b664335c0b9074b03b8604c6ea36ec/truststore-0.10.3.tar.gz", hash = "sha256:16ff5f6faf692acca470f9b92e66b4c0faccb9b702d0b0486d3d465932b6b3b1", size = 26214, upload-time = "2025-07-29T19:05:31.67Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/49/184050dc32c6ae6a1ef4ebd16ab5128483c02fa1e686d5559df0ba1c08b2/truststore-0.10.3-py3-none-any.whl", hash = "sha256:5bcc0889390f7b69e56be3df02f4912cfbb5a8bdb77a63fdcacb91049707879b", size = 18649, upload-time = "2025-07-29T19:05:30.414Z" }, +] + +[[package]] +name = "urllib3" +version = "1.26.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/e8/6ff5e6bc22095cfc59b6ea711b687e2b7ed4bdb373f7eeec370a97d7392f/urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32", size = 307380, upload-time = "2024-08-29T15:43:11.37Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225, upload-time = "2024-08-29T15:43:08.921Z" }, +] + 
+[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, +] + +[[package]] +name = "websocket-client" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648, upload-time = "2024-04-23T22:16:16.976Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826, upload-time = "2024-04-23T22:16:14.422Z" }, +] From 4391c561d66b708775b2e813145b8a766132f82b Mon Sep 17 00:00:00 2001 From: hsteude Date: Thu, 31 Jul 2025 11:14:29 +0200 Subject: [PATCH 02/16] Put some love in the README --- pipelines/pipe-fiction/README.md | 369 ++++++++++++++----------------- 1 file changed, 171 insertions(+), 198 deletions(-) diff --git a/pipelines/pipe-fiction/README.md b/pipelines/pipe-fiction/README.md index a29e7fe..6d6a035 100644 --- a/pipelines/pipe-fiction/README.md +++ b/pipelines/pipe-fiction/README.md @@ -10,57 +10,23 @@ As part of our MLOps platform, we support KFP for orchestrating 
machine learning - **Local Development** with immediate feedback loops - **Interactive Debugging** with full IDE integration -- **Multi-environment Support** (subprocess, Docker, cluster) - **Best Practices** for pipeline development and code organization -## Quick Start - -### Prerequisites - -- Python 3.12+ -- Docker (for Docker runner) -- VS Code (recommended) or any debugpy-compatible IDE -- Access to a Kubeflow cluster (for remote execution) - -### Setup +## Why? -1. **Navigate to the demo:** - ```bash - # After cloning the larger example repository - cd pipelines/pipe-fiction - ``` - -2. **Install dependencies for both virtual environments:** - - **Pipeline environment (KFP-specific packages):** - ```bash - cd pipelines - uv sync - source .venv/bin/activate # Activate when working on pipeline code - ``` - -3. **Build the base Docker image:** - ```bash - cd pipe-fiction-codebase - docker build -t /: . - ``` - More details on this in the `pipe-fiction-codebase` directory. - -## Repository Organization - -This demo is structured to demonstrate **separation** between standard Python code and KFP orchestration setup, while solving a key challenge with KFP Lightweight Components: +KFP pipelines are hard to develop and debug - here we try to tackle both challenges. ### The KFP Lightweight Component Challenge -KFP Lightweight Components are designed to be **self-contained** - meaning all code must be either: +KFP Lightweight Components are easier to use than container components. However, they are designed to be **self-contained** - meaning all code must be either: - Defined inline within the component function - Installed via `packages_to_install` parameter This creates a problem: code duplication. If you need the same utility function in multiple components, you typically have to copy-paste the code into each component, leading to maintenance nightmares, which is the reason most people use container components for heavy lifting. 
-Alternative approaches like publishing packages to PyPI or private registries are possible, but create their own challenges - you'd need to publish and version your package for every code change during development, which significantly slows down the iteration cycle. +Alternative approaches like publishing packages to PyPI or private registries are possible, but create their own challenges - you'd need to publish and version your package for every code change during development, which is not great. -### Our Solution: Base Image with Pre-installed Package +**Our Solution: Base Image with Pre-installed Package** We solve this by **pre-installing our ML package into the base Docker image**: @@ -89,9 +55,74 @@ def any_component(): processor = DataProcessor() ``` +### Debugging + +Why is debugging a challenge? +- In the cluster, the code runs in pods that you can't easily debug into +- When executing components locally, you must pay attention to DAG order (without the local runner) +- The local runners are not readily supported by standard debugging workflows in IDEs like VS Code or PyCharm +- This often creates a long debug loop that includes waiting for CI/CD pipelines for image builds and pipeline execution + +**Our Solution**: A combination of using the new local runner features of KFP and remote debugging sessions, as detailed below. + +## Quick Start + +### Prerequisites + +- Python 3.12+ +- Docker (for Docker runner) +- VS Code (recommended) or any debugpy-compatible IDE +- Access to a Kubeflow cluster (for remote execution) + +### Setup + +1. **Navigate to the demo:** + ```bash + # After cloning the example repository + cd pipelines/pipe-fiction + ``` + +2. **Install dependencies for the pipelines environment:** + + **Pipeline environment (KFP-specific packages):** + ```bash + cd pipelines + uv sync + source .venv/bin/activate # Activate when working on pipeline code + uv pip install -e ../pipe-fiction-codebase/ # Install custom package + ``` + +3. 
**(RE-)Build the base Docker image if needed:** + ```bash + cd pipe-fiction-codebase + docker build -t /: . + ``` + More details on this in the `pipe-fiction-codebase` directory. + +4. **Run the pipeline** + + Run locally using subprocesses (also works in KF-notebooks): + ```bash + python run_locally_in_subproc.py + ``` + + Run locally using Docker: + ```bash + python run_locally_in_docker.py + ``` + + Submit to the cluster: + ```bash + python run_in_k8s_cluster.py + ``` + +## Repository Organization + +This demo is structured to demonstrate **separation** between standard Python code and KFP orchestration setup: + ### Code Package (`pipe-fiction-codebase/`) -Contains the core ML logic as a **standalone Python package**: +Contains the core logic as a **standalone Python package**. This Python package is not KFP-related and can be independently developed, tested, and debugged. The only thing that reminds us of K8s is the Dockerfile. The important thing is that it can be installed as a package. 
``` pipe-fiction-codebase/ @@ -102,17 +133,6 @@ pipe-fiction-codebase/ └── pyproject.toml # Package definition ``` -**Key Benefits of This Approach:** - -- **No Code Duplication** - Import the same classes/functions across multiple components without copying code -- **Independent Development** - The `pipe_fiction` package can be developed, tested, and debugged completely independently of KFP -- **Data Scientists in Their Home Turf** - Familiar Python development environment without KFP complexity -- **Reusability** - The same code can be used in notebooks, scripts, web services, or other orchestration frameworks -- **Standard Testing** - Use pytest, unittest, or any testing framework without KFP complexity -- **IDE Support** - Full autocomplete, refactoring, and debugging support for your core logic -- **Version Management** - Package versioning independent of pipeline versions -- **Clean Components** - Pipeline components focus on orchestration, not business logic implementation - ### Pipeline Orchestration (`pipelines/`) Contains KFP-specific orchestration code: @@ -122,7 +142,7 @@ pipelines/ ├── components.py # KFP component definitions (import from base image) ├── pipeline.py # Pipeline assembly ├── run_locally_*.py # Local execution scripts -├── submit_to_cluster.py # Remote execution +├── run_in_k8s_cluster.py # Remote execution ├── .venv/ # Virtual environment with custom package └── utils/ # KFP utilities and patches ``` @@ -142,28 +162,36 @@ This enables full IDE integration: - "Go to definition" works across package imports - Refactoring support across the entire codebase -**This separation allows you to:** - -1. **Develop core logic** using standard Python development practices -2. **Test business logic** without spinning up KFP environments -3. **Debug algorithms** using familiar tools and workflows -4. **Reuse code** across multiple components without duplication -5. **Maintain clean abstractions** between ML code and infrastructure -6. 
**Scale development** - multiple developers can work on the package independently +*Note: this trick only works when there are no dependency conflicts between the Python venvs in the pipelines folder and the custom packages. As soon as there are multiple packages with significantly different dependencies that should run in different KFP components, this trick no longer works.* ## Execution Environments -There are (at least) three ways to execute the pipeline that uses logic from the custom package in tasks within the DAG: +As indicated in the quick start section, there are (at least) three ways to execute the pipeline that uses logic from the custom package in tasks within the DAG: ### 1. Subprocess Runner (Fastest Development) **Best for:** Quick iteration, algorithm development, initial testing +In this setup, the pipeline is run on your local machine using subprocesses. + ```bash cd pipelines python run_locally_in_subproc.py ``` +**Workflow** + +A typical workflow using the subprocess runner could look like this: +1. Implement changes in component or custom package code +2. Run `python run_locally_in_subproc.py` to see if it works +3. Set breakpoints using the debugger or IDE to figure out what's wrong +4. Build and push Docker image when ready for submission to the cluster (this could also be done in a CI/CD pipeline): + `docker build -t /: . && docker push /:` +5. Update image reference in pipeline components if needed +6. Submit pipeline to cluster: `python run_in_k8s_cluster.py` + +Note that this workflow also works inside Kubeflow notebooks.
+ **Advantages:** - Fastest execution - no container overhead - Direct debugging - breakpoints work immediately @@ -171,95 +199,112 @@ python run_locally_in_subproc.py - Full IDE integration - all debugging features available - Local Package Access - SubprocessRunner uses the package installed in the local .venv - No Image Rebuilds - Code changes are immediately available without Docker builds -- Immediate Debugging - Set breakpoints in both pipeline and package code instantly -- Fast iteration - Modify algorithms and test immediately **Limitations:** - Environment differences - may not match production environment exactly - Dependency conflicts - uses local Python environment - Limited isolation - no containerization benefits +- Lightweight components only - this does not work for container components ### 2. Docker Runner (Container-based Development) **Best for:** Pipelines with container components and multiple differing environments in the KFP tasks +This setup is similar to the local execution in subprocesses, however in this case the local Docker engine on your machine is used to run the pipeline tasks inside Docker containers. + ```bash cd pipelines python run_locally_in_docker.py ``` +**Workflow** + +For changes in the pipeline directory: +1. Modify files in `pipelines/` directory (components, pipeline definitions, pipeline arguments) +2. Run `python run_locally_in_docker.py` - changes are immediately reflected +3. Submit to cluster when ready + +For changes in the custom Python package: +1. Modify code in `pipe-fiction-codebase/` +2. Rebuild Docker image locally (no push needed): + `docker build -t /: .` +3. Run `python run_locally_in_docker.py` to test with new image +4. To debug the code inside the components, you'll need to use remote debugging (see dedicated section below) +5. Rebuild the image if needed and push it to your registry: + `docker push /:` +6. Update image reference in pipeline components if needed +7. 
Submit pipeline to cluster: `python run_in_k8s_cluster.py` + **Advantages:** - Production environment - identical to cluster execution -- Full debugging support - step into containerized code +- Debugging support over remote debugger - step into containerized code - Dependency isolation - no local conflicts -- Volume mounting - access local data files -- Port forwarding - debug server accessible from IDE **Limitations:** +- Port forwarding needed - to connect debugger or any other tools - Slower iteration - container startup overhead - Docker dependency - requires Docker runtime -- Limited resource control - basic Docker constraints only +- Image builds needed - for changes in the custom Python package +- Limited resource control - basic Docker constraints only, things like `task.set_env_variable()` or the caching mechanisms are not supported ### 3. Cluster Execution (In-Cluster Debugging) **Best for:** In-cluster issues, cluster-specific debugging, resource-intensive workloads +Here we use the KFP backend as it runs inside the Kubernetes cluster, as intended. + ```bash cd pipelines python run_in_k8s_cluster.py ``` +**Cluster Execution Workflow** + +For pipeline-only changes: +1. Modify files in `pipelines/` directory +2. Set the env var KFP_DEBUG to true for the task you want to debug: + `task.set_env_variable("KFP_DEBUG", "true")` (See remote debugging section for more details on how to connect the debugger) +3. Submit directly to cluster: `python run_in_k8s_cluster.py` + +For custom package changes: +1. Modify code in `pipe-fiction-codebase/` +2. Rebuild and push Docker image: `docker build -t /: . && docker push /:` +3. Update image reference in pipeline components +4. Set the env var KFP_DEBUG to true for the task you want to debug: + `task.set_env_variable("KFP_DEBUG", "true")` (See remote debugging section for more details) +5.
Submit pipeline to cluster + **Advantages:** - Real production environment - actual cluster resources -- Remote debugging - debug running pods via port-forwarding +- All the KFP features - everything from caching to parallelism works here - Scalability testing - real resource constraints -- Integration testing - with actual cluster services +- Integration testing - with actual cluster services, without port forwards or similar **Limitations:** - Slowest feedback - submission and scheduling overhead -- Resource constraints - limited by cluster quotas - Complex setup - requires cluster access and networking -## Development Workflows +## Remote Debugging -### Subprocess Runner Workflow -For rapid pipeline development and testing: -1. Implement changes in component or custom package code -2. Run `python run_locally_in_subproc.py` to validate immediately -3. Build and push Docker image when ready for cluster: `docker build -t /: . && docker push` -4. Update image reference in pipeline components if needed -5. Submit pipeline to cluster: `python submit_to_cluster.py` +As indicated above, you can use your preferred way of debugging Python code in the custom Python package, and for pipelines executed locally using the subprocess runner. However, as soon as the code is run in containers, we need a remote debugging setup. -### Docker Runner Workflow +In this demo we use [debugpy](https://github.com/microsoft/debugpy). So we start a debugging server inside the component: -**For pipeline-only changes:** -1. Modify files in `pipelines/` directory (components, pipeline definitions) -2. Run `python run_locally_in_docker.py` - changes are immediately reflected -3. Submit to cluster when ready - -**For custom package changes:** -1. Modify code in `pipe-fiction-codebase/` -2. Rebuild Docker image locally: `docker build -t /: .` -3. Run `python run_locally_in_docker.py` to test with new image -4. Push image to registry: `docker push /:` -5. 
Update image reference in pipeline components if needed -6. Submit pipeline to cluster +```python +@component(packages_to_install=["debugpy"]) +def your_component_name(): + import os -### Cluster Execution Workflow + if os.getenv("KFP_DEBUG") == "true": + import debugpy -**For pipeline-only changes:** -1. Modify files in `pipelines/` directory -2. Submit directly to cluster: `python submit_to_cluster.py` - -**For custom package changes:** -1. Modify code in `pipe-fiction-codebase/` -2. Rebuild and push Docker image: `docker build -t /: . && docker push` -3. Update image reference in pipeline components -4. Submit pipeline to cluster - -## Debugging Setup + debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678")) + debugpy.listen(("0.0.0.0", debug_port)) + debugpy.wait_for_client() + ... +``` -### VS Code Configuration +The debug server then waits until a debugger connects. This can for example be done with VS Code like so: Create `.vscode/launch.json`: @@ -288,102 +333,49 @@ Create `.vscode/launch.json`: } ``` -### Other IDE Support - -**PyCharm:** -- Run → Edit Configurations → Python Remote Debug -- Host: `localhost`, Port: `5678` -- Path mappings: Local: `pipe-fiction-codebase` → Remote: `/app` - -**Any debugpy-compatible editor:** -- Connect to `localhost:5678` -- Configure path mappings as needed +If you now run the pipeline in Docker by executing `python run_locally_in_docker.py`, the code will wait until you open this project in VS Code and hit the debug button. Note that you'll need to have the [Python debugging extension](https://code.visualstudio.com/docs/python/debugging) installed in VS Code. ### Debugging Workflow 1. **Enable debug mode:** + + For local run in Docker: ```python # In run_locally_in_docker.py environment={'KFP_DEBUG': 'true'} ``` + + For execution in KFP: + ```python + # In the pipeline.py file + task.set_env_variable("KFP_DEBUG", "True") + ``` 2. 
**Start the pipeline:** + + Locally: ```bash python run_locally_in_docker.py ``` + In KFP cluster: + ```bash + python run_in_k8s_cluster.py + ``` + 3. **Connect debugger:** - Pipeline will pause and wait for debugger connection - Attach your IDE debugger to `localhost:5678` + - In the `run_locally_in_docker.py` the port settings are already set such that it works. However, for KFP you'll need to create a port forwarding from the component's pod to your machine (see next section) 4. **Debug interactively:** - - Set breakpoints in your pipeline components + + Now debugging should work as you know it: + - Set breakpoints in your pipeline components or the code package that gets imported - Step through code execution - Inspect variables and data structures - Debug both pipeline logic and imported modules -## Example: Debugging a Data Processing Pipeline - -This demo includes a simple data processing pipeline that demonstrates common debugging scenarios: - -### Components - -1. **DataGenerator Component** (`generate_data_comp`) - - Generates sample text data for processing - - Demonstrates data creation debugging - - Logs operations with structured logging - -2. 
**DataProcessor Component** (`process_data_comp`) - - Processes text data and extracts information - - Counts words and generates statistics - - Demonstrates data transformation debugging - -### Debugging Scenarios - -**Data Generation Logic:** -```python -generator = DataGenerator() -lines = generator.create_sample_data() # Set breakpoint here -``` - -**Data Processing Logic:** -```python -processor = DataProcessor() -processed_data = processor.process_lines(lines) # Debug transformations -summary = processor.get_summary(processed_data) # Inspect results -``` - -**Cross-Component Data Flow:** -- Debug how data flows between pipeline components -- Inspect intermediate outputs and transformations -- Validate data contracts between components - -## Advanced Features - -### Volume Mounting for Data Access - -```python -# Mount local data directory into container -local.init(runner=local.DockerRunner( - volumes={ - os.path.abspath('../data'): {'bind': '/app/data', 'mode': 'ro'} - } -)) - -# Access files in container -result = example_pipeline(file_path='/app/data/local-data-file.txt') -``` - -### Environment-Controlled Debugging - -```python -# Enable/disable debugging via environment variables -environment={ - 'KFP_DEBUG': 'true', # Enable debugging - 'KFP_DEBUG_PORT': '5678', # Custom debug port -} -``` - ### Cluster Debugging with Port Forwarding ```bash @@ -406,6 +398,8 @@ This demo includes monkey patches for older KFP versions (pre-2.14) to enable: - Environment variable injection - Volume mounting for data access +in the DockerRunner of KFP local. + These patches provide forward compatibility and will be obsolete when upgrading to KFP 2.14+. ### Debugging Architecture @@ -415,24 +409,3 @@ The debugging setup works by: 2. **Port forwarding** from container to host 3. **Path mapping** between local IDE and remote container 4. 
**Environment control** for enabling/disabling debug mode - -## Contributing - -This demo represents best practices we've developed for KFP pipeline development. Contributions and improvements are welcome! - -### Future Enhancements - -- Support for KFP 2.14+ native features -- Additional debugging tools integration -- Performance profiling examples -- Multi-language component support - -## Additional Resources - -- [Kubeflow Pipelines Documentation](https://kubeflow-pipelines.readthedocs.io/) -- [debugpy Documentation](https://github.com/microsoft/debugpy) -- [VS Code Python Debugging](https://code.visualstudio.com/docs/python/debugging) - ---- - -For questions or support with KFP development on our MLOps platform, please reach out to our team. From 040c4c1a8b83fcaa3c4e9b8db4975d6a07c349ba Mon Sep 17 00:00:00 2001 From: hsteude Date: Thu, 31 Jul 2025 15:34:21 +0200 Subject: [PATCH 03/16] Back to remote_debugger argument (not env var) --- pipelines/pipe-fiction/.vscode/launch.json | 13 ++- pipelines/pipe-fiction/README.md | 90 ++++++++++--------- .../pipe-fiction/pipelines/components.py | 20 ++--- pipelines/pipe-fiction/pipelines/pipeline.py | 4 +- 4 files changed, 68 insertions(+), 59 deletions(-) diff --git a/pipelines/pipe-fiction/.vscode/launch.json b/pipelines/pipe-fiction/.vscode/launch.json index 36a8670..2fea988 100644 --- a/pipelines/pipe-fiction/.vscode/launch.json +++ b/pipelines/pipe-fiction/.vscode/launch.json @@ -2,7 +2,18 @@ "version": "0.2.0", "configurations": [ { - "name": "Python Debugger: Remote Attach", + "name": "Pipeline: Remote SubprocessRunner", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "justMyCode": false, + "subProcess": true + }, + { + "name": "Pipeline: Remote KFP/DockerRunner", "type": "debugpy", "request": "attach", "connect": { diff --git a/pipelines/pipe-fiction/README.md b/pipelines/pipe-fiction/README.md index 6d6a035..642ec68 100644 --- 
a/pipelines/pipe-fiction/README.md +++ b/pipelines/pipe-fiction/README.md @@ -194,9 +194,7 @@ Note that this workflow also works inside Kubeflow notebooks. **Advantages:** - Fastest execution - no container overhead -- Direct debugging - breakpoints work immediately - Live code changes - no rebuilds needed -- Full IDE integration - all debugging features available - Local Package Access - SubprocessRunner uses the package installed in the local .venv - No Image Rebuilds - Code changes are immediately available without Docker builds @@ -205,6 +203,7 @@ Note that this workflow also works inside Kubeflow notebooks. - Dependency conflicts - uses local Python environment - Limited isolation - no containerization benefits - Lightweight components only - this does not work for container components +- Remote debugging required - CLI-based debuggers (like `pdb` with `breakpoint()`) work directly, but IDE debugging requires remote debugging setup ### 2. Docker Runner (Container-based Development) @@ -262,16 +261,14 @@ python submit_to_cluster.py For pipeline-only changes: 1. Modify files in `pipelines/` directory -2. Set the env var KFP_DEBUG to true for the task you want to debug: - `task.set_env_variable("KFP_DEBUG", "True")` (See remote debugging section for more details on how to connect the debugger) +2. Enable remote debugging for the task you want to debug (see remote debugging section for details) 3. Submit directly to cluster: `python submit_to_cluster.py` For custom package changes: 1. Modify code in `pipe-fiction-codebase/` 2. Rebuild and push Docker image: `docker build -t /: . && docker push` 3. Update image reference in pipeline components -4. Set the env var KFP_DEBUG to true for the task you want to debug: - `task.set_env_variable("KFP_DEBUG", "True")` (See remote debugging section for more details) +4. Enable remote debugging for the task you want to debug (see remote debugging section for details) 5. 
Submit pipeline to cluster **Advantages:** @@ -286,25 +283,24 @@ For custom package changes: ## Remote Debugging -As indicated above, you can use your preferred way of debugging Python code in the custom Python package, and for pipelines executed locally using the subprocess runner. However, as soon as the code is run in containers, we need a remote debugging setup. +All debugging across environments (SubprocessRunner, DockerRunner, and cluster execution) now uses remote debugging with [debugpy](https://github.com/microsoft/debugpy) for IDE integration. For CLI-based debugging, `breakpoint()` still works directly with the SubprocessRunner. -In this demo we use [debugpy](https://github.com/microsoft/debugpy). So we start a debugging server inside the component: +### Component Setup -```python -@component(packages_to_install=["debugpy"]) -def your_component_name(): - import os +Components are configured with a `remote_debugging` parameter to enable debug mode: - if os.getenv("KFP_DEBUG") == "true": +```python +@component(base_image="/:", packages_to_install=["debugpy"]) +def your_component_name(remote_debugging: bool = False): + if remote_debugging: import debugpy - - debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678")) - debugpy.listen(("0.0.0.0", debug_port)) + debugpy.listen(("0.0.0.0", 5678)) debugpy.wait_for_client() - ... + + # Your component logic here... ``` -The debug server then waits until a debugger connects. 
This can for example be done with VS Code like so: +### VS Code Setup Create `.vscode/launch.json`: @@ -313,7 +309,18 @@ Create `.vscode/launch.json`: "version": "0.2.0", "configurations": [ { - "name": "Python Debugger: Remote Attach", + "name": "Pipeline: Remote SubprocessRunner", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "justMyCode": false, + "subProcess": true + }, + { + "name": "Pipeline: Remote KFP/DockerRunner", "type": "debugpy", "request": "attach", "connect": { @@ -322,7 +329,7 @@ Create `.vscode/launch.json`: }, "pathMappings": [ { - "localRoot": "${workspaceFolder}/../pipe-fiction-codebase", + "localRoot": "${workspaceFolder}/pipe-fiction-codebase", "remoteRoot": "/app" } ], @@ -333,44 +340,40 @@ Create `.vscode/launch.json`: } ``` -If you now run the pipeline in Docker by executing `python run_locally_in_docker.py`, the code will wait until you open this project in VS Code and hit the debug button. Note that you'll need to have the [Python debugging extension](https://code.visualstudio.com/docs/python/debugging) installed in VS Code. - ### Debugging Workflow 1. **Enable debug mode:** - For local run in Docker: - ```python - # In run_locally_in_docker.py - environment={'KFP_DEBUG': 'true'} - ``` - - For execution in KFP: + Pass `remote_debugging=True` to your component when calling it in the pipeline: ```python - # In the pipeline.py file - task.set_env_variable("KFP_DEBUG", "True") + # In your pipeline definition + task = your_component_name(remote_debugging=True) ``` 2. **Start the pipeline:** - Locally: + SubprocessRunner: + ```bash + python run_locally_in_subproc.py + ``` + + DockerRunner: ```bash python run_locally_in_docker.py ``` - In KFP cluster: + Cluster: ```bash python run_in_k8s_cluster.py ``` 3. 
**Connect debugger:** - Pipeline will pause and wait for debugger connection - - Attach your IDE debugger to `localhost:5678` - - In the `run_locally_in_docker.py` the port settings are already set such that it works. However, for KFP you'll need to create a port forwarding from the component's pod to your machine (see next section) + - Use the appropriate VS Code configuration to attach: + - "Pipeline: Remote SubprocessRunner" for subprocess execution + - "Pipeline: Remote KFP/DockerRunner" for Docker and cluster execution 4. **Debug interactively:** - - Now debugging should work as you know it: - Set breakpoints in your pipeline components or the code package that gets imported - Step through code execution - Inspect variables and data structures @@ -378,6 +381,8 @@ If you now run the pipeline in Docker by executing `python run_locally_in_docker ### Cluster Debugging with Port Forwarding +For cluster execution, you'll need port forwarding: + ```bash # Find your pipeline pod kubectl get pods | grep your-pipeline @@ -385,8 +390,7 @@ kubectl get pods | grep your-pipeline # Forward debug port kubectl port-forward pod/your-pod-name 5678:5678 -# Connect local debugger to remote pod -# Use the same VS Code configuration +# Connect local debugger using the "Pipeline: Remote KFP/DockerRunner" configuration ``` ## Technical Implementation Notes @@ -405,7 +409,7 @@ These patches provide forward compatibility and will be obsolete when upgrading ### Debugging Architecture The debugging setup works by: -1. **Injecting debugpy** into pipeline components -2. **Port forwarding** from container to host -3. **Path mapping** between local IDE and remote container -4. **Environment control** for enabling/disabling debug mode +1. **Injecting debugpy** into pipeline components via the `remote_debugging` parameter +2. **Port forwarding** from container to host (for Docker/cluster execution) +3. **Path mapping** between local IDE and remote container (for Docker/cluster execution) +4. 
**Unified debugging experience** across all execution environments diff --git a/pipelines/pipe-fiction/pipelines/components.py b/pipelines/pipe-fiction/pipelines/components.py index b6ffe44..0c8780c 100644 --- a/pipelines/pipe-fiction/pipelines/components.py +++ b/pipelines/pipe-fiction/pipelines/components.py @@ -4,14 +4,11 @@ @component(base_image="hsteude/pipe-fiction:latest", packages_to_install=["debugpy"]) -def generate_data_comp() -> List: - import os - - if os.getenv("KFP_DEBUG") == "true": +def generate_data_comp(remote_debugging: bool = False) -> List: + if remote_debugging: import debugpy - debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678")) - debugpy.listen(("0.0.0.0", debug_port)) + debugpy.listen(("0.0.0.0", 5678)) debugpy.wait_for_client() from pipe_fiction.data_generator import DataGenerator @@ -20,19 +17,16 @@ def generate_data_comp() -> List: lines = generator.create_sample_data() return lines + @component( base_image="hsteude/pipe-fiction:latest", packages_to_install=["debugpy"], ) -def process_data_comp(lines: List[str]) -> List[str]: - import os - - if os.getenv("KFP_DEBUG") == "true": - import os +def process_data_comp(lines: List[str], remote_debugging: bool = False) -> List[str]: + if remote_debugging: import debugpy - debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678")) - debugpy.listen(("0.0.0.0", debug_port)) + debugpy.listen(("0.0.0.0", 5678)) debugpy.wait_for_client() from pipe_fiction.data_processor import DataProcessor diff --git a/pipelines/pipe-fiction/pipelines/pipeline.py b/pipelines/pipe-fiction/pipelines/pipeline.py index 41602f4..9a33357 100644 --- a/pipelines/pipe-fiction/pipelines/pipeline.py +++ b/pipelines/pipe-fiction/pipelines/pipeline.py @@ -4,5 +4,5 @@ @pipeline def example_pipeline(): - data_gen_task = generate_data_comp() - process_data_task = process_data_comp(lines=data_gen_task.output) + data_gen_task = generate_data_comp(remote_debugging=True) + process_data_task = process_data_comp(lines=data_gen_task.output, 
remote_debugging=False) From 8fb2bcd8ccfa2f2ec67e442e56fdc104336074a8 Mon Sep 17 00:00:00 2001 From: hsteude Date: Thu, 31 Jul 2025 15:36:57 +0200 Subject: [PATCH 04/16] Added script to submit pipelines from kf-notebook --- .../pipelines/submit_to_cluster_from_kf_notebook.py | 8 ++++++++ ...in_k8s_cluster.py => submit_to_cluster_from_remote.py} | 0 2 files changed, 8 insertions(+) create mode 100644 pipelines/pipe-fiction/pipelines/submit_to_cluster_from_kf_notebook.py rename pipelines/pipe-fiction/pipelines/{run_in_k8s_cluster.py => submit_to_cluster_from_remote.py} (100%) diff --git a/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_kf_notebook.py b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_kf_notebook.py new file mode 100644 index 0000000..0a8a554 --- /dev/null +++ b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_kf_notebook.py @@ -0,0 +1,8 @@ +from kfp.client import Client +from pipeline import example_pipeline + +client = Client() + +run = client.create_run_from_pipeline_func( + example_pipeline, +) diff --git a/pipelines/pipe-fiction/pipelines/run_in_k8s_cluster.py b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py similarity index 100% rename from pipelines/pipe-fiction/pipelines/run_in_k8s_cluster.py rename to pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py From e924b4a75517db4dcebdc16d0669404dfcb87efc Mon Sep 17 00:00:00 2001 From: hsteude Date: Thu, 31 Jul 2025 15:37:16 +0200 Subject: [PATCH 05/16] Removed readme in pipeliens dir --- pipelines/pipe-fiction/pipelines/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 pipelines/pipe-fiction/pipelines/README.md diff --git a/pipelines/pipe-fiction/pipelines/README.md b/pipelines/pipe-fiction/pipelines/README.md deleted file mode 100644 index e69de29..0000000 From 82ea8003d84e28a5b5c6b3d94aa7d1a1a0c9b1a1 Mon Sep 17 00:00:00 2001 From: hsteude Date: Thu, 31 Jul 2025 15:44:44 +0200 Subject: [PATCH 06/16] Removed old 
component versions --- .../pipe-fiction/pipelines/components.py | 56 ------------------- 1 file changed, 56 deletions(-) diff --git a/pipelines/pipe-fiction/pipelines/components.py b/pipelines/pipe-fiction/pipelines/components.py index 0c8780c..7fb9a26 100644 --- a/pipelines/pipe-fiction/pipelines/components.py +++ b/pipelines/pipe-fiction/pipelines/components.py @@ -35,59 +35,3 @@ def process_data_comp(lines: List[str], remote_debugging: bool = False) -> List[ processed_lines = processor.process_lines(lines) # Step into here! return processed_lines - - -# works: -# connect with telnet localhost 4444 -# @component(base_image="pipe-fiction:latest") -# def greeter_component(names: List = ["Laura", "Malte", "Paul"]): -# import remote_pdb -# -# # Remote debugger auf Port 4444 -# remote_pdb.set_trace(host='0.0.0.0', port=4444) -# -# from pipe_fiction.hello_world import HelloWorld -# greeter = HelloWorld("Python Entwickler") -# greetings = greeter.say_hello_multiple(names) -# -# for i, greeting in enumerate(greetings, 1): -# print(f" {i}. {greeting}") -# - - -@component(base_image="hsteude/kfp-hello-world:latest", packages_to_install=["debugpy"]) -def greeter_component(names: List = ["Laura", "Malte", "Paula"]): - import os - - # Check environment variable for debug mode - if os.getenv("KFP_DEBUG") == "true": - import debugpy - - debug_port = int(os.getenv("KFP_DEBUG_PORT", "5678")) - debugpy.listen(("0.0.0.0", debug_port)) - debugpy.wait_for_client() - debugpy.breakpoint() - - # Your actual component logic - from pipe_fiction.hello_world import HelloWorld - - greeter = HelloWorld("Python Developer") - greetings = greeter.say_hello_multiple(names) - for i, greeting in enumerate(greetings, 1): - print(f" {i}. 
{greeting}") - print() - - -# @component(base_image="pipe-fiction:latest", packages_to_install=['pudb']) -# def greeter_component(names: List = ["Laura", "Malte", "Paul"]): -# import pudb.remote -# -# # PuDB Remote-Debugger -# pudb.remote.set_trace(term_size=(120, 40), host='0.0.0.0', port=6899) -# -# from pipe_fiction.hello_world import HelloWorld -# greeter = HelloWorld("Python Entwickler") -# greetings = greeter.say_hello_multiple(names) -# -# for i, greeting in enumerate(greetings, 1): -# print(f" {i}. {greeting}") From ed412dc6118872dca06fd4e276b091469c207bc8 Mon Sep 17 00:00:00 2001 From: hsteude Date: Thu, 31 Jul 2025 15:48:58 +0200 Subject: [PATCH 07/16] Update pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../pipe-fiction-codebase/pipe_fiction/data_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py index f1fd527..b05eafc 100644 --- a/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_generator.py @@ -20,7 +20,7 @@ def create_sample_data(self) -> List[str]: "Here are some random useless lines of text.", "Line 1: MLOps is an important topic.", "Line 2: Kubeflow Pipeline are hard to debug, sometimes.", - "Line 3: prokube.ai seams to be a nice company." + "Line 3: prokube.ai seems to be a nice company." 
] logger.success(f"✅ Created {len(lines)} sample lines") From a0e5ca9d3b45b20b13240a2edfcfc42339eac06f Mon Sep 17 00:00:00 2001 From: hsteude Date: Thu, 31 Jul 2025 15:49:42 +0200 Subject: [PATCH 08/16] Update pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py b/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py index 796118d..a89cc3e 100644 --- a/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py +++ b/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py @@ -5,7 +5,7 @@ to match the upstream 2.14+ API. Import this module BEFORE using DockerRunner with ports/environment. Usage (exactly like upstream KFP 2.14+): - import kfp_docker_patches # Apply patches + import kfp_docker_monkey_patches # Apply patches from kfp import local # Explicit ports and environment (upstream-compatible API) From 2b5324f079fe9a41dc3e9ccdb5937b295ab02291 Mon Sep 17 00:00:00 2001 From: hsteude Date: Fri, 1 Aug 2025 20:03:48 +0200 Subject: [PATCH 09/16] Integrated the debugging decorator --- pipelines/pipe-fiction/README.md | 48 +++- .../pipe-fiction/pipelines/components.py | 25 +- pipelines/pipe-fiction/pipelines/pipeline.py | 4 +- .../pipe-fiction/pipelines/pyproject.toml | 2 + .../pipelines/utils/debuggable_component.py | 220 ++++++++++++++++++ pipelines/pipe-fiction/pipelines/uv.lock | 35 +++ 6 files changed, 308 insertions(+), 26 deletions(-) create mode 100644 pipelines/pipe-fiction/pipelines/utils/debuggable_component.py diff --git a/pipelines/pipe-fiction/README.md b/pipelines/pipe-fiction/README.md index 642ec68..3bb3f9f 100644 --- a/pipelines/pipe-fiction/README.md +++ b/pipelines/pipe-fiction/README.md @@ -285,14 +285,50 @@ For custom package changes: 
All debugging across environments (SubprocessRunner, DockerRunner, and cluster execution) now uses remote debugging with [debugpy](https://github.com/microsoft/debugpy) for IDE integration. For CLI-based debugging, `breakpoint()` still works directly with the SubprocessRunner. -### Component Setup +### Debuggable Component Decorator (Recommended) -Components are configured with a `remote_debugging` parameter to enable debug mode: +The easiest way to enable debugging is using our custom `@lightweight_debuggable_component` decorator that automatically injects debugging code: + +```python +from utils.debuggable_component import lightweight_debuggable_component + +@lightweight_debuggable_component(base_image="/:") +def your_component_name(debug: bool = False): + # Your component logic here - debugging code is auto-injected! + from pipe_fiction.data_processor import DataProcessor + processor = DataProcessor() + return processor.process() +``` + +**Features:** +- Automatic debugging code injection (no boilerplate) +- Supports both `debugpy` (VS Code) and `remote-pdb` (CLI) debuggers +- Configurable debug ports +- Works with all KFP component parameters + +**Usage examples:** +```python +# Default debugpy on port 5678 +@lightweight_debuggable_component(base_image="my-image:latest") +def my_component(debug: bool = False): ... + +# Remote pdb on custom port +@lightweight_debuggable_component( + base_image="my-image:latest", + debugger_type="remote-pdb", + debug_port=4444 +) +def my_component(debug: bool = False): ... 
+``` + +### Manual Component Setup (Alternative) + +For manual setup or when not using the decorator, components can be configured with debugging code directly: ```python @component(base_image="/:", packages_to_install=["debugpy"]) -def your_component_name(remote_debugging: bool = False): - if remote_debugging: +def your_component_name(debug: bool = False): + if debug: import debugpy debugpy.listen(("0.0.0.0", 5678)) debugpy.wait_for_client() @@ -344,10 +380,10 @@ Create `.vscode/launch.json`: 1. **Enable debug mode:** - Pass `remote_debugging=True` to your component when calling it in the pipeline: + Pass `debug=True` to your component when calling it in the pipeline: ```python # In your pipeline definition - task = your_component_name(remote_debugging=True) + task = your_component_name(debug=True) ``` 2. **Start the pipeline:** diff --git a/pipelines/pipe-fiction/pipelines/components.py b/pipelines/pipe-fiction/pipelines/components.py index 7fb9a26..a8b50f8 100644 --- a/pipelines/pipe-fiction/pipelines/components.py +++ b/pipelines/pipe-fiction/pipelines/components.py @@ -1,16 +1,12 @@ -from kfp.dsl import Output, Dataset, Input, component from typing import List, Dict -import debugpy +from utils.debuggable_component import ( + lightweight_debuggable_component, +) -@component(base_image="hsteude/pipe-fiction:latest", packages_to_install=["debugpy"]) -def generate_data_comp(remote_debugging: bool = False) -> List: - if remote_debugging: - import debugpy - - debugpy.listen(("0.0.0.0", 5678)) - debugpy.wait_for_client() +@lightweight_debuggable_component(base_image="hsteude/pipe-fiction:latest") +def generate_data_comp(debug: bool = False) -> List: from pipe_fiction.data_generator import DataGenerator generator = DataGenerator() @@ -18,17 +14,10 @@ def generate_data_comp(remote_debugging: bool = False) -> List: return lines -@component( +@lightweight_debuggable_component( base_image="hsteude/pipe-fiction:latest", - packages_to_install=["debugpy"], ) -def 
process_data_comp(lines: List[str], remote_debugging: bool = False) -> List[str]: - if remote_debugging: - import debugpy - - debugpy.listen(("0.0.0.0", 5678)) - debugpy.wait_for_client() - +def process_data_comp(lines: List[str], debug: bool = False) -> List[str]: from pipe_fiction.data_processor import DataProcessor processor = DataProcessor() diff --git a/pipelines/pipe-fiction/pipelines/pipeline.py b/pipelines/pipe-fiction/pipelines/pipeline.py index 9a33357..6189970 100644 --- a/pipelines/pipe-fiction/pipelines/pipeline.py +++ b/pipelines/pipe-fiction/pipelines/pipeline.py @@ -4,5 +4,5 @@ @pipeline def example_pipeline(): - data_gen_task = generate_data_comp(remote_debugging=True) - process_data_task = process_data_comp(lines=data_gen_task.output, remote_debugging=False) + data_gen_task = generate_data_comp(debug=False) + process_data_task = process_data_comp(lines=data_gen_task.output, debug=True) diff --git a/pipelines/pipe-fiction/pipelines/pyproject.toml b/pipelines/pipe-fiction/pipelines/pyproject.toml index 4d41fad..67e18be 100644 --- a/pipelines/pipe-fiction/pipelines/pyproject.toml +++ b/pipelines/pipe-fiction/pipelines/pyproject.toml @@ -9,7 +9,9 @@ dependencies = [ "docker>=7.1.0", "ipdb>=0.13.13", "kfp==2.7", + "loguru>=0.7.2", "pdbpp>=0.11.7", "pip>=25.1.1", + "remote-pdb>=2.1.0", "truststore>=0.10.3", ] diff --git a/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py b/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py new file mode 100644 index 0000000..12a61fb --- /dev/null +++ b/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py @@ -0,0 +1,220 @@ +""" +Lightweight debuggable component decorator for KFP. + +This module provides a decorator that automatically injects debugging code +into KFP Lightweight Components, eliminating boilerplate. 
"""Utilities for building KFP Lightweight Components with injected debuggers."""

import ast
import inspect
import textwrap
from pathlib import Path
from typing import Callable, Literal

from kfp.dsl import component
from loguru import logger


def lightweight_debuggable_component(
    debugger_type: Literal["debugpy", "remote-pdb"] = "debugpy",
    debug_port: int = 5678,
    auto_install_packages: bool = True,
    **component_kwargs,
):
    """
    Decorator that creates KFP Lightweight Components with automatic debugging code injection.

    LIGHTWEIGHT COMPONENTS ONLY - Does not work with Container Components!

    This decorator automatically injects debugging code into your component functions,
    eliminating the need to manually add debugging boilerplate. The injected code
    activates when the component is called with ``debug=True``.

    Args:
        debugger_type: Type of debugger to use ("debugpy" or "remote-pdb")
        debug_port: Port for remote debugging (default: 5678)
        auto_install_packages: When True (default), automatically add ``loguru``
            and the selected debugger to ``packages_to_install``.
        **component_kwargs: All arguments passed to @component decorator
            (base_image, packages_to_install, etc.)

    Usage:
        @lightweight_debuggable_component()
        def my_component(arg1: str, debug: bool = False) -> str:
            # Just your component logic - debugging code is auto-injected!
            return result

        # With remote pdb and base image:
        @lightweight_debuggable_component(
            base_image="my-custom:latest",
            debugger_type="remote-pdb",
            debug_port=4444
        )
        def my_component(debug: bool = False) -> str:
            return "result"
    """

    def decorator(func: Callable) -> Callable:
        # Log where the component is defined; inspect may fail for functions
        # created in a REPL or via exec().
        try:
            source_path = Path(inspect.getfile(func)).resolve()
            logger.debug(f"Processing component '{func.__name__}' from {source_path}")
        except (OSError, TypeError):
            logger.warning(f"Processing component '{func.__name__}' from unknown source")

        if auto_install_packages:
            # Work on a copy so a caller-supplied list is never mutated in place.
            packages_to_install = list(component_kwargs.get("packages_to_install") or [])
            # loguru gives nicer logging inside the component; the debugger
            # package must be importable by the injected code.
            for package in ("loguru", debugger_type):
                if package not in packages_to_install:
                    packages_to_install.append(package)
                    logger.debug(f"Added {package} to packages_to_install")
            component_kwargs["packages_to_install"] = packages_to_install
        else:
            logger.debug("Skipping automatic package installation (auto_install_packages=False)")

        # Get the original function source code
        try:
            original_source = inspect.getsource(func)
            logger.debug(
                f"Extracted source code for {func.__name__} ({len(original_source)} chars)"
            )
        except OSError as e:
            # Without source there is nothing to inject into; degrade to a
            # plain KFP component.
            logger.error(f"Cannot get source code for {func.__name__}: {e}")
            return component(**component_kwargs)(func)

        def _generate_debug_code(port: int, indent: str) -> list:
            """Generate the debugging lines spliced in at the top of the body."""
            if debugger_type == "debugpy":
                return [
                    f"{indent}if debug:",
                    f"{indent}    import debugpy",
                    f'{indent}    debugpy.listen(("0.0.0.0", {port}))',
                    f"{indent}    debugpy.wait_for_client()",
                    f"{indent}    debugpy.breakpoint()",
                    f"{indent}",
                ]
            if debugger_type == "remote-pdb":
                return [
                    f"{indent}if debug:",
                    f"{indent}    import remote_pdb",
                    f"{indent}    remote_pdb.RemotePdb('0.0.0.0', {port}).set_trace()",
                    f"{indent}",
                ]
            logger.error(f"Unsupported debugger type: {debugger_type}")
            return [f"{indent}# Unsupported debugger type: {debugger_type}"]

        def _inject_debugging_fallback(source_code: str) -> str:
            """String-based injection used when AST parsing fails."""
            lines = source_code.split("\n")

            # Find function definition line (tolerate a space before '(')
            func_def_line = -1
            for i, line in enumerate(lines):
                stripped = line.strip()
                if stripped.startswith((f"def {func.__name__}(", f"def {func.__name__} (")):
                    func_def_line = i
                    break

            if func_def_line == -1:
                logger.error(f"Could not find function definition for {func.__name__}")
                return source_code

            # Find function body start (first non-empty line after the def)
            body_start = func_def_line + 1
            while body_start < len(lines) and not lines[body_start].strip():
                body_start += 1

            if body_start >= len(lines):
                return source_code

            first_body_line = lines[body_start]
            indent_str = " " * (len(first_body_line) - len(first_body_line.lstrip()))

            debug_lines = _generate_debug_code(debug_port, indent_str)
            logger.debug(f"Fallback injection successful for {func.__name__}")
            return "\n".join(lines[:body_start] + debug_lines + lines[body_start:])

        def inject_debugging_code(source_code: str) -> str:
            """Inject debugging code right after the signature and docstring.

            Uses the AST's first body statement, which correctly handles
            multi-line signatures and keeps a leading docstring as the
            function's docstring (help(), KFP metadata rely on that).
            """
            try:
                # Parse a dedented copy (method source is indented and would
                # not parse); dedenting never changes line numbers, so AST
                # positions map straight back onto source_code.
                tree = ast.parse(textwrap.dedent(source_code))

                target = None
                for node in ast.walk(tree):
                    if isinstance(node, ast.FunctionDef) and node.name == func.__name__:
                        target = node
                        break

                if target is None:
                    logger.warning(
                        f"Could not find function '{func.__name__}' in AST, using fallback"
                    )
                    return _inject_debugging_fallback(source_code)

                lines = source_code.split("\n")
                first_stmt = target.body[0]
                has_docstring = (
                    isinstance(first_stmt, ast.Expr)
                    and isinstance(first_stmt.value, ast.Constant)
                    and isinstance(first_stmt.value.value, str)
                )
                if has_docstring:
                    # Insert *after* the docstring so it remains the first
                    # statement of the function.
                    insert_at = first_stmt.end_lineno  # 0-based index just past it
                    anchor = target.body[1] if len(target.body) > 1 else first_stmt
                else:
                    insert_at = first_stmt.lineno - 1  # AST lineno is 1-based
                    anchor = first_stmt

                # Derive indentation from the real (non-dedented) source line.
                anchor_line = lines[anchor.lineno - 1]
                indent_str = " " * (len(anchor_line) - len(anchor_line.lstrip()))

                debug_lines = _generate_debug_code(debug_port, indent_str)
                logger.debug(
                    f"Successfully injected {debugger_type} debugging code into {func.__name__}"
                )
                return "\n".join(lines[:insert_at] + debug_lines + lines[insert_at:])

            except Exception as e:
                logger.error(f"AST parsing failed for {func.__name__}: {e}, using fallback")
                return _inject_debugging_fallback(source_code)

        # KFP extracts the component body via inspect.getsource; patch it
        # temporarily so KFP sees the instrumented source instead.
        original_getsource = inspect.getsource

        def patched_getsource(obj):
            if obj is func:
                logger.debug(f"Returning modified source for {func.__name__}")
                return inject_debugging_code(original_source)
            return original_getsource(obj)

        inspect.getsource = patched_getsource

        try:
            # Apply the KFP component decorator with all passed arguments
            component_func = component(**component_kwargs)(func)
            logger.debug(f"Successfully created debuggable component '{func.__name__}'")
        finally:
            # Always restore original inspect.getsource, even on error
            inspect.getsource = original_getsource

        return component_func

    return decorator


# Backward compatibility alias
debuggable_component = lightweight_debuggable_component
">=0.11.7" }, { name = "pip", specifier = ">=25.1.1" }, + { name = "remote-pdb", specifier = ">=2.1.0" }, { name = "truststore", specifier = ">=0.10.3" }, ] @@ -643,6 +660,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, ] +[[package]] +name = "remote-pdb" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/b5/4944cac06fd9fc4a2e168313ec220aa25ed96ce83947b63eea5b4045b22d/remote-pdb-2.1.0.tar.gz", hash = "sha256:2d70c6f41e0eabf0165e8f1be58f82aa7a605aaeab8f2aefeb9ce246431091c1", size = 22295, upload-time = "2020-07-24T13:31:32.985Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/c5/d208c66344bb785d800adb61aef512290d3473052b9e7697890f0547aff2/remote_pdb-2.1.0-py2.py3-none-any.whl", hash = "sha256:94f73a92ac1248cf16189211011f97096bdada8a7baac8c79372663bbb57b5d0", size = 6304, upload-time = "2020-07-24T13:31:31.535Z" }, +] + [[package]] name = "requests" version = "2.32.4" @@ -780,3 +806,12 @@ sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf594 wheels = [ { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826, upload-time = "2024-04-23T22:16:14.422Z" }, ] + +[[package]] +name = "win32-setctime" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = 
"sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, +] From c85e0c93d9c78ded989374b7aa8620ac41c4c759 Mon Sep 17 00:00:00 2001 From: hsteude Date: Fri, 1 Aug 2025 20:05:55 +0200 Subject: [PATCH 10/16] MINOR: VS-Code Session name --- pipelines/pipe-fiction/.vscode/launch.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/pipe-fiction/.vscode/launch.json b/pipelines/pipe-fiction/.vscode/launch.json index 2fea988..137553c 100644 --- a/pipelines/pipe-fiction/.vscode/launch.json +++ b/pipelines/pipe-fiction/.vscode/launch.json @@ -13,7 +13,7 @@ "subProcess": true }, { - "name": "Pipeline: Remote KFP/DockerRunner", + "name": "Pipeline: Remote Debugging", "type": "debugpy", "request": "attach", "connect": { From 0528eed4b3f1b2f7c6634f20f8d85a9d15b0fbb2 Mon Sep 17 00:00:00 2001 From: hsteude Date: Fri, 1 Aug 2025 20:06:15 +0200 Subject: [PATCH 11/16] MINOR: enable remote debugging in second comp --- .../pipe-fiction/pipelines/submit_to_cluster_from_remote.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py index 48d86cc..d7d0d63 100644 --- a/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py +++ b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py @@ -23,4 +23,5 @@ run = client.create_run_from_pipeline_func( example_pipeline, + enable_caching=False ) From 5d5a7c48da4cfb264cbde663dbcbe8108af7abf4 Mon Sep 17 00:00:00 2001 From: hsteude Date: Wed, 6 Aug 2025 10:09:43 +0200 Subject: [PATCH 12/16] Fixed Bug 
in decorator --- .../pipelines/utils/debuggable_component.py | 84 ++++++++++++++----- 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py b/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py index 12a61fb..3042e94 100644 --- a/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py +++ b/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py @@ -18,12 +18,13 @@ def lightweight_debuggable_component( debugger_type: Literal["debugpy", "remote-pdb"] = "debugpy", debug_port: int = 5678, + auto_install_packages: bool = True, **component_kwargs ): """ Decorator that creates KFP Lightweight Components with automatic debugging code injection. - ⚠️ LIGHTWEIGHT COMPONENTS ONLY - Does not work with Container Components! + LIGHTWEIGHT COMPONENTS ONLY - Does not work with Container Components! This decorator automatically injects debugging code into your component functions, eliminating the need to manually add debugging boilerplate. 
@@ -59,18 +60,21 @@ def decorator(func: Callable) -> Callable: source_path = Path("") logger.warning(f"Processing component '{func.__name__}' from unknown source") - # Determine debugger package to install - debugger_package = debugger_type - packages_to_install = component_kwargs.get("packages_to_install", []) - - # Always add loguru for better logging in components - if "loguru" not in packages_to_install: - packages_to_install.append("loguru") - - if debugger_package not in packages_to_install: - packages_to_install.append(debugger_package) - component_kwargs["packages_to_install"] = packages_to_install - logger.debug(f"Added {debugger_package} to packages_to_install") + # Determine debugger package to install (only if auto_install_packages is True) + if auto_install_packages: + debugger_package = debugger_type + packages_to_install = component_kwargs.get("packages_to_install", []) + + # Always add loguru for better logging in components + if "loguru" not in packages_to_install: + packages_to_install.append("loguru") + + if debugger_package not in packages_to_install: + packages_to_install.append(debugger_package) + component_kwargs["packages_to_install"] = packages_to_install + logger.debug(f"Added {debugger_package} to packages_to_install") + else: + logger.debug("Skipping automatic package installation (auto_install_packages=False)") # Get the original function source code try: @@ -103,11 +107,57 @@ def inject_debugging_code(source_code: str) -> str: func_start_line = target_func_node.lineno - 1 # AST uses 1-based line numbers lines = source_code.split('\n') - # Find first line of function body - body_start = func_start_line + 1 + # Find the line with ':' that ends the function signature + # We need to find where the function signature actually ends + colon_line = func_start_line + paren_count = 0 + found_opening_paren = False + + while colon_line < len(lines): + line = lines[colon_line] + for char in line: + if char == '(': + paren_count += 1 + 
found_opening_paren = True + elif char == ')': + paren_count -= 1 + elif char == ':' and found_opening_paren and paren_count == 0: + # Found the colon that ends the function signature + break + else: + # If we didn't break out of the inner loop, continue to next line + colon_line += 1 + continue + # If we broke out of the inner loop, we found our colon + break + + # Find first non-empty line after the colon (skip docstring if present) + body_start = colon_line + 1 while body_start < len(lines) and not lines[body_start].strip(): body_start += 1 + # If the first non-empty line is a docstring, skip it + if (body_start < len(lines) and + lines[body_start].strip().startswith(('"""', "'''", 'r"""', "r'''"))): + # Find the end of the docstring + quote_type = lines[body_start].strip()[:3] + if quote_type.startswith('r'): + quote_type = quote_type[1:] + + # Check if docstring ends on the same line + if lines[body_start].strip().endswith(quote_type) and len(lines[body_start].strip()) > 3: + body_start += 1 + else: + # Multi-line docstring - find the end + body_start += 1 + while body_start < len(lines) and not lines[body_start].strip().endswith(quote_type): + body_start += 1 + body_start += 1 # Move past the closing quotes + + # Find next non-empty line after docstring + while body_start < len(lines) and not lines[body_start].strip(): + body_start += 1 + if body_start >= len(lines): logger.warning("Could not find function body, using fallback") return _inject_debugging_fallback(source_code) @@ -214,7 +264,3 @@ def patched_getsource(obj): return component_func return decorator - - -# Backward compatibility alias -debuggable_component = lightweight_debuggable_component \ No newline at end of file From 72a313aa3b90899e2eda0d36ce3c73b689c941bd Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Tue, 7 Oct 2025 10:47:26 +0200 Subject: [PATCH 13/16] Readability improvements for pipeline example --- pipelines/pipe-fiction/README.md | 9 +++++++-- 
pipelines/pipe-fiction/pipelines/components.py | 13 ++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pipelines/pipe-fiction/README.md b/pipelines/pipe-fiction/README.md index 3bb3f9f..099d43f 100644 --- a/pipelines/pipe-fiction/README.md +++ b/pipelines/pipe-fiction/README.md @@ -89,18 +89,23 @@ Why is debugging a challenge? cd pipelines uv sync source .venv/bin/activate # Activate when working on pipeline code - uv pip install -e ../pipe-fiction-codebase/ # Install custom package + uv pip install -e ../pipe-fiction-codebase/ # Install custom package ``` 3. **(RE-)Build the base Docker image if needed:** ```bash cd pipe-fiction-codebase - docker build -t /: . + export IMAGE_TAG=/: + docker build -t $IMAGE_TAG . ``` More details on this in the `pipe-fiction-codebase` directory. 4. **Run the pipeline** + ```bash + cd pipelines + ``` + Run locally using subprocesses (also works in KF-notebooks): ```bash python run_locally_in_subproc.py diff --git a/pipelines/pipe-fiction/pipelines/components.py b/pipelines/pipe-fiction/pipelines/components.py index a8b50f8..c650c99 100644 --- a/pipelines/pipe-fiction/pipelines/components.py +++ b/pipelines/pipe-fiction/pipelines/components.py @@ -1,11 +1,18 @@ -from typing import List, Dict +import os +from typing import List from utils.debuggable_component import ( lightweight_debuggable_component, ) -@lightweight_debuggable_component(base_image="hsteude/pipe-fiction:latest") +BASE_IMAGE = os.getenv("IMAGE_TAG") +assert ( + BASE_IMAGE +), "Please specify image for your component in `IMAGE_TAG` environment variable" + + +@lightweight_debuggable_component(base_image=BASE_IMAGE) def generate_data_comp(debug: bool = False) -> List: from pipe_fiction.data_generator import DataGenerator @@ -15,7 +22,7 @@ def generate_data_comp(debug: bool = False) -> List: @lightweight_debuggable_component( - base_image="hsteude/pipe-fiction:latest", + base_image=BASE_IMAGE, ) def process_data_comp(lines: List[str], debug: bool 
= False) -> List[str]: from pipe_fiction.data_processor import DataProcessor From 65c04625b8c47e457c05eb7cce07238b1ac07979 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Thu, 12 Mar 2026 17:01:44 +0100 Subject: [PATCH 14/16] Address PR review feedback: Keycloak auth, README fixes, code cleanup - Replace Dex-based auth_session.py with Keycloak auth using temp OIDC client pattern (create client -> get user token -> cleanup) and KFP existing_token - Update submit_to_cluster_from_remote.py to use token-based auth - Fix all stale script references in README (run_in_k8s_cluster.py, submit_to_cluster.py -> actual filenames) - Restructure Remote Debugging section into 4 clear subsections - Add cluster submission docs with env var examples - Sync README launch.json and directory listing with actual files - Fix .gitignore typo (*.py[codz] -> *.py[cod]) - Fix kfp_docker_monkey_patches.py docstring import path - Fix debug parameter name in Debugging Architecture section - Add DAP compatibility note for non-VS Code IDEs --- pipelines/pipe-fiction/README.md | 175 ++++++----- pipelines/pipe-fiction/pipelines/.gitignore | 2 +- .../submit_to_cluster_from_remote.py | 46 ++- .../pipelines/utils/auth_session.py | 274 ++++++++++++------ .../utils/kfp_docker_monkey_patches.py | 134 +++++---- 5 files changed, 397 insertions(+), 234 deletions(-) diff --git a/pipelines/pipe-fiction/README.md b/pipelines/pipe-fiction/README.md index 099d43f..7254623 100644 --- a/pipelines/pipe-fiction/README.md +++ b/pipelines/pipe-fiction/README.md @@ -116,10 +116,16 @@ Why is debugging a challenge? 
python run_locally_in_docker.py ``` - Submit to the cluster: - ```bash - python run_in_k8s_cluster.py - ``` + Submit to the cluster from a Kubeflow notebook: + ```bash + python submit_to_cluster_from_kf_notebook.py + ``` + + Submit to the cluster from a remote machine (requires Keycloak admin access): + ```bash + python submit_to_cluster_from_remote.py + ``` + See [Cluster Execution](#3-cluster-execution-in-cluster-debugging) for required environment variables. ## Repository Organization @@ -144,12 +150,14 @@ Contains KFP-specific orchestration code: ``` pipelines/ -├── components.py # KFP component definitions (import from base image) -├── pipeline.py # Pipeline assembly -├── run_locally_*.py # Local execution scripts -├── run_in_k8s_cluster.py # Remote execution -├── .venv/ # Virtual environment with custom package -└── utils/ # KFP utilities and patches +├── components.py # KFP component definitions (import from base image) +├── pipeline.py # Pipeline assembly +├── run_locally_in_subproc.py # Local execution using SubprocessRunner +├── run_locally_in_docker.py # Local execution using DockerRunner +├── submit_to_cluster_from_kf_notebook.py # Submission from a Kubeflow notebook +├── submit_to_cluster_from_remote.py # Remote submission (Keycloak auth) +├── .venv/ # Virtual environment with custom package +└── utils/ # KFP utilities, auth, and patches ``` **Local Package Installation for IDE Support:** @@ -193,7 +201,7 @@ A typical workflow using the subprocess runner could look like this: 4. Build and push Docker image when ready for submission to the cluster (this could also be done in a CI/CD pipeline): `docker build -t /: . && docker push` 5. Update image reference in pipeline components if needed -6. Submit pipeline to cluster: `python submit_to_cluster.py` +6. 
Submit pipeline to cluster: `python submit_to_cluster_from_kf_notebook.py` (from a KF notebook) or `python submit_to_cluster_from_remote.py` (from a remote machine) Note that this workflow also works inside Kubeflow notebooks. @@ -237,7 +245,7 @@ For changes in the custom Python package: 5. Rebuild the image if needed and push it to your registry: `docker push /:` 6. Update image reference in pipeline components if needed -7. Submit pipeline to cluster: `python submit_to_cluster.py` +7. Submit pipeline to cluster: `python submit_to_cluster_from_kf_notebook.py` or `python submit_to_cluster_from_remote.py` **Advantages:** - Production environment - identical to cluster execution @@ -257,17 +265,38 @@ For changes in the custom Python package: Here we use the KFP backend as it runs inside the Kubernetes cluster, as intended. +**From a Kubeflow notebook** (no extra auth needed): +```bash +cd pipelines +python submit_to_cluster_from_kf_notebook.py +``` + +**From a remote machine** (requires Keycloak admin access): ```bash cd pipelines -python submit_to_cluster.py +# Set required environment variables +export KUBEFLOW_ENDPOINT=https://kubeflow.example.com +export KUBEFLOW_USERNAME=user@example.com +export KUBEFLOW_PASSWORD=your-password +export KEYCLOAK_URL=https://kubeflow.example.com # Base URL where Keycloak /auth/ is reachable +export KEYCLOAK_ADMIN_PASSWORD=admin-password +# Optional: +export KEYCLOAK_REALM=prokube # default: "prokube" +export KUBEFLOW_NAMESPACE=my-ns # default: derived from username + +python submit_to_cluster_from_remote.py ``` +> **Note:** Remote submission creates a temporary Keycloak OIDC client to obtain a user token, then deletes it after authentication. This requires Keycloak admin credentials. The token is passed directly to the KFP Client via the `existing_token` parameter. +> +> `KEYCLOAK_URL` should be the base URL where the Keycloak `/auth/` endpoint is reachable. 
In many setups, this is the same as `KUBEFLOW_ENDPOINT` (Keycloak is typically exposed at `/auth/` on the same ingress). + **Cluster Execution Workflow** For pipeline-only changes: 1. Modify files in `pipelines/` directory 2. Enable remote debugging for the task you want to debug (see remote debugging section for details) -3. Submit directly to cluster: `python submit_to_cluster.py` +3. Submit directly to cluster: `python submit_to_cluster_from_kf_notebook.py` or `python submit_to_cluster_from_remote.py` For custom package changes: 1. Modify code in `pipe-fiction-codebase/` @@ -288,9 +317,17 @@ For custom package changes: ## Remote Debugging -All debugging across environments (SubprocessRunner, DockerRunner, and cluster execution) now uses remote debugging with [debugpy](https://github.com/microsoft/debugpy) for IDE integration. For CLI-based debugging, `breakpoint()` still works directly with the SubprocessRunner. +All execution environments (SubprocessRunner, DockerRunner, and cluster) support interactive debugging with [debugpy](https://github.com/microsoft/debugpy) for IDE integration. For CLI-based debugging, `breakpoint()` also works directly with the SubprocessRunner. -### Debuggable Component Decorator (Recommended) +This section is organized as follows: +1. **Enabling Debugging in Components** - How to add debugging support to your KFP components (decorator or manual setup) +2. **Local Debugging Workflow** - How to debug pipelines running locally (SubprocessRunner or DockerRunner) +3. **Cluster Debugging with Port Forwarding** - How to debug pipelines running in a Kubernetes cluster +4. **IDE Setup** - VS Code configuration for connecting to the remote debugger + +### 1. 
Enabling Debugging in Components + +#### Debuggable Component Decorator (Recommended) The easiest way to enable debugging is using our custom `@lightweight_debuggable_component` decorator that automatically injects debugging code: @@ -326,7 +363,7 @@ def my_component(debug: bool = False): ... def my_component(debug: bool = False): ... ``` -### Manual Component Setup (Alternative) +#### Manual Component Setup (Alternative) For manual setup or when not using the decorator, components can be configured with debugging code directly: @@ -341,9 +378,52 @@ def your_component_name(debug: bool = False): # Your component logic here... ``` -### VS Code Setup +### 2. Local Debugging Workflow + +1. **Enable debug mode** by passing `debug=True` to your component in the pipeline definition: + ```python + # In pipeline.py + task = your_component_name(debug=True) + ``` + +2. **Start the pipeline locally:** + + SubprocessRunner: + ```bash + python run_locally_in_subproc.py + ``` + + DockerRunner: + ```bash + python run_locally_in_docker.py + ``` + +3. **Connect your debugger** - The pipeline will pause and wait for a debugger connection on port 5678. Use the appropriate VS Code configuration (see [IDE Setup](#4-ide-setup-vs-code)) to attach: + - **SubprocessRunner**: Use "Pipeline: Remote SubprocessRunner" - no path mapping needed since the code runs directly on your machine. + - **DockerRunner**: Use "Pipeline: Remote Debugging" - includes path mappings between your local `pipe-fiction-codebase/` and `/app` inside the container. + +4. **Debug interactively** - Set breakpoints in your pipeline components or the imported package code, step through execution, and inspect variables. + +### 3. Cluster Debugging with Port Forwarding + +When debugging pipelines running in the cluster, an additional port-forwarding step is needed to connect your local IDE to the pod: + +1. 
**Enable debug mode** and submit the pipeline to the cluster (see [Cluster Execution](#3-cluster-execution-in-cluster-debugging)). + +2. **Set up port forwarding** to the pipeline pod: + ```bash + # Find your pipeline pod + kubectl get pods | grep your-pipeline + + # Forward debug port + kubectl port-forward pod/your-pod-name 5678:5678 + ``` + +3. **Connect your debugger** using the "Pipeline: Remote Debugging" VS Code configuration. + +### 4. IDE Setup (VS Code) -Create `.vscode/launch.json`: +Create `.vscode/launch.json` (this file is already included in the repo): ```json { @@ -361,7 +441,7 @@ Create `.vscode/launch.json`: "subProcess": true }, { - "name": "Pipeline: Remote KFP/DockerRunner", + "name": "Pipeline: Remote Debugging", "type": "debugpy", "request": "attach", "connect": { @@ -381,58 +461,7 @@ Create `.vscode/launch.json`: } ``` -### Debugging Workflow - -1. **Enable debug mode:** - - Pass `debug=True` to your component when calling it in the pipeline: - ```python - # In your pipeline definition - task = your_component_name(debug=True) - ``` - -2. **Start the pipeline:** - - SubprocessRunner: - ```bash - python run_locally_in_subproc.py - ``` - - DockerRunner: - ```bash - python run_locally_in_docker.py - ``` - - Cluster: - ```bash - python run_in_k8s_cluster.py - ``` - -3. **Connect debugger:** - - Pipeline will pause and wait for debugger connection - - Use the appropriate VS Code configuration to attach: - - "Pipeline: Remote SubprocessRunner" for subprocess execution - - "Pipeline: Remote KFP/DockerRunner" for Docker and cluster execution - -4. 
**Debug interactively:** - - Set breakpoints in your pipeline components or the code package that gets imported - - Step through code execution - - Inspect variables and data structures - - Debug both pipeline logic and imported modules - -### Cluster Debugging with Port Forwarding - -For cluster execution, you'll need port forwarding: - -```bash -# Find your pipeline pod -kubectl get pods | grep your-pipeline - -# Forward debug port -kubectl port-forward pod/your-pod-name 5678:5678 - -# Connect local debugger using the "Pipeline: Remote KFP/DockerRunner" configuration -``` +> **Note:** While these examples use VS Code with debugpy, any IDE that supports the [Debug Adapter Protocol](https://microsoft.github.io/debug-adapter-protocol/) (DAP) can connect to debugpy — including PyCharm, Neovim (with nvim-dap), and others. ## Technical Implementation Notes @@ -450,7 +479,7 @@ These patches provide forward compatibility and will be obsolete when upgrading ### Debugging Architecture The debugging setup works by: -1. **Injecting debugpy** into pipeline components via the `remote_debugging` parameter +1. **Injecting debugpy** into pipeline components via the `debug` parameter 2. **Port forwarding** from container to host (for Docker/cluster execution) 3. **Path mapping** between local IDE and remote container (for Docker/cluster execution) 4. 
"""
Submit a KFP pipeline to a remote Kubeflow cluster using Keycloak authentication.

Required environment variables:
    KUBEFLOW_ENDPOINT: Kubeflow URL (e.g. https://kubeflow.example.com)
    KUBEFLOW_USERNAME: User email in the Keycloak realm
    KUBEFLOW_PASSWORD: User password
    KEYCLOAK_URL: Base URL where Keycloak /auth/ is reachable
                  (often same as KUBEFLOW_ENDPOINT)
    KEYCLOAK_ADMIN_PASSWORD: Keycloak admin password

Optional environment variables:
    KEYCLOAK_REALM: Keycloak realm name (default: "prokube")
    KUBEFLOW_NAMESPACE: KFP namespace (default: derived from username)
    IMAGE_TAG: Docker image for the pipeline components
"""

import os

import truststore

from kfp.client import Client
from pipeline import example_pipeline
from utils.auth_session import get_keycloak_token

# Make the system trust store available to Python's SSL machinery.
truststore.inject_into_ssl()

endpoint = os.environ["KUBEFLOW_ENDPOINT"]
username = os.environ["KUBEFLOW_USERNAME"]

# Obtain a Bearer token for the user via Keycloak.
token = get_keycloak_token(
    keycloak_url=os.environ["KEYCLOAK_URL"],
    admin_password=os.environ["KEYCLOAK_ADMIN_PASSWORD"],
    username=username,
    password=os.environ["KUBEFLOW_PASSWORD"],
    realm=os.environ.get("KEYCLOAK_REALM", "prokube"),
)

# Namespace defaults to the local part of the email, dots replaced by dashes.
default_namespace = username.split("@")[0].replace(".", "-")
namespace = os.environ.get("KUBEFLOW_NAMESPACE") or default_namespace

client = Client(
    host=f"{endpoint}/pipeline",
    namespace=namespace,
    existing_token=token,
    verify_ssl=False,
)

run = client.create_run_from_pipeline_func(
    example_pipeline,
    enable_caching=False,
)
https://kubeflow.example.com) + KUBEFLOW_USERNAME: User email in the Keycloak realm + KUBEFLOW_PASSWORD: User password + KEYCLOAK_URL: Base URL where Keycloak /auth/ is reachable + (often same as KUBEFLOW_ENDPOINT) + KEYCLOAK_ADMIN_PASSWORD: Keycloak admin password + KEYCLOAK_REALM: Keycloak realm name (default: "prokube") +""" + +import json +import logging + import requests -from urllib.parse import urlsplit +import urllib3 + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +logger = logging.getLogger(__name__) + +# Name of the temporary OIDC client created for authentication +_TEMP_CLIENT_ID = "kfp-cli-tmp" + + +def _get_admin_token(keycloak_url: str, admin_password: str) -> str: + """Get an admin access token from the Keycloak master realm.""" + url = f"{keycloak_url}/auth/realms/master/protocol/openid-connect/token" + resp = requests.post( + url, + data={ + "grant_type": "password", + "client_id": "admin-cli", + "username": "admin", + "password": admin_password, + }, + verify=False, + timeout=30, + ) + resp.raise_for_status() + return resp.json()["access_token"] + + +def _create_temp_client(keycloak_url: str, realm: str, headers: dict) -> None: + """Create a temporary OIDC client with direct access grants enabled.""" + client_data = { + "clientId": _TEMP_CLIENT_ID, + "enabled": True, + "publicClient": False, + "protocol": "openid-connect", + "directAccessGrantsEnabled": True, + "serviceAccountsEnabled": True, + "standardFlowEnabled": False, + } + url = f"{keycloak_url}/auth/admin/realms/{realm}/clients" + resp = requests.post( + url, headers=headers, data=json.dumps(client_data), verify=False, timeout=30 + ) + if resp.status_code == 409: + logger.debug("Temporary client '%s' already exists", _TEMP_CLIENT_ID) + elif resp.status_code == 201: + logger.debug("Created temporary client '%s'", _TEMP_CLIENT_ID) + else: + raise RuntimeError( + f"Failed to create temp client: {resp.status_code} {resp.text}" + ) -def get_istio_auth_session(url: str, 
username: str, password: str) -> dict: - """ - Determine if the specified URL is secured by Dex and try to obtain a session cookie. - WARNING: only Dex `staticPasswords` and `LDAP` authentication are currently supported - (we default default to using `staticPasswords` if both are enabled) - - :param url: Kubeflow server URL, including protocol - :param username: Dex `staticPasswords` or `LDAP` username - :param password: Dex `staticPasswords` or `LDAP` password - :return: auth session information +def _get_client_internal_id(keycloak_url: str, realm: str, headers: dict) -> str: + """Get the internal UUID of the temporary client.""" + url = f"{keycloak_url}/auth/admin/realms/{realm}/clients" + resp = requests.get(url, headers=headers, verify=False, timeout=30) + resp.raise_for_status() + clients = resp.json() + client = next((c for c in clients if c["clientId"] == _TEMP_CLIENT_ID), None) + if not client: + raise RuntimeError(f"Could not find client '{_TEMP_CLIENT_ID}'") + return client["id"] + + +def _get_client_secret( + keycloak_url: str, realm: str, headers: dict, client_uuid: str +) -> str: + """Get the secret for the temporary client.""" + url = ( + f"{keycloak_url}/auth/admin/realms/{realm}/clients/{client_uuid}/client-secret" + ) + resp = requests.get(url, headers=headers, verify=False, timeout=30) + resp.raise_for_status() + return resp.json()["value"] + + +def _get_user_token( + keycloak_url: str, + realm: str, + client_secret: str, + username: str, + password: str, +) -> str: + """Get a user access token using the temporary client credentials.""" + url = f"{keycloak_url}/auth/realms/{realm}/protocol/openid-connect/token" + resp = requests.post( + url, + data={ + "grant_type": "password", + "client_id": _TEMP_CLIENT_ID, + "client_secret": client_secret, + "username": username, + "password": password, + }, + verify=False, + timeout=30, + ) + resp.raise_for_status() + return resp.json()["access_token"] + + +def _delete_temp_client(keycloak_url: str, realm: 
str, headers: dict) -> None: + """Delete the temporary client to clean up.""" + try: + client_uuid = _get_client_internal_id(keycloak_url, realm, headers) + url = f"{keycloak_url}/auth/admin/realms/{realm}/clients/{client_uuid}" + resp = requests.delete(url, headers=headers, verify=False, timeout=30) + if resp.status_code == 204: + logger.debug("Deleted temporary client '%s'", _TEMP_CLIENT_ID) + else: + logger.warning( + "Failed to delete temp client: %s %s", resp.status_code, resp.text + ) + except Exception as e: + logger.warning("Could not clean up temporary client: %s", e) + + +def get_keycloak_token( + keycloak_url: str, + admin_password: str, + username: str, + password: str, + realm: str = "prokube", +) -> str: """ - # define the default return object - auth_session = { - "endpoint_url": url, # KF endpoint URL - "redirect_url": None, # KF redirect URL, if applicable - "dex_login_url": None, # Dex login URL (for POST of credentials) - "is_secured": None, # True if KF endpoint is secured - "session_cookie": None # Resulting session cookies in the form "key1=value1; key2=value2" - } + Obtain a Keycloak user access token for authenticating with Kubeflow. - # use a persistent session (for cookies) - with requests.Session() as s: + This creates a temporary OIDC client in Keycloak, uses it to get a user + token via the Resource Owner Password Credentials grant, then cleans up + the temp client. - ################ - # Determine if Endpoint is Secured - ################ - resp = s.get(url, allow_redirects=True, verify=False) - if resp.status_code != 200: - raise RuntimeError( - f"HTTP status code '{resp.status_code}' for GET against: {url}" - ) + The returned token can be passed to the KFP Client via ``existing_token``. - auth_session["redirect_url"] = resp.url + NOTE: This requires Keycloak admin credentials. The temporary client is + created and deleted within this function call. 
- # if we were NOT redirected, then the endpoint is UNSECURED - if len(resp.history) == 0: - auth_session["is_secured"] = False - return auth_session - else: - auth_session["is_secured"] = True - - ################ - # Get Dex Login URL - ################ - redirect_url_obj = urlsplit(auth_session["redirect_url"]) - - # if we are at `/auth?=xxxx` path, we need to select an auth type - if re.search(r"/auth$", redirect_url_obj.path): - ####### - # TIP: choose the default auth type by including ONE of the following - ####### - - # OPTION 1: set "staticPasswords" as default auth type - redirect_url_obj = redirect_url_obj._replace( - path=re.sub(r"/auth$", "/auth/local", redirect_url_obj.path) - ) - # OPTION 2: set "ldap" as default auth type - # redirect_url_obj = redirect_url_obj._replace( - # path=re.sub(r"/auth$", "/auth/ldap", redirect_url_obj.path) - # ) + Args: + keycloak_url: Keycloak base URL (e.g. https://keycloak.example.com) + admin_password: Keycloak admin password + username: User email/username in the Keycloak realm + password: User password + realm: Keycloak realm name (default: "prokube") - # if we are at `/auth/xxxx/login` path, then no further action is needed (we can use it for login POST) - if re.search(r"/auth/.*/login$", redirect_url_obj.path): - auth_session["dex_login_url"] = redirect_url_obj.geturl() + Returns: + A Bearer access token string. 
+ """ + admin_token = _get_admin_token(keycloak_url, admin_password) + admin_headers = { + "Authorization": f"Bearer {admin_token}", + "Content-Type": "application/json", + } - # else, we need to be redirected to the actual login page - else: - # this GET should redirect us to the `/auth/xxxx/login` path - resp = s.get(redirect_url_obj.geturl(), allow_redirects=True, verify=False) - if resp.status_code != 200: - raise RuntimeError( - f"HTTP status code '{resp.status_code}' for GET against: {redirect_url_obj.geturl()}" - ) - - # set the login url - auth_session["dex_login_url"] = resp.url - - ################ - # Attempt Dex Login - ################ - resp = s.post( - auth_session["dex_login_url"], - data={"login": username, "password": password}, - verify=False, - allow_redirects=True + try: + _create_temp_client(keycloak_url, realm, admin_headers) + client_uuid = _get_client_internal_id(keycloak_url, realm, admin_headers) + client_secret = _get_client_secret( + keycloak_url, realm, admin_headers, client_uuid ) - if len(resp.history) == 0: - raise RuntimeError( - f"Login credentials were probably invalid - " - f"No redirect after POST to: {auth_session['dex_login_url']}" - ) - - # store the session cookies in a "key1=value1; key2=value2" string - auth_session["session_cookie"] = "; ".join([f"{c.name}={c.value}" for c in s.cookies]) + user_token = _get_user_token( + keycloak_url, realm, client_secret, username, password + ) + finally: + _delete_temp_client(keycloak_url, realm, admin_headers) - return auth_session + return user_token diff --git a/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py b/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py index a89cc3e..6c9268c 100644 --- a/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py +++ b/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py @@ -5,9 +5,9 @@ to match the upstream 2.14+ API. 
Import this module BEFORE using DockerRunner with ports/environment. Usage (exactly like upstream KFP 2.14+): - import kfp_docker_monkey_patches # Apply patches + from utils import kfp_docker_monkey_patches # Apply patches from kfp import local - + # Explicit ports and environment (upstream-compatible API) local.init(runner=local.DockerRunner( ports={'5678/tcp': 5678}, @@ -23,61 +23,68 @@ def apply_docker_port_patches(): """Apply all necessary patches to enable port support in DockerRunner.""" - + # Patch 1: Enable ports argument in DockerRunner _patch_docker_runner_args() - + # Patch 2: Extend run_docker_container to accept additional arguments _patch_run_docker_container() - + # Patch 3: Modify DockerTaskHandler to pass through container arguments _patch_docker_task_handler() - + # Patch 4: Extend DockerRunner constructor _patch_docker_runner_init() def _patch_docker_runner_args(): """Add ports and environment to allowed DockerRunner arguments.""" - if not hasattr(DockerRunner, 'DOCKER_CONTAINER_RUN_ARGS'): + if not hasattr(DockerRunner, "DOCKER_CONTAINER_RUN_ARGS"): # Create set with essential arguments including ports and environment for older versions DockerRunner.DOCKER_CONTAINER_RUN_ARGS = { - 'ports', 'environment', 'volumes', 'network_mode', 'user', - 'working_dir', 'entrypoint', 'command', 'auto_remove', 'privileged' + "ports", + "environment", + "volumes", + "network_mode", + "user", + "working_dir", + "entrypoint", + "command", + "auto_remove", + "privileged", } else: # Add ports and environment to existing set - DockerRunner.DOCKER_CONTAINER_RUN_ARGS.add('ports') - DockerRunner.DOCKER_CONTAINER_RUN_ARGS.add('environment') + DockerRunner.DOCKER_CONTAINER_RUN_ARGS.add("ports") + DockerRunner.DOCKER_CONTAINER_RUN_ARGS.add("environment") def _patch_run_docker_container(): """Patch run_docker_container to accept additional Docker arguments.""" - + # Backup original function original_run_docker_container = docker_task_handler.run_docker_container - + def 
patched_run_docker_container(client, image, command, volumes, **kwargs): """Enhanced run_docker_container with support for additional Docker arguments.""" - + # Add latest tag if not present - if ':' not in image: - image = f'{image}:latest' - + if ":" not in image: + image = f"{image}:latest" + # Check if image exists image_exists = any( - image in existing_image.tags - for existing_image in client.images.list() + image in existing_image.tags for existing_image in client.images.list() ) - + if image_exists: - print(f'Found image {image!r}\n') + print(f"Found image {image!r}\n") else: - print(f'Pulling image {image!r}') - repository, tag = image.split(':') + print(f"Pulling image {image!r}") + repository, tag = image.split(":") client.images.pull(repository=repository, tag=tag) - print('Image pull complete\n') - + print("Image pull complete\n") + # Run container with all provided arguments container = client.containers.run( image=image, @@ -86,88 +93,93 @@ def patched_run_docker_container(client, image, command, volumes, **kwargs): stdout=True, stderr=True, volumes=volumes, - **kwargs # Pass through ports and other arguments + **kwargs, # Pass through ports and other arguments ) - + # Stream logs for line in container.logs(stream=True): - print(line.decode(), end='') - - return container.wait()['StatusCode'] - + print(line.decode(), end="") + + return container.wait()["StatusCode"] + # Replace original function docker_task_handler.run_docker_container = patched_run_docker_container def _patch_docker_task_handler(): """Patch DockerTaskHandler to pass container arguments to run_docker_container.""" - + # Backup original method original_docker_task_handler_run = docker_task_handler.DockerTaskHandler.run - + def patched_docker_task_handler_run(self): """Enhanced DockerTaskHandler.run method with container args support.""" import docker + client = docker.from_env() try: volumes = self.get_volumes_to_mount() - + # Get additional container arguments from runner extra_args 
= {} - if hasattr(self.runner, 'container_run_args'): + if hasattr(self.runner, "container_run_args"): extra_args = self.runner.container_run_args - elif hasattr(self.runner, '__dict__'): + elif hasattr(self.runner, "__dict__"): # Fallback: use all non-private attributes as container args - extra_args = {k: v for k, v in self.runner.__dict__.items() - if not k.startswith('_') and k != 'container_run_args'} - - if 'volumes' in extra_args: - user_volumes = extra_args.pop('volumes') + extra_args = { + k: v + for k, v in self.runner.__dict__.items() + if not k.startswith("_") and k != "container_run_args" + } + + if "volumes" in extra_args: + user_volumes = extra_args.pop("volumes") volumes.update(user_volumes) return_code = docker_task_handler.run_docker_container( client=client, image=self.image, command=self.full_command, volumes=volumes, - **extra_args + **extra_args, ) finally: client.close() - + from kfp.local import status + return status.Status.SUCCESS if return_code == 0 else status.Status.FAILURE - + # Replace original method docker_task_handler.DockerTaskHandler.run = patched_docker_task_handler_run def _patch_docker_runner_init(): """Patch DockerRunner constructor to store container arguments.""" - + # Backup original init (if it exists) - original_docker_runner_init = getattr(DockerRunner, '__init__', None) - + original_docker_runner_init = getattr(DockerRunner, "__init__", None) + def patched_docker_runner_init(self, **kwargs): """Enhanced DockerRunner constructor that stores container run arguments.""" import os - + # Auto-pass debug environment variables to container - environment = kwargs.get('environment', {}) - if 'KFP_DEBUG' not in environment and 'KFP_DEBUG' in os.environ: - environment['KFP_DEBUG'] = os.environ['KFP_DEBUG'] - if 'KFP_DEBUG_PORT' not in environment and 'KFP_DEBUG_PORT' in os.environ: - environment['KFP_DEBUG_PORT'] = os.environ['KFP_DEBUG_PORT'] - + environment = kwargs.get("environment", {}) + if "KFP_DEBUG" not in environment and 
"KFP_DEBUG" in os.environ: + environment["KFP_DEBUG"] = os.environ["KFP_DEBUG"] + if "KFP_DEBUG_PORT" not in environment and "KFP_DEBUG_PORT" in os.environ: + environment["KFP_DEBUG_PORT"] = os.environ["KFP_DEBUG_PORT"] + if environment: - kwargs['environment'] = environment - + kwargs["environment"] = environment + # Store container run args for later use self.container_run_args = kwargs - + # Call original __post_init__ if it exists (for dataclass compatibility) - if hasattr(DockerRunner, '__post_init__'): + if hasattr(DockerRunner, "__post_init__"): self.__post_init__() - + # Replace constructor DockerRunner.__init__ = patched_docker_runner_init @@ -176,5 +188,7 @@ def patched_docker_runner_init(self, **kwargs): apply_docker_port_patches() print("✅ KFP Docker port & environment patches applied successfully!") -print(" Usage (upstream 2.14+ compatible): DockerRunner(ports={'5678/tcp': 5678}, environment={'DEBUG': 'true'})") +print( + " Usage (upstream 2.14+ compatible): DockerRunner(ports={'5678/tcp': 5678}, environment={'DEBUG': 'true'})" +) print(" This patch will be obsolete once you upgrade to KFP 2.14+") From 391d513ca6ca79e3bb1f15654a4914ff75b3f3f0 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Thu, 12 Mar 2026 17:39:21 +0100 Subject: [PATCH 15/16] Simplify auth: remove admin client setup script, keep only user token flow Remove setup_keycloak_client.py and all admin/temp-client machinery from auth_session.py. The module now contains only get_user_token() which uses a pre-existing OIDC client created by the admin via the Keycloak UI. This removes the need for Keycloak admin credentials at submission time and simplifies the codebase for this PR. 
--- pipelines/pipe-fiction/README.md | 79 ++++++- .../submit_to_cluster_from_remote.py | 32 +-- .../pipelines/utils/auth_session.py | 205 ++++-------------- 3 files changed, 126 insertions(+), 190 deletions(-) diff --git a/pipelines/pipe-fiction/README.md b/pipelines/pipe-fiction/README.md index 7254623..4fc422d 100644 --- a/pipelines/pipe-fiction/README.md +++ b/pipelines/pipe-fiction/README.md @@ -121,11 +121,14 @@ Why is debugging a challenge? python submit_to_cluster_from_kf_notebook.py ``` - Submit to the cluster from a remote machine (requires Keycloak admin access): + Submit to the cluster from a remote machine (requires Keycloak setup — see [details below](#from-a-remote-machine--keycloak-auth)): ```bash python submit_to_cluster_from_remote.py ``` - See [Cluster Execution](#3-cluster-execution-in-cluster-debugging) for required environment variables. + + **Don't have Keycloak or remote access set up?** You can still submit to the cluster: + - **From a Kubeflow notebook:** clone this repo into a notebook, install deps, and run `python submit_to_cluster_from_kf_notebook.py` — no auth setup needed. + - **Compile & upload manually:** `python -c "from kfp.compiler import Compiler; from pipeline import example_pipeline; Compiler().compile(example_pipeline, 'pipeline.yaml')"`, then upload `pipeline.yaml` through the KFP UI. ## Repository Organization @@ -271,25 +274,79 @@ cd pipelines python submit_to_cluster_from_kf_notebook.py ``` -**From a remote machine** (requires Keycloak admin access): +#### From a remote machine — Keycloak auth + +Remote submission requires an OIDC client in Keycloak that supports the +Resource Owner Password Credentials (ROPC) grant. This is what lets the +script exchange a username + password for a token without a browser redirect. + +#### Prerequisites: create the OIDC client (once) + +A Keycloak admin creates the client once via the **Keycloak Admin Console**: + +1. Log in to the Keycloak Admin Console (e.g. `https:///auth/admin/`) +2. 
Select the realm where your Kubeflow users are managed (e.g. `prokube`) +3. Go to **Clients** and click **Create client** +4. Configure it with these settings: + - **Client ID:** `kfp-remote-user` (or any name you prefer) + - **Client Protocol:** `openid-connect` + - **Client authentication:** `On` (confidential client) + - **Authorization:** `Off` + - **Authentication flow** — enable **only**: + - `Direct access grants` (this is the ROPC grant) + - Disable everything else (`Standard flow`, `Implicit flow`, `Service accounts roles`, etc.) +5. Click **Save**, then go to the **Credentials** tab +6. Copy the **Client secret** — share this with users securely (e.g. via a secrets manager) + +#### User: submit the pipeline + +Once the admin has shared the client ID and secret, the user submits with: + ```bash cd pipelines -# Set required environment variables export KUBEFLOW_ENDPOINT=https://kubeflow.example.com export KUBEFLOW_USERNAME=user@example.com export KUBEFLOW_PASSWORD=your-password -export KEYCLOAK_URL=https://kubeflow.example.com # Base URL where Keycloak /auth/ is reachable -export KEYCLOAK_ADMIN_PASSWORD=admin-password +export KEYCLOAK_URL=https://kubeflow.example.com +export KFP_CLIENT_SECRET= # Optional: -export KEYCLOAK_REALM=prokube # default: "prokube" -export KUBEFLOW_NAMESPACE=my-ns # default: derived from username +export KEYCLOAK_REALM=prokube # default: "prokube" +export KFP_CLIENT_ID=kfp-remote-user # default: "kfp-remote-user" +export KUBEFLOW_NAMESPACE=my-ns # default: derived from username python submit_to_cluster_from_remote.py ``` -> **Note:** Remote submission creates a temporary Keycloak OIDC client to obtain a user token, then deletes it after authentication. This requires Keycloak admin credentials. The token is passed directly to the KFP Client via the `existing_token` parameter. -> -> `KEYCLOAK_URL` should be the base URL where the Keycloak `/auth/` endpoint is reachable. 
In many setups, this is the same as `KUBEFLOW_ENDPOINT` (Keycloak is typically exposed at `/auth/` on the same ingress). +> **Note:** `KEYCLOAK_URL` should be the base URL where the Keycloak `/auth/` +> endpoint is reachable. In many setups this is the same as `KUBEFLOW_ENDPOINT`. + +#### Without Keycloak or remote auth setup + +If you don't have Keycloak set up or don't want to deal with remote +authentication, you can still submit pipelines to the cluster: + +**Option A — Clone into a Kubeflow notebook and submit from there:** + +From a Kubeflow notebook terminal (no extra auth needed — the notebook +session is already authenticated): + +```bash +git clone +cd pipelines/pipe-fiction/pipelines +pip install -r requirements.txt # or: uv sync && uv pip install -e ../pipe-fiction-codebase/ +python submit_to_cluster_from_kf_notebook.py +``` + +**Option B — Compile the pipeline locally and upload via the KFP UI:** + +```bash +cd pipelines +python -c "from kfp.compiler import Compiler; from pipeline import example_pipeline; Compiler().compile(example_pipeline, 'pipeline.yaml')" +``` + +Then open the Kubeflow Pipelines UI, go to **Pipelines > Upload pipeline**, +and upload the generated `pipeline.yaml` file. + **Cluster Execution Workflow** diff --git a/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py index 3044007..718ae16 100644 --- a/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py +++ b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py @@ -1,18 +1,23 @@ """ Submit a KFP pipeline to a remote Kubeflow cluster using Keycloak authentication. +Prerequisites: + A Keycloak admin must have created an OIDC client with Direct Access Grants + enabled and shared the client secret with you (see README for details). + Required environment variables: - KUBEFLOW_ENDPOINT: Kubeflow URL (e.g. 
https://kubeflow.example.com) - KUBEFLOW_USERNAME: User email in the Keycloak realm - KUBEFLOW_PASSWORD: User password - KEYCLOAK_URL: Base URL where Keycloak /auth/ is reachable - (often same as KUBEFLOW_ENDPOINT) - KEYCLOAK_ADMIN_PASSWORD: Keycloak admin password + KUBEFLOW_ENDPOINT: Kubeflow URL (e.g. https://kubeflow.example.com) + KUBEFLOW_USERNAME: User email in the Keycloak realm + KUBEFLOW_PASSWORD: User password + KEYCLOAK_URL: Base URL where Keycloak /auth/ is reachable + (often same as KUBEFLOW_ENDPOINT) + KFP_CLIENT_SECRET: Client secret provided by the admin Optional environment variables: - KEYCLOAK_REALM: Keycloak realm name (default: "prokube") - KUBEFLOW_NAMESPACE: KFP namespace (default: derived from username) - IMAGE_TAG: Docker image for the pipeline components + KEYCLOAK_REALM: Keycloak realm name (default: "prokube") + KFP_CLIENT_ID: Client ID created by admin (default: "kfp-remote-user") + KUBEFLOW_NAMESPACE: KFP namespace (default: derived from username) + IMAGE_TAG: Docker image for the pipeline components """ import os @@ -21,17 +26,18 @@ from kfp.client import Client from pipeline import example_pipeline -from utils.auth_session import get_keycloak_token +from utils.auth_session import get_user_token truststore.inject_into_ssl() -# Authenticate via Keycloak -token = get_keycloak_token( +# Authenticate via Keycloak (requires a pre-created OIDC client — see README) +token = get_user_token( keycloak_url=os.environ["KEYCLOAK_URL"], - admin_password=os.environ["KEYCLOAK_ADMIN_PASSWORD"], + client_secret=os.environ["KFP_CLIENT_SECRET"], username=os.environ["KUBEFLOW_USERNAME"], password=os.environ["KUBEFLOW_PASSWORD"], realm=os.environ.get("KEYCLOAK_REALM", "prokube"), + client_id=os.environ.get("KFP_CLIENT_ID", "kfp-remote-user"), ) namespace = os.environ.get("KUBEFLOW_NAMESPACE") or os.environ[ diff --git a/pipelines/pipe-fiction/pipelines/utils/auth_session.py b/pipelines/pipe-fiction/pipelines/utils/auth_session.py index ad5859a..7955d7e 
100644 --- a/pipelines/pipe-fiction/pipelines/utils/auth_session.py +++ b/pipelines/pipe-fiction/pipelines/utils/auth_session.py @@ -1,125 +1,64 @@ """ Keycloak authentication for remote KFP pipeline submission. -This module provides a way to obtain a Bearer token for Kubeflow -when the cluster uses Keycloak as the identity provider. - -The workflow is: -1. Get an admin token from Keycloak (master realm). -2. Create a temporary OIDC client with direct access grants. -3. Use that client to obtain a user access token. -4. Clean up the temporary client. -5. Pass the token to the KFP Client via ``existing_token``. - -Requirements: -- KEYCLOAK_ADMIN_PASSWORD must be provided (or kubectl access to read the secret). -- The user must exist in the Keycloak realm. - -Environment variables: - KUBEFLOW_ENDPOINT: Kubeflow URL (e.g. https://kubeflow.example.com) - KUBEFLOW_USERNAME: User email in the Keycloak realm - KUBEFLOW_PASSWORD: User password - KEYCLOAK_URL: Base URL where Keycloak /auth/ is reachable - (often same as KUBEFLOW_ENDPOINT) - KEYCLOAK_ADMIN_PASSWORD: Keycloak admin password - KEYCLOAK_REALM: Keycloak realm name (default: "prokube") +Obtains a user-scoped Bearer token using a pre-existing OIDC client that +has the Resource Owner Password Credentials (ROPC) grant enabled. The +admin creates this client once (via the Keycloak Admin UI); users then +authenticate with just their credentials and the client secret. + +The returned token can be passed to the KFP Client via ``existing_token``. + +Environment variables (for reference — callers pass values explicitly): + KUBEFLOW_ENDPOINT: Kubeflow URL (e.g. 
https://kubeflow.example.com) + KUBEFLOW_USERNAME: User email in the Keycloak realm + KUBEFLOW_PASSWORD: User password + KEYCLOAK_URL: Base URL where Keycloak /auth/ is reachable + (often same as KUBEFLOW_ENDPOINT) + KFP_CLIENT_SECRET: Client secret shared by the admin + KEYCLOAK_REALM: Keycloak realm name (default: "prokube") + KFP_CLIENT_ID: Client ID created by admin (default: "kfp-remote-user") """ -import json -import logging - import requests import urllib3 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) -logger = logging.getLogger(__name__) - -# Name of the temporary OIDC client created for authentication -_TEMP_CLIENT_ID = "kfp-cli-tmp" - - -def _get_admin_token(keycloak_url: str, admin_password: str) -> str: - """Get an admin access token from the Keycloak master realm.""" - url = f"{keycloak_url}/auth/realms/master/protocol/openid-connect/token" - resp = requests.post( - url, - data={ - "grant_type": "password", - "client_id": "admin-cli", - "username": "admin", - "password": admin_password, - }, - verify=False, - timeout=30, - ) - resp.raise_for_status() - return resp.json()["access_token"] - - -def _create_temp_client(keycloak_url: str, realm: str, headers: dict) -> None: - """Create a temporary OIDC client with direct access grants enabled.""" - client_data = { - "clientId": _TEMP_CLIENT_ID, - "enabled": True, - "publicClient": False, - "protocol": "openid-connect", - "directAccessGrantsEnabled": True, - "serviceAccountsEnabled": True, - "standardFlowEnabled": False, - } - url = f"{keycloak_url}/auth/admin/realms/{realm}/clients" - resp = requests.post( - url, headers=headers, data=json.dumps(client_data), verify=False, timeout=30 - ) - if resp.status_code == 409: - logger.debug("Temporary client '%s' already exists", _TEMP_CLIENT_ID) - elif resp.status_code == 201: - logger.debug("Created temporary client '%s'", _TEMP_CLIENT_ID) - else: - raise RuntimeError( - f"Failed to create temp client: {resp.status_code} {resp.text}" - ) - - 
-def _get_client_internal_id(keycloak_url: str, realm: str, headers: dict) -> str: - """Get the internal UUID of the temporary client.""" - url = f"{keycloak_url}/auth/admin/realms/{realm}/clients" - resp = requests.get(url, headers=headers, verify=False, timeout=30) - resp.raise_for_status() - clients = resp.json() - client = next((c for c in clients if c["clientId"] == _TEMP_CLIENT_ID), None) - if not client: - raise RuntimeError(f"Could not find client '{_TEMP_CLIENT_ID}'") - return client["id"] - - -def _get_client_secret( - keycloak_url: str, realm: str, headers: dict, client_uuid: str -) -> str: - """Get the secret for the temporary client.""" - url = ( - f"{keycloak_url}/auth/admin/realms/{realm}/clients/{client_uuid}/client-secret" - ) - resp = requests.get(url, headers=headers, verify=False, timeout=30) - resp.raise_for_status() - return resp.json()["value"] +_DEFAULT_CLIENT_ID = "kfp-remote-user" -def _get_user_token( +def get_user_token( keycloak_url: str, - realm: str, client_secret: str, username: str, password: str, + realm: str = "prokube", + client_id: str = _DEFAULT_CLIENT_ID, ) -> str: - """Get a user access token using the temporary client credentials.""" + """ + Obtain a user-scoped Bearer token for authenticating with Kubeflow. + + Uses the Resource Owner Password Credentials grant against a Keycloak + OIDC client that was pre-created by the admin. No admin credentials + are needed. + + Args: + keycloak_url: Base URL where Keycloak ``/auth/`` is reachable + client_secret: Client secret shared by the admin + username: User email/username in the Keycloak realm + password: User password + realm: Keycloak realm name (default: "prokube") + client_id: Client ID created by admin (default: "kfp-remote-user") + + Returns: + A Bearer access token string for ``Client(existing_token=...)``. 
+ """ url = f"{keycloak_url}/auth/realms/{realm}/protocol/openid-connect/token" resp = requests.post( url, data={ "grant_type": "password", - "client_id": _TEMP_CLIENT_ID, + "client_id": client_id, "client_secret": client_secret, "username": username, "password": password, @@ -129,69 +68,3 @@ def _get_user_token( ) resp.raise_for_status() return resp.json()["access_token"] - - -def _delete_temp_client(keycloak_url: str, realm: str, headers: dict) -> None: - """Delete the temporary client to clean up.""" - try: - client_uuid = _get_client_internal_id(keycloak_url, realm, headers) - url = f"{keycloak_url}/auth/admin/realms/{realm}/clients/{client_uuid}" - resp = requests.delete(url, headers=headers, verify=False, timeout=30) - if resp.status_code == 204: - logger.debug("Deleted temporary client '%s'", _TEMP_CLIENT_ID) - else: - logger.warning( - "Failed to delete temp client: %s %s", resp.status_code, resp.text - ) - except Exception as e: - logger.warning("Could not clean up temporary client: %s", e) - - -def get_keycloak_token( - keycloak_url: str, - admin_password: str, - username: str, - password: str, - realm: str = "prokube", -) -> str: - """ - Obtain a Keycloak user access token for authenticating with Kubeflow. - - This creates a temporary OIDC client in Keycloak, uses it to get a user - token via the Resource Owner Password Credentials grant, then cleans up - the temp client. - - The returned token can be passed to the KFP Client via ``existing_token``. - - NOTE: This requires Keycloak admin credentials. The temporary client is - created and deleted within this function call. - - Args: - keycloak_url: Keycloak base URL (e.g. https://keycloak.example.com) - admin_password: Keycloak admin password - username: User email/username in the Keycloak realm - password: User password - realm: Keycloak realm name (default: "prokube") - - Returns: - A Bearer access token string. 
- """ - admin_token = _get_admin_token(keycloak_url, admin_password) - admin_headers = { - "Authorization": f"Bearer {admin_token}", - "Content-Type": "application/json", - } - - try: - _create_temp_client(keycloak_url, realm, admin_headers) - client_uuid = _get_client_internal_id(keycloak_url, realm, admin_headers) - client_secret = _get_client_secret( - keycloak_url, realm, admin_headers, client_uuid - ) - user_token = _get_user_token( - keycloak_url, realm, client_secret, username, password - ) - finally: - _delete_temp_client(keycloak_url, realm, admin_headers) - - return user_token From d2f64ef34467e9eab75ca1ba20f1f54efae3cd09 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Thu, 12 Mar 2026 19:56:45 +0100 Subject: [PATCH 16/16] Address Copilot review: type safety, configurable TLS, robustness fixes - Fix type annotations: List -> List[str], Dict[str,str] -> Dict[str,object], any -> Any - Make TLS verification configurable via verify_ssl param and VERIFY_SSL env var - Fix image.split(':') -> rsplit(':', 1) for registry:port image refs - Add allow-list filtering for extra_args in DockerRunner monkey patch - Call original DockerRunner.__init__ before applying patches - Validate debug parameter exists in decorated function signature - Fix raw-string docstring delimiter extraction (r-prefix handling) - Normalize packages_to_install to new list to prevent mutation side effects - Replace print() with logger calls for import-time messages - Fix docstring import example to match actual module path --- .../pipe_fiction/data_processor.py | 43 ++-- .../pipe-fiction/pipelines/components.py | 8 +- .../submit_to_cluster_from_remote.py | 7 +- .../pipelines/utils/auth_session.py | 5 +- .../pipelines/utils/debuggable_component.py | 192 +++++++++++------- .../utils/kfp_docker_monkey_patches.py | 29 ++- 6 files changed, 169 insertions(+), 115 deletions(-) diff --git a/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py 
b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py index 34908af..5affb21 100644 --- a/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py +++ b/pipelines/pipe-fiction/pipe-fiction-codebase/pipe_fiction/data_processor.py @@ -3,29 +3,29 @@ """ from loguru import logger -from typing import List, Dict +from typing import Any, List, Dict class DataProcessor: """Simple text processor with logging.""" - + def __init__(self): logger.info("🔧 Initializing DataProcessor") - - def process_lines(self, lines: List[str]) -> List[Dict[str, str]]: + + def process_lines(self, lines: List[str]) -> List[Dict[str, object]]: """Process lines and extract useful information.""" logger.info(f"⚙️ Processing {len(lines)} lines") - + processed_data = [] - + for i, line in enumerate(lines, 1): # Clean the line clean_line = line.strip() - + if not clean_line: logger.debug(f"⏭️ Skipping empty line {i}") continue - + # Extract some info processed_item = { "line_number": i, @@ -33,29 +33,34 @@ def process_lines(self, lines: List[str]) -> List[Dict[str, str]]: "word_count": len(clean_line.split()), "contains_mlops": "mlops" in clean_line.lower(), "contains_kubeflow": "kubeflow" in clean_line.lower(), - "length": len(clean_line) + "length": len(clean_line), } - + processed_data.append(processed_item) logger.debug(f"✨ Processed line {i}: {processed_item['word_count']} words") - + logger.success(f"🎉 Successfully processed {len(processed_data)} lines") return processed_data - - def get_summary(self, processed_data: List[Dict[str, str]]) -> Dict[str, any]: + + def get_summary(self, processed_data: List[Dict[str, object]]) -> Dict[str, Any]: """Get summary statistics.""" logger.info("📊 Generating summary statistics") - + if not processed_data: return {"total_lines": 0} - + summary = { "total_lines": len(processed_data), "total_words": sum(item["word_count"] for item in processed_data), - "mlops_mentions": sum(1 for item in processed_data if 
item["contains_mlops"]), - "kubeflow_mentions": sum(1 for item in processed_data if item["contains_kubeflow"]), - "avg_line_length": sum(item["length"] for item in processed_data) / len(processed_data) + "mlops_mentions": sum( + 1 for item in processed_data if item["contains_mlops"] + ), + "kubeflow_mentions": sum( + 1 for item in processed_data if item["contains_kubeflow"] + ), + "avg_line_length": sum(item["length"] for item in processed_data) + / len(processed_data), } - + logger.info(f"📈 Summary: {summary}") return summary diff --git a/pipelines/pipe-fiction/pipelines/components.py b/pipelines/pipe-fiction/pipelines/components.py index c650c99..2d38e98 100644 --- a/pipelines/pipe-fiction/pipelines/components.py +++ b/pipelines/pipe-fiction/pipelines/components.py @@ -7,13 +7,13 @@ BASE_IMAGE = os.getenv("IMAGE_TAG") -assert ( - BASE_IMAGE -), "Please specify image for your component in `IMAGE_TAG` environment variable" +assert BASE_IMAGE, ( + "Please specify image for your component in `IMAGE_TAG` environment variable" +) @lightweight_debuggable_component(base_image=BASE_IMAGE) -def generate_data_comp(debug: bool = False) -> List: +def generate_data_comp(debug: bool = False) -> List[str]: from pipe_fiction.data_generator import DataGenerator generator = DataGenerator() diff --git a/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py index 718ae16..cc5e763 100644 --- a/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py +++ b/pipelines/pipe-fiction/pipelines/submit_to_cluster_from_remote.py @@ -17,6 +17,8 @@ KEYCLOAK_REALM: Keycloak realm name (default: "prokube") KFP_CLIENT_ID: Client ID created by admin (default: "kfp-remote-user") KUBEFLOW_NAMESPACE: KFP namespace (default: derived from username) + VERIFY_SSL: Verify TLS certificates (default: "false" for + internal clusters with self-signed certs) IMAGE_TAG: Docker image for the pipeline components """ @@ -30,6 
+32,8 @@ truststore.inject_into_ssl() +verify_ssl = os.environ.get("VERIFY_SSL", "false").lower() in ("true", "1", "yes") + # Authenticate via Keycloak (requires a pre-created OIDC client — see README) token = get_user_token( keycloak_url=os.environ["KEYCLOAK_URL"], @@ -38,6 +42,7 @@ password=os.environ["KUBEFLOW_PASSWORD"], realm=os.environ.get("KEYCLOAK_REALM", "prokube"), client_id=os.environ.get("KFP_CLIENT_ID", "kfp-remote-user"), + verify_ssl=verify_ssl, ) namespace = os.environ.get("KUBEFLOW_NAMESPACE") or os.environ[ @@ -48,7 +53,7 @@ host=f"{os.environ['KUBEFLOW_ENDPOINT']}/pipeline", namespace=namespace, existing_token=token, - verify_ssl=False, + verify_ssl=verify_ssl, ) run = client.create_run_from_pipeline_func( diff --git a/pipelines/pipe-fiction/pipelines/utils/auth_session.py b/pipelines/pipe-fiction/pipelines/utils/auth_session.py index 7955d7e..9b04e32 100644 --- a/pipelines/pipe-fiction/pipelines/utils/auth_session.py +++ b/pipelines/pipe-fiction/pipelines/utils/auth_session.py @@ -34,6 +34,7 @@ def get_user_token( password: str, realm: str = "prokube", client_id: str = _DEFAULT_CLIENT_ID, + verify_ssl: bool = False, ) -> str: """ Obtain a user-scoped Bearer token for authenticating with Kubeflow. @@ -49,6 +50,8 @@ def get_user_token( password: User password realm: Keycloak realm name (default: "prokube") client_id: Client ID created by admin (default: "kfp-remote-user") + verify_ssl: Verify TLS certificates (default: False for + internal clusters with self-signed certs) Returns: A Bearer access token string for ``Client(existing_token=...)``. 
@@ -63,7 +66,7 @@ def get_user_token( "username": username, "password": password, }, - verify=False, + verify=verify_ssl, timeout=30, ) resp.raise_for_status() diff --git a/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py b/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py index 3042e94..c3aac36 100644 --- a/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py +++ b/pipelines/pipe-fiction/pipelines/utils/debuggable_component.py @@ -19,37 +19,46 @@ def lightweight_debuggable_component( debugger_type: Literal["debugpy", "remote-pdb"] = "debugpy", debug_port: int = 5678, auto_install_packages: bool = True, - **component_kwargs + **component_kwargs, ): """ Decorator that creates KFP Lightweight Components with automatic debugging code injection. - + LIGHTWEIGHT COMPONENTS ONLY - Does not work with Container Components! - + This decorator automatically injects debugging code into your component functions, eliminating the need to manually add debugging boilerplate. - + Args: debugger_type: Type of debugger to use ("debugpy" or "remote-pdb") debug_port: Port for remote debugging (default: 5678) **component_kwargs: All arguments passed to @component decorator (base_image, packages_to_install, etc.) - + Usage: @lightweight_debuggable_component() def my_component(arg1: str, debug: bool = False) -> str: # Just your component logic - debugging code is auto-injected! return result - + # With remote pdb and base image: @lightweight_debuggable_component( base_image="my-custom:latest", - debugger_type="remote-pdb", + debugger_type="remote-pdb", debug_port=4444 ) def my_component(debug: bool = False) -> str: return "result" """ + def decorator(func: Callable) -> Callable: + # Validate that the wrapped function has a `debug` parameter + sig = inspect.signature(func) + if "debug" not in sig.parameters: + raise TypeError( + f"@lightweight_debuggable_component requires '{func.__name__}' to have " + f"a 'debug' parameter (e.g. 
debug: bool = False)" + ) + # Get source file info for logging try: source_file = inspect.getfile(func) @@ -58,70 +67,80 @@ def decorator(func: Callable) -> Callable: except (OSError, TypeError): source_file = "" source_path = Path("") - logger.warning(f"Processing component '{func.__name__}' from unknown source") - + logger.warning( + f"Processing component '{func.__name__}' from unknown source" + ) + # Determine debugger package to install (only if auto_install_packages is True) if auto_install_packages: debugger_package = debugger_type - packages_to_install = component_kwargs.get("packages_to_install", []) - + packages_to_install = list(component_kwargs.get("packages_to_install", [])) + # Always add loguru for better logging in components if "loguru" not in packages_to_install: packages_to_install.append("loguru") - + if debugger_package not in packages_to_install: packages_to_install.append(debugger_package) - component_kwargs["packages_to_install"] = packages_to_install - logger.debug(f"Added {debugger_package} to packages_to_install") + + component_kwargs["packages_to_install"] = packages_to_install + logger.debug(f"packages_to_install: {packages_to_install}") else: - logger.debug("Skipping automatic package installation (auto_install_packages=False)") - + logger.debug( + "Skipping automatic package installation (auto_install_packages=False)" + ) + # Get the original function source code try: original_source = inspect.getsource(func) - logger.debug(f"Extracted source code for {func.__name__} ({len(original_source)} chars)") + logger.debug( + f"Extracted source code for {func.__name__} ({len(original_source)} chars)" + ) except OSError as e: logger.error(f"Cannot get source code for {func.__name__}: {e}") # Fallback to original function without debugging return component(**component_kwargs)(func) - + def inject_debugging_code(source_code: str) -> str: """Inject debugging code using AST parsing for robustness.""" try: # Parse source into AST for robust function 
finding tree = ast.parse(source_code) - + # Find the target function definition target_func_node = None for node in ast.walk(tree): - if (isinstance(node, ast.FunctionDef) and - node.name == func.__name__): + if isinstance(node, ast.FunctionDef) and node.name == func.__name__: target_func_node = node break - + if not target_func_node: - logger.warning(f"Could not find function '{func.__name__}' in AST, using fallback") + logger.warning( + f"Could not find function '{func.__name__}' in AST, using fallback" + ) return _inject_debugging_fallback(source_code) - + # Get line number and inject debugging code - func_start_line = target_func_node.lineno - 1 # AST uses 1-based line numbers - lines = source_code.split('\n') - + func_start_line = ( + target_func_node.lineno - 1 + ) # AST uses 1-based line numbers + lines = source_code.split("\n") + # Find the line with ':' that ends the function signature # We need to find where the function signature actually ends colon_line = func_start_line paren_count = 0 found_opening_paren = False - + while colon_line < len(lines): line = lines[colon_line] for char in line: - if char == '(': + if char == "(": paren_count += 1 found_opening_paren = True - elif char == ')': + elif char == ")": paren_count -= 1 - elif char == ':' and found_opening_paren and paren_count == 0: + elif char == ":" and found_opening_paren and paren_count == 0: # Found the colon that ends the function signature break else: @@ -130,137 +149,152 @@ def inject_debugging_code(source_code: str) -> str: continue # If we broke out of the inner loop, we found our colon break - + # Find first non-empty line after the colon (skip docstring if present) body_start = colon_line + 1 while body_start < len(lines) and not lines[body_start].strip(): body_start += 1 - + # If the first non-empty line is a docstring, skip it - if (body_start < len(lines) and - lines[body_start].strip().startswith(('"""', "'''", 'r"""', "r'''"))): - # Find the end of the docstring - quote_type = 
lines[body_start].strip()[:3] - if quote_type.startswith('r'): - quote_type = quote_type[1:] - + if body_start < len(lines) and lines[body_start].strip().startswith( + ('"""', "'''", 'r"""', "r'''") + ): + # Find the end of the docstring - extract the actual triple-quote delimiter + stripped = lines[body_start].strip() + if stripped.startswith(('r"""', "r'''")): + quote_type = stripped[1:4] # skip 'r', take '"""' or "'''" + else: + quote_type = stripped[:3] + # Check if docstring ends on the same line - if lines[body_start].strip().endswith(quote_type) and len(lines[body_start].strip()) > 3: + if ( + lines[body_start].strip().endswith(quote_type) + and len(lines[body_start].strip()) > 3 + ): body_start += 1 else: # Multi-line docstring - find the end body_start += 1 - while body_start < len(lines) and not lines[body_start].strip().endswith(quote_type): + while body_start < len(lines) and not lines[ + body_start + ].strip().endswith(quote_type): body_start += 1 body_start += 1 # Move past the closing quotes - + # Find next non-empty line after docstring while body_start < len(lines) and not lines[body_start].strip(): body_start += 1 - + if body_start >= len(lines): - logger.warning("Could not find function body, using fallback") + logger.warning("Could not find function body, using fallback") return _inject_debugging_fallback(source_code) - + # Get indentation first_body_line = lines[body_start] indent = len(first_body_line) - len(first_body_line.lstrip()) - indent_str = ' ' * indent - + indent_str = " " * indent + # Generate debugging code based on debugger type - debug_lines = _generate_debug_code(debugger_type, debug_port, indent_str) - + debug_lines = _generate_debug_code( + debugger_type, debug_port, indent_str + ) + # Insert debugging code modified_lines = lines[:body_start] + debug_lines + lines[body_start:] - result = '\n'.join(modified_lines) - - logger.debug(f"Successfully injected {debugger_type} debugging code into {func.__name__}") + result = 
"\n".join(modified_lines) + + logger.debug( + f"Successfully injected {debugger_type} debugging code into {func.__name__}" + ) return result - + except Exception as e: - logger.error(f"AST parsing failed for {func.__name__}: {e}, using fallback") + logger.error( + f"AST parsing failed for {func.__name__}: {e}, using fallback" + ) return _inject_debugging_fallback(source_code) - + def _inject_debugging_fallback(source_code: str) -> str: """Fallback to string-based injection if AST fails.""" - lines = source_code.split('\n') - + lines = source_code.split("\n") + # Find function definition line (more robust search) func_def_line = -1 for i, line in enumerate(lines): stripped = line.strip() - if (stripped.startswith(f'def {func.__name__}(') or - stripped.startswith(f'def {func.__name__} (')): + if stripped.startswith(f"def {func.__name__}(") or stripped.startswith( + f"def {func.__name__} (" + ): func_def_line = i break - + if func_def_line == -1: logger.error(f"Could not find function definition for {func.__name__}") return source_code - + # Find function body start body_start = func_def_line + 1 while body_start < len(lines) and not lines[body_start].strip(): body_start += 1 - + if body_start >= len(lines): return source_code - + # Get indentation and inject first_body_line = lines[body_start] indent = len(first_body_line) - len(first_body_line.lstrip()) - indent_str = ' ' * indent - + indent_str = " " * indent + debug_lines = _generate_debug_code(debugger_type, debug_port, indent_str) modified_lines = lines[:body_start] + debug_lines + lines[body_start:] - + logger.debug(f"Fallback injection successful for {func.__name__}") - return '\n'.join(modified_lines) - + return "\n".join(modified_lines) + def _generate_debug_code(debugger_type: str, port: int, indent: str) -> list: """Generate debugging code based on debugger type.""" if debugger_type == "debugpy": return [ f"{indent}if debug:", f"{indent} import debugpy", - f"{indent} debugpy.listen((\"0.0.0.0\", {port}))", 
+ f'{indent} debugpy.listen(("0.0.0.0", {port}))', f"{indent} debugpy.wait_for_client()", f"{indent} debugpy.breakpoint()", - f"{indent}" + f"{indent}", ] elif debugger_type == "remote-pdb": return [ f"{indent}if debug:", f"{indent} import remote_pdb", f"{indent} remote_pdb.RemotePdb('0.0.0.0', {port}).set_trace()", - f"{indent}" + f"{indent}", ] else: logger.error(f"Unsupported debugger type: {debugger_type}") return [f"{indent}# Unsupported debugger type: {debugger_type}"] - + # Monkey-patch inspect.getsource for this component original_getsource = inspect.getsource - + def patched_getsource(obj): if obj is func: modified_source = inject_debugging_code(original_source) logger.debug(f"Returning modified source for {func.__name__}") return modified_source return original_getsource(obj) - + # Apply monkey patch temporarily inspect.getsource = patched_getsource - + try: # Apply the KFP component decorator with all passed arguments component_func = component(**component_kwargs)(func) logger.debug(f"Successfully created debuggable component '{func.__name__}'") - + finally: # Always restore original inspect.getsource inspect.getsource = original_getsource - + return component_func - + return decorator diff --git a/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py b/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py index 6c9268c..33648c5 100644 --- a/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py +++ b/pipelines/pipe-fiction/pipelines/utils/kfp_docker_monkey_patches.py @@ -5,7 +5,7 @@ to match the upstream 2.14+ API. Import this module BEFORE using DockerRunner with ports/environment. 
Usage (exactly like upstream KFP 2.14+): - from utils import kfp_docker_monkey_patches # Apply patches + from utils import kfp_docker_monkey_patches # noqa: F401 — Apply patches on import from kfp import local # Explicit ports and environment (upstream-compatible API) @@ -19,6 +19,7 @@ from kfp.local import docker_task_handler from kfp.local.config import DockerRunner import docker +from loguru import logger def apply_docker_port_patches(): @@ -81,7 +82,7 @@ def patched_run_docker_container(client, image, command, volumes, **kwargs): print(f"Found image {image!r}\n") else: print(f"Pulling image {image!r}") - repository, tag = image.split(":") + repository, tag = image.rsplit(":", 1) client.images.pull(repository=repository, tag=tag) print("Image pull complete\n") @@ -123,13 +124,12 @@ def patched_docker_task_handler_run(self): # Get additional container arguments from runner extra_args = {} if hasattr(self.runner, "container_run_args"): - extra_args = self.runner.container_run_args + extra_args = dict(self.runner.container_run_args) elif hasattr(self.runner, "__dict__"): - # Fallback: use all non-private attributes as container args + # Fallback: only forward keys that are valid Docker container.run() args + allowed = getattr(DockerRunner, "DOCKER_CONTAINER_RUN_ARGS", set()) extra_args = { - k: v - for k, v in self.runner.__dict__.items() - if not k.startswith("_") and k != "container_run_args" + k: v for k, v in self.runner.__dict__.items() if k in allowed } if "volumes" in extra_args: @@ -163,6 +163,13 @@ def patched_docker_runner_init(self, **kwargs): """Enhanced DockerRunner constructor that stores container run arguments.""" import os + # Call original __init__ if it exists + if original_docker_runner_init is not None: + try: + original_docker_runner_init(self) + except TypeError: + pass # Original init may not accept extra args + # Auto-pass debug environment variables to container environment = kwargs.get("environment", {}) if "KFP_DEBUG" not in environment 
and "KFP_DEBUG" in os.environ: @@ -187,8 +194,8 @@ def patched_docker_runner_init(self, **kwargs): # Apply patches immediately when module is imported apply_docker_port_patches() -print("✅ KFP Docker port & environment patches applied successfully!") -print( - " Usage (upstream 2.14+ compatible): DockerRunner(ports={'5678/tcp': 5678}, environment={'DEBUG': 'true'})" +logger.info("KFP Docker port & environment patches applied successfully") +logger.debug( + "Usage (upstream 2.14+ compatible): DockerRunner(ports={'5678/tcp': 5678}, environment={'DEBUG': 'true'})" ) -print(" This patch will be obsolete once you upgrade to KFP 2.14+") +logger.debug("This patch will be obsolete once you upgrade to KFP 2.14+")