diff --git a/CLAUDE.md b/CLAUDE.md index 833c296..6ea4cfc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -138,7 +138,7 @@ app = ( - `.with_config(schema)` - Config CRUD endpoints at `/api/v1/configs` - `.with_artifacts(hierarchy)` - Artifact CRUD at `/api/v1/artifacts` - `.with_jobs()` - Job scheduler at `/api/v1/jobs` -- `.with_tasks()` - Task execution at `/api/v1/tasks` +- `.with_tasks(validate_on_startup=True)` - Task execution at `/api/v1/tasks` with automatic Python task validation - `.with_ml(runner)` - ML train/predict at `/api/v1/ml` - `.with_logging()` - Structured logging with request tracing - `.with_auth()` - API key authentication @@ -255,12 +255,69 @@ apps/ See `examples/app_hosting_api.py` and `examples/apps/sample-dashboard/` for complete working example. +## Task Execution System + +Chapkit provides a task execution system supporting both shell commands and Python functions with type-based dependency injection. + +**Task Types:** +- **Shell tasks**: Execute commands via asyncio subprocess, capture stdout/stderr/exit_code +- **Python tasks**: Execute registered functions via TaskRegistry, capture result/error with traceback + +**Python Task Registration:** +```python +from chapkit import TaskRegistry + +@TaskRegistry.register("my_task") +async def my_task(name: str, session: AsyncSession) -> dict: + """Task with user parameters and dependency injection.""" + # name comes from task.parameters (user-provided) + # session is injected by framework (type-based) + return {"status": "success", "name": name} +``` + +**Type-Based Dependency Injection:** + +Framework types are automatically injected based on function parameter type hints: +- `AsyncSession` - SQLAlchemy async database session +- `Database` - Chapkit Database instance +- `ArtifactManager` - Artifact management service +- `JobScheduler` - Job scheduling service + +**Key Features:** +- Enable/disable controls for tasks +- Automatic orphaned task validation (enabled by default, auto-disables tasks 
with missing functions on startup) +- Support both sync and async Python functions +- Mix user parameters with framework injections +- Optional type support (`AsyncSession | None`) +- Artifact-based execution results for both shell and Python tasks + +**Example:** +```python +app = ( + ServiceBuilder(info=ServiceInfo(display_name="Task Service")) + .with_health() + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=3) + .with_tasks() # Adds task CRUD + execution, validates on startup by default + .build() +) + +# Disable validation if needed +app = ( + ServiceBuilder(info=info) + .with_tasks(validate_on_startup=False) + .build() +) +``` + +See `docs/guides/task-execution.md` for complete documentation and `examples/python_task_execution_api.py` for working examples. + ## Common Endpoints **Config Service:** Health check, CRUD operations, pagination (`?page=1&size=20`), schema endpoint (`/$schema`) **Artifact Service:** CRUD + tree operations (`/$tree`), optional config linking **Job Scheduler:** List/get/delete jobs, status filtering -**Task Service:** CRUD + execute operation (`/$execute`) +**Task Service:** CRUD, execute (`/$execute`), enable/disable controls, Python function registry, type-based injection **ML Service:** Train (`/$train`) and predict (`/$predict`) operations **Operation prefix:** `$` indicates operations (computed/derived data) vs resource access diff --git a/ROADMAP.md b/ROADMAP.md index d03894e..f50d7e6 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,19 +1,152 @@ +# Chapkit Roadmap -## Core Improvements +> **Vision:** Build the most productive async Python framework for ML/data services with FastAPI integration -- [ ] Support artifact export (PandasDataFrame => csv/parquet, pandas => csv/json/parquet) -- [ ] Store more meta information for train/predict runs (full config, type etc) -- [ ] Support multiple types for /api/configs +## Priority Legend +- **High Priority** - Next 1-2 releases (actively working or immediate next) 
+- **Medium Priority** - Next 3-6 releases (planned, design in progress) +- **Future** - Under consideration (evaluate demand/feasibility) -## Type Safety -- [ ] Stricter generic type constraints -- [ ] Runtime type validation options -- [ ] Better inference for generic managers -- [ ] Type-safe configuration builder +--- -## Code Quality -- [ ] Performance benchmarking suite -- [ ] Memory leak detection -- [ ] Code coverage improvements (target 95%+) -- [ ] Dependency injection improvements -- [ ] Create chapkit.client.Client for testing/working with instances +## High Priority (Next 1-2 Releases) + +### Task Execution +- [ ] **Task scheduling (Phase 2)** - Cron, interval, and one-off scheduling with in-memory storage + - Already designed in `designs/python-tasks-and-scheduling.md` + - Background scheduler worker + - Schedule enable/disable controls + - Migration path to persistent scheduling + +- [ ] **Decorator-based ML runner registration** - Extend TaskRegistry with metadata + - Reuse TaskRegistry instead of creating new registry + - `@TaskRegistry.register("model_name", type="ml_train")` + - `FunctionalModelRunner.from_registry()` factory method + - Cleaner API, consistent with task patterns + +### Developer Experience +- [ ] **chapkit.client.Client** - Python client for testing and working with chapkit services + - Type-safe client with IDE support + - Automatic serialization/deserialization + - Request/response validation + - Essential for testing and SDK users + +### Artifact System +- [ ] **Artifact export** - Export DataFrames and data structures from artifacts + - CSV, Parquet, JSON formats + - Streaming for large datasets + - Compression support (gzip, bzip2) + +--- + +## Medium Priority (Next 3-6 Releases) + +### Task Execution Enhancements +- [ ] **Retry policies** - Automatic retry with exponential backoff for failed tasks +- [ ] **Custom injectable types** - User-defined dependency injection types +- [ ] **Result caching** - Cache task results 
based on parameters with TTL + +### ML System +- [ ] **Enhanced train/predict metadata** - Store full config, model type, framework version, hyperparameters +- [ ] **Model versioning** - Track model lineage and version history +- [ ] **Experiment tracking** - MLflow or W&B integration for experiment management + +### Configuration +- [ ] **Multiple config types** - Support multiple config schemas per service +- [ ] **Config versioning** - Track and rollback config changes + +### Observability +- [ ] **Distributed tracing** - OpenTelemetry integration for request tracing +- [ ] **Enhanced metrics** - Custom metrics registration and SLO tracking +- [ ] **Structured audit logging** - Comprehensive audit trails for compliance + +### Type Safety +- [ ] **Stricter generic constraints** - Better compile-time type checking +- [ ] **Runtime type validation** - Optional runtime validation layer + +--- + +## Future Considerations + +### Advanced Task Features +- [ ] **Registry namespacing** - Module-scoped registries to avoid collisions +- [ ] **Function versioning** - Track function versions in artifacts +- [ ] **Parameter serialization** - Custom serializers for complex types + +### ML Advanced Features +- [ ] **Model registry** - Central registry for discovering trained models +- [ ] **A/B testing** - Deploy multiple model versions with traffic splitting +- [ ] **Pipeline composition** - Chain models and transformations +- [ ] **Feature store integration** - Connect to feature stores + +### Developer Tools +- [ ] **CLI tool** - Command-line tool for migrations, seeding, testing +- [ ] **Code generation** - Generate boilerplate for modules, routers, models +- [ ] **Development server** - Enhanced dev server with auto-reload + +### Testing & Quality +- [ ] **Performance benchmarking** - Comprehensive benchmarks for core operations +- [ ] **Memory leak detection** - Automated leak detection in tests +- [ ] **Code coverage 95%+** - Target high coverage across all modules +- [ 
] **Load testing tools** - Built-in load testing utilities + +### API & Middleware +- [ ] **WebSocket support** - Real-time updates via WebSockets +- [ ] **Rate limiting** - Built-in rate limiting middleware +- [ ] **Response caching** - Intelligent caching layer +- [ ] **GraphQL support** - Optional GraphQL layer (evaluate demand first) +- [ ] **gRPC support** - High-performance gRPC endpoints (evaluate demand first) + +### Security +- [ ] **RBAC** - Role-based access control +- [ ] **OAuth2/JWT** - Modern authentication flows +- [ ] **Encryption at rest** - Encrypt sensitive artifacts and configs +- [ ] **Secret management** - Vault, AWS Secrets Manager integration + +### Cloud & Storage +- [ ] **Artifact cloud storage** - S3, GCS, Azure Blob backends +- [ ] **PostgreSQL adapter** - Production-grade relational DB support +- [ ] **Message queue integration** - RabbitMQ, Kafka for async processing + +### Documentation +- [ ] **Tutorial series** - Step-by-step guides for common patterns +- [ ] **Architecture guide** - Deep dive into chapkit internals +- [ ] **Best practices** - Production deployment patterns +- [ ] **Video tutorials** - Screencast series for key features + +--- + +## Recently Completed + +### v0.x (Current) +- **Python task execution** - TaskRegistry with decorator-based registration +- **Type-based dependency injection** - Automatic injection of framework services +- **Enable/disable controls** - Task execution controls +- **Orphaned task validation** - Auto-disable tasks with missing functions +- **App hosting system** - Host static web apps alongside API +- **Health check SSE streaming** - Server-sent events for health monitoring +- **Comprehensive testing** - 683 tests passing with extensive coverage +- **ML service builder** - Specialized builder for ML workflows + +--- + +## Evaluation Criteria for New Features + +Before adding items to this roadmap, consider: + +1. **Core Value Alignment** - Does it enhance ML/data service development? +2. 
**Developer Experience** - Does it reduce boilerplate or improve productivity? +3. **Production Readiness** - Does it solve real production challenges? +4. **Maintenance Burden** - Can we maintain it long-term? +5. **Community Demand** - Are users asking for it? +6. **Breaking Changes** - Can we add it without breaking existing code? + +## Contributing + +Have ideas for the roadmap? Open an issue with: +- **Use case** - What problem does it solve? +- **Alternatives** - What workarounds exist today? +- **Impact** - How many users would benefit? +- **Effort** - Rough complexity estimate + +See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed contribution guidelines. diff --git a/alembic/versions/20251010_0927_4d869b5fb06e_initial_schema.py b/alembic/versions/20251010_0927_4d869b5fb06e_initial_schema.py index a9fe1a0..3d78c20 100644 --- a/alembic/versions/20251010_0927_4d869b5fb06e_initial_schema.py +++ b/alembic/versions/20251010_0927_4d869b5fb06e_initial_schema.py @@ -1,10 +1,4 @@ -"""initial_schema - -Revision ID: 4d869b5fb06e -Revises: -Create Date: 2025-10-10 09:27:01.866482+00:00 - -""" +"""Initial database schema migration.""" import sqlalchemy as sa @@ -57,6 +51,9 @@ def upgrade() -> None: op.create_table( "tasks", sa.Column("command", sa.Text(), nullable=False), + sa.Column("task_type", sa.Text(), nullable=False, server_default="shell"), + sa.Column("parameters", sa.JSON(), nullable=True), + sa.Column("enabled", sa.Boolean(), nullable=False, server_default="1"), sa.Column("id", chapkit.core.types.ULIDType(length=26), nullable=False), sa.Column("created_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False), sa.Column("updated_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False), diff --git a/designs/python-tasks-and-scheduling.md b/designs/python-tasks-and-scheduling.md new file mode 100644 index 0000000..a574fe2 --- /dev/null +++ b/designs/python-tasks-and-scheduling.md @@ -0,0 +1,1660 @@ +# Design: 
Python Task Execution and Job Scheduling + +**Status:** Phase 1 Complete, Phase 2 Draft +**Date:** 2025-10-17 +**Author:** AI Assistant + +## Overview + +This design extends Chapkit's task execution system with: +1. **Phase 1 (IMPLEMENTED):** Python function execution with type-based dependency injection +2. **Phase 2 (DRAFT):** Job scheduling for one-off, interval, and cron-based execution + +This document captures the complete knowledge of both phases, with emphasis on the implemented Python task execution system. + +--- + +# Phase 1: Python Task Execution (IMPLEMENTED) + +## Goals (Completed) + +- Execute registered Python functions as tasks alongside shell commands +- Support both sync and async Python functions +- Provide type-based dependency injection for framework services +- Enable/disable control for tasks +- Validate and auto-disable orphaned Python tasks +- Artifact-based result storage with error handling + +## Architecture + +### Core Components + +``` +┌─────────────────────────────────────────────────────────┐ +│ TaskRegistry │ +│ - Global function registry (decorator & imperative) │ +│ - register(name): Decorator for functions │ +│ - get(name): Retrieve registered function │ +│ - list_all(): List all registered names │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ TaskManager │ +│ - execute_task(task_id): Route to shell or python │ +│ - _execute_command(task_id): Shell execution │ +│ - _execute_python(task_id): Python execution │ +│ - _inject_parameters(): Type-based DI │ +│ - find_all(enabled=...): Query with filtering │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────┐ +│ TaskRepository │ +│ - find_all(enabled=...): Filter by enabled status │ +│ - find_by_enabled(bool): Query enabled/disabled │ +└─────────────────────────────────────────────────────────┘ + │ + ▼ 
+┌─────────────────────────────────────────────────────────┐ +│ Task (ORM) │ +│ - command: str (function name or shell command) │ +│ - task_type: str ("shell" or "python") │ +│ - parameters: dict | None (JSON for python tasks) │ +│ - enabled: bool (execution control) │ +└─────────────────────────────────────────────────────────┘ +``` + +## Database Schema + +**Added fields to `tasks` table** (via migration `20251010_0927_4d869b5fb06e_initial_schema.py`): + +```python +class Task(Entity): + __tablename__ = "tasks" + + command: Mapped[str] # Function name (python) or shell command + task_type: Mapped[str] = mapped_column(default="shell") # "shell" | "python" + parameters: Mapped[dict | None] = mapped_column(JSON, nullable=True) + enabled: Mapped[bool] = mapped_column(default=True) # Enable/disable control +``` + +## Task Types + +### Shell Tasks (Existing) + +**Execution:** +- Via `asyncio.create_subprocess_shell()` +- Captures stdout, stderr, exit_code + +**Artifact Structure:** +```json +{ + "task": { + "id": "01TASK...", + "command": "echo 'Hello World'", + "created_at": "2025-10-17T...", + "updated_at": "2025-10-17T..." + }, + "stdout": "Hello World\n", + "stderr": "", + "exit_code": 0 +} +``` + +### Python Tasks (NEW) + +**Execution:** +- Via `TaskRegistry.get(function_name)` +- Supports sync and async functions +- Type-based dependency injection +- Parameter validation via function signature + +**Artifact Structure (Success):** +```json +{ + "task": { + "id": "01TASK...", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "created_at": "2025-10-17T...", + "updated_at": "2025-10-17T..." 
+ }, + "result": { + "result": 42, + "operation": "sum" + }, + "error": null +} +``` + +**Artifact Structure (Failure):** +```json +{ + "task": { + "id": "01TASK...", + "command": "failing_task", + "task_type": "python", + "parameters": {"should_fail": true} + }, + "result": null, + "error": { + "type": "ValueError", + "message": "This task was designed to fail", + "traceback": "Traceback (most recent call last):\n ..." + } +} +``` + +## TaskRegistry + +**Purpose:** Global registry for Python functions to prevent arbitrary code execution + +**File:** `src/chapkit/modules/task/registry.py` + +### Registration Methods + +**1. Decorator (Recommended):** +```python +from chapkit import TaskRegistry + +@TaskRegistry.register("my_function") +async def my_function(x: int, y: int) -> dict: + """Example async function.""" + return {"sum": x + y} +``` + +**2. Imperative:** +```python +def my_function(x: int) -> dict: + return {"result": x * 2} + +TaskRegistry.register_function("my_function", my_function) +``` + +### Methods + +```python +class TaskRegistry: + @classmethod + def register(cls, name: str) -> Callable: + """Decorator to register a function.""" + + @classmethod + def register_function(cls, name: str, func: Callable) -> None: + """Imperative registration.""" + + @classmethod + def get(cls, name: str) -> Callable: + """Retrieve registered function (raises KeyError if not found).""" + + @classmethod + def list_all(cls) -> list[str]: + """List all registered function names.""" + + @classmethod + def clear(cls) -> None: + """Clear registry (useful for testing).""" +``` + +## Type-Based Dependency Injection + +**Feature:** Framework services are automatically injected based on function parameter type hints. 
+ +### Injectable Types + +```python +INJECTABLE_TYPES = { + AsyncSession, # SQLAlchemy async database session + Database, # Chapkit Database instance + ArtifactManager, # Artifact management service + JobScheduler, # Job scheduling service +} +``` + +### Parameter Sources + +1. **User Parameters:** From `task.parameters` (primitives, dicts, lists, pandas DataFrames, etc.) +2. **Framework Injections:** Automatically injected based on type hints + +### Examples + +**Pure User Parameters:** +```python +@TaskRegistry.register("calculate_sum") +async def calculate_sum(a: int, b: int) -> dict: + """All params from task.parameters.""" + return {"result": a + b} + +# Task: {"command": "calculate_sum", "parameters": {"a": 10, "b": 32}} +``` + +**Mixed User + Framework:** +```python +@TaskRegistry.register("query_tasks") +async def query_tasks( + limit: int, # From task.parameters + session: AsyncSession, # Injected by framework +) -> dict: + """Mix user and injected parameters.""" + from sqlalchemy import select, func + from chapkit.modules.task.models import Task + + stmt = select(func.count()).select_from(Task) + result = await session.execute(stmt) + count = result.scalar() or 0 + + return { + "total": count, + "limit": limit, + "using_injection": True + } + +# Task: {"command": "query_tasks", "parameters": {"limit": 100}} +# session is injected automatically based on type hint +``` + +**Framework-Only (No User Params):** +```python +@TaskRegistry.register("query_task_count") +async def query_task_count(session: AsyncSession) -> dict: + """No user parameters needed.""" + from sqlalchemy import select, func + from chapkit.modules.task.models import Task + + stmt = select(func.count()).select_from(Task) + result = await session.execute(stmt) + count = result.scalar() or 0 + + return {"total_tasks": count} + +# Task: {"command": "query_task_count", "parameters": {}} +# Empty parameters - session injected automatically +``` + +**Optional Injection:** +```python 
+@TaskRegistry.register("maybe_db") +def maybe_db( + value: int, + session: AsyncSession | None = None, # Optional injection +) -> dict: + """Optional framework parameter.""" + result = {"value": value} + if session: + result["has_session"] = True + return result + +# Works with or without session available +``` + +### Implementation Details + +**Injection Algorithm** (`src/chapkit/modules/task/manager.py:69-127`): + +1. Parse function signature with `inspect.signature(func)` +2. Get type hints with `get_type_hints(func)` +3. Build injection map: `{AsyncSession: session_instance, Database: db_instance, ...}` +4. For each parameter: + - Check if type hint matches injectable type + - Handle `Optional[Type]` (extract non-None type) + - If injectable: inject from map + - If not injectable: must be in `task.parameters` or have default +5. Raise `ValueError` if required non-injectable parameter missing + +**Type Checking:** +- Handles both `Type | None` (Python 3.10+ union) and `Union[Type, None]` (typing module) +- Uses `get_origin()` to detect union types +- Extracts non-None types from unions + +**Session Management:** +- Creates dedicated session for injection: `database.session()` +- Session enters context before execution +- Session always closes in `finally` block (prevents leaks) +- Artifact saved with separate session (prevents interference) + +## Enable/Disable Control + +**Feature:** Tasks can be enabled/disabled for execution control without deletion. + +### Use Cases + +1. **Soft delete:** Disable instead of deleting to preserve history +2. **Maintenance:** Temporarily disable tasks during system maintenance +3. **Orphaned tasks:** Auto-disable tasks with missing Python functions +4. 
**Gradual rollout:** Create disabled tasks, enable when ready + +### Schema + +```python +class Task(Entity): + enabled: Mapped[bool] = mapped_column(default=True) +``` + +### API Endpoints + +**Create with disabled state:** +```bash +POST /api/v1/tasks +{ + "command": "process_data", + "task_type": "python", + "parameters": {"input": "test"}, + "enabled": false +} +``` + +**Filter by enabled status:** +```bash +GET /api/v1/tasks?enabled=true # Only enabled +GET /api/v1/tasks?enabled=false # Only disabled +GET /api/v1/tasks # All tasks +``` + +**Update enabled status:** +```bash +PUT /api/v1/tasks/{id} +{ + "command": "process_data", + "enabled": false +} +``` + +### Execution Validation + +Tasks are validated before execution (`src/chapkit/modules/task/manager.py:144-145`): + +```python +async def execute_task(self, task_id: ULID) -> ULID: + task = await self.repo.find_by_id(task_id) + if not task.enabled: + raise ValueError(f"Cannot execute disabled task {task_id}") + # ... continue execution +``` + +**Error Response:** +```json +{ + "detail": "Cannot execute disabled task 01TASK..." +} +``` + +## Orphaned Task Validation + +**Feature:** Automatically detect and disable Python tasks referencing unregistered functions. + +**File:** `src/chapkit/modules/task/validation.py` + +### Purpose + +Prevent execution failures when: +- Function is removed from code but task still exists in DB +- Service restarts and function registration changes +- Code deployment removes or renames functions + +### Implementation + +```python +async def validate_and_disable_orphaned_tasks(app: FastAPI) -> int: + """Validate Python tasks and disable orphaned ones. + + Returns: + Number of tasks disabled + """ + database = getattr(app.state, "database", None) + if database is None: + return 0 + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, ...) 
+ + # Get all tasks + all_tasks = await task_manager.find_all() + + # Get registered function names + registered_functions = set(TaskRegistry.list_all()) + + # Find orphaned Python tasks + orphaned_tasks = [ + task for task in all_tasks + if task.task_type == "python" and task.command not in registered_functions + ] + + # Disable each orphaned task + for task in orphaned_tasks: + logger.warning( + f"Disabling orphaned task {task.id}: function '{task.command}' not found" + ) + await task_manager.save( + TaskIn(id=task.id, ..., enabled=False) + ) + + return len(orphaned_tasks) +``` + +### Usage in ServiceBuilder + +```python +async def validate_tasks_on_startup(app: FastAPI) -> None: + """Startup hook for validation.""" + await validate_and_disable_orphaned_tasks(app) + +app = ( + ServiceBuilder(info=info) + .with_health() + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=3) + .with_tasks() + .on_startup(seed_python_tasks) + .on_startup(validate_tasks_on_startup) # Auto-disable orphaned tasks + .build() +) +``` + +### Logging + +**Structured logging with context:** +```python +logger.warning( + "Found orphaned Python tasks - disabling them", + extra={ + "count": len(orphaned_tasks), + "task_ids": [str(task.id) for task in orphaned_tasks], + "commands": [task.command for task in orphaned_tasks], + }, +) +``` + +## Read-Only Task API Pattern + +**Use Case:** Pre-seed tasks at startup, expose via read-only API for execution + +**File:** `examples/readonly_task_api.py` + +### Benefits + +1. **Version control:** Task definitions in code, not database +2. **Security:** Prevent task creation/modification via API +3. **Consistency:** Same tasks across environments +4. 
**Production best practice:** Immutable infrastructure + +### Implementation + +```python +from chapkit.core.api.crud import CrudPermissions + +app = ( + ServiceBuilder(info=info) + .with_health() + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=3) + .with_tasks( + permissions=CrudPermissions( + create=False, # Disable POST /tasks + read=True, # Allow GET /tasks, GET /tasks/{id} + update=False, # Disable PUT /tasks/{id} + delete=False, # Disable DELETE /tasks/{id} + ) + ) + .on_startup(seed_tasks) # Pre-seed tasks from code + .build() +) +``` + +**Available operations:** +- `GET /api/v1/tasks` - List tasks +- `GET /api/v1/tasks/{id}` - Get task +- `POST /api/v1/tasks/{id}/$execute` - Execute task +- `POST /api/v1/tasks` - Create task (disabled) +- `PUT /api/v1/tasks/{id}` - Update task (disabled) +- `DELETE /api/v1/tasks/{id}` - Delete task (disabled) + +## API Reference + +### Create Python Task + +```bash +POST /api/v1/tasks +Content-Type: application/json + +{ + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "enabled": true +} +``` + +**Response (201):** +```json +{ + "id": "01TASK...", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "enabled": true, + "created_at": "2025-10-17T...", + "updated_at": "2025-10-17T..." +} +``` + +### Execute Task + +```bash +POST /api/v1/tasks/{id}/$execute +``` + +**Response (202):** +```json +{ + "job_id": "01JOB...", + "message": "Task submitted for execution. Job ID: 01JOB..." +} +``` + +### Get Job Status + +```bash +GET /api/v1/jobs/{job_id} +``` + +**Response (200):** +```json +{ + "id": "01JOB...", + "status": "completed", + "artifact_id": "01ARTIFACT...", + "submitted_at": "2025-10-17T...", + "started_at": "2025-10-17T...", + "finished_at": "2025-10-17T..." 
+} +``` + +### Get Execution Results + +```bash +GET /api/v1/artifacts/{artifact_id} +``` + +**Response (200):** +```json +{ + "id": "01ARTIFACT...", + "data": { + "task": { + "id": "01TASK...", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32} + }, + "result": { + "result": 42, + "operation": "sum" + }, + "error": null + }, + "created_at": "2025-10-17T...", + "updated_at": "2025-10-17T..." +} +``` + +### Filter Tasks by Status + +```bash +GET /api/v1/tasks?enabled=true +GET /api/v1/tasks?enabled=false +``` + +## Testing + +**Test Coverage:** 683 tests passing, 6 skipped + +### Test Files + +1. **`tests/test_task_registry.py`** (151 lines) + - Decorator registration + - Imperative registration + - Duplicate name detection + - Function retrieval + - Registry listing + - Clear functionality + +2. **`tests/test_task_injection.py`** (382 lines) + - AsyncSession injection + - Database injection + - ArtifactManager injection + - Mixed user + injected parameters + - Optional type handling (`Type | None`) + - Missing parameter error handling + - Sync function injection + +3. **`tests/test_manager_task.py`** (246 lines added) + - Python task execution (sync/async) + - Shell task execution + - Parameter passing + - Error handling and artifact structure + - Enable/disable enforcement + - Find with enabled filtering + +4. **`tests/test_task_repository.py`** (139 lines) + - `find_all(enabled=True/False/None)` + - `find_by_enabled(bool)` + - Query correctness + +5. **`tests/test_task_router.py`** (168 lines) + - Enable/disable via API + - Query parameter filtering + - Execution validation + +6. **`tests/test_task_validation.py`** (242 lines) + - Orphaned task detection + - Auto-disable orphaned tasks + - Logging verification + - Registry validation + +7. 
**`tests/test_example_python_task_execution_api.py`** (286 lines) + - Full integration tests + - Multiple task types + - Sync/async function execution + - Error handling + - Dependency injection examples + - Mixed shell and Python tasks + +### Test Patterns + +**Registry Testing:** +```python +from chapkit import TaskRegistry + +def test_register_function(): + TaskRegistry.clear() # Clean state + + @TaskRegistry.register("test_func") + def test_func(x: int) -> int: + return x * 2 + + assert "test_func" in TaskRegistry.list_all() + func = TaskRegistry.get("test_func") + assert func(5) == 10 +``` + +**Injection Testing:** +```python +async def test_inject_async_session(): + @TaskRegistry.register("needs_session") + async def needs_session(session: AsyncSession) -> dict: + assert session is not None + return {"has_session": True} + + task_manager = TaskManager(repo, scheduler, database, artifact_manager) + task = await task_manager.save( + TaskIn( + command="needs_session", + task_type="python", + parameters={}, # Empty - session injected + ) + ) + + job_id = await task_manager.execute_task(task.id) + # Verify session was injected and task executed +``` + +**Enable/Disable Testing:** +```python +async def test_cannot_execute_disabled_task(): + task = await task_manager.save( + TaskIn(command="echo 'test'", enabled=False) + ) + + with pytest.raises(ValueError, match="Cannot execute disabled task"): + await task_manager.execute_task(task.id) +``` + +## Documentation + +### Guides + +1. **`docs/guides/task-execution.md`** (1610 lines) + - Complete task execution guide + - Shell and Python task examples + - Type-based injection documentation + - Enable/disable patterns + - Orphaned task validation + - API reference with examples + +2. **`examples/docs/task_python_execution.md`** (543 lines) + - cURL-based API examples + - Step-by-step Python task workflow + - Dependency injection examples + - Error handling demonstrations + +3. 
**`examples/docs/task_python_execution.postman_collection.json`** (958 lines) + - Complete Postman collection + - Pre-configured requests + - Environment variables + - Test scripts + +4. **`CLAUDE.md`** updates (52 lines added) + - Task Execution System section + - Quick reference for TaskRegistry + - Type-based injection overview + - Integration with ServiceBuilder + +### Example Applications + +1. **`examples/task_execution_api.py`** (Original shell example) + - Simple shell task execution + - Artifact-based results + - Basic seeding + +2. **`examples/python_task_execution_api.py`** (229 lines) + - Python function registration + - Sync/async examples + - Dependency injection examples + - Error handling demonstrations + - Mixed shell and Python tasks + - Orphaned task validation + +3. **`examples/readonly_task_api.py`** (167 lines) + - Read-only API pattern + - Pre-seeded tasks + - CrudPermissions usage + - Production deployment pattern + +## Security Considerations + +1. **No Arbitrary Code Execution** + - Only registered functions can be executed + - Function names validated against registry + - No `eval()` or dynamic imports + +2. **Parameter Validation** + - Pydantic validation on `task.parameters` + - Type hints enforce parameter types + - Missing required parameters caught before execution + +3. **Exception Isolation** + - Python exceptions captured and stored in artifacts + - Exceptions don't crash job scheduler + - Full tracebacks preserved for debugging + +4. **Session Management** + - Dedicated session per execution + - Always closed in `finally` block + - No session leaks + +5. **Orphaned Task Prevention** + - Auto-disable tasks with missing functions + - Prevents execution failures + - Logged for monitoring + +## Performance Considerations + +1. **Sync Function Handling** + - Executed via `asyncio.to_thread()` + - Doesn't block event loop + - Suitable for CPU-bound tasks + +2. 
**Async Function Handling** + - Direct `await` execution + - Efficient for I/O-bound tasks + - No thread overhead + +3. **Parameter Injection Overhead** + - Function signature parsed once per execution + - Type hints retrieved once + - Minimal overhead (~microseconds) + +4. **Registry Lookup** + - Dictionary-based (O(1) lookup) + - No parsing or compilation + - Cached function references + +## Migration Guide + +### From Shell-Only to Shell + Python + +**Before:** +```python +app = ( + ServiceBuilder(info=info) + .with_health() + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=3) + .with_tasks() # Only shell tasks + .build() +) +``` + +**After:** +```python +# 1. Register Python functions +@TaskRegistry.register("my_function") +async def my_function(x: int) -> dict: + return {"result": x * 2} + +# 2. Add validation hook +async def validate_tasks_on_startup(app: FastAPI) -> None: + await validate_and_disable_orphaned_tasks(app) + +# 3. Same ServiceBuilder, add validation +app = ( + ServiceBuilder(info=info) + .with_health() + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=3) + .with_tasks() # Now supports both shell and python + .on_startup(validate_tasks_on_startup) # Optional but recommended + .build() +) +``` + +**No breaking changes:** +- Existing shell tasks continue to work +- API endpoints unchanged +- Database schema extended (backwards compatible) + +## Known Limitations + +1. **In-Memory Registry** + - Registry cleared on restart + - Must re-register functions on startup + - No registry persistence + +2. **Global Registry** + - Single global registry per process + - No namespacing or scoping + - Function name collisions possible + +3. **Parameter Serialization** + - Parameters must be JSON-serializable + - Complex objects (pandas DataFrames) stored as dicts + - No automatic serialization for custom types + +4. 
**No Retry Logic** + - Failed executions don't retry automatically + - Must re-execute manually + - (Can be added in future) + +5. **Injection Limitations** + - Only framework types injectable + - No custom user-defined injectable types + - No constructor injection (function parameters only) + +## Future Enhancements (Phase 1 Follow-ups) + +Potential improvements to Python task execution: + +1. **Custom Injectable Types** + - Allow users to register custom injectable types + - Service locator pattern + - `TaskManager.register_injectable(Type, instance)` + +2. **Parameter Serialization** + - Support custom type serializers + - Automatic pandas DataFrame serialization + - Protocol for user-defined serializers + +3. **Registry Namespacing** + - Module-scoped registries + - Avoid name collisions + - `TaskRegistry("myapp.tasks").register("func")` + +4. **Function Versioning** + - Track function versions + - Artifact stores which version executed + - `@TaskRegistry.register("func", version="1.0")` + +5. **Retry Policies** + - Automatic retry on failure + - Configurable backoff strategies + - Max retry limits + +6. 
**Result Caching** + - Cache results based on parameters + - Avoid re-execution + - TTL-based invalidation + +--- + +# Phase 2: Job Scheduling (DRAFT) + +## Goals + +- Support multiple scheduling strategies (once, interval, cron) +- Work with both shell and Python tasks +- Keep implementation simple (in-memory scheduling, no persistence) +- Provide clear migration path to persistent scheduling later + +## Non-Goals + +- Persistent schedule storage (defer to future iteration) +- Distributed scheduling across multiple nodes + +## Background + +### Current Job Scheduler + +`AIOJobScheduler` provides immediate execution only: +- Submit jobs with `add_job(target, *args, **kwargs)` +- In-memory job tracking (not persisted) +- Concurrency control via semaphore +- Job lifecycle: pending → running → completed/failed/canceled + +**Gap:** No ability to schedule tasks for future or recurring execution. + +## Design Decisions + +### Decision 1: In-Memory Scheduling + +**Options Considered:** +1. **In-Memory** (chosen) - Dict-based storage, lost on restart +2. Database-backed - Persist schedules in SQLite +3. APScheduler Integration - Use battle-tested library + +**Rationale:** +- Simplest implementation for MVP +- No schema changes required initially +- Easy to migrate to persistence later +- User explicitly requested in-memory for now + +**Trade-offs:** +- Schedules lost on service restart +- No clustering/distributed scheduling +- Need to rebuild schedules on startup (if persisted later) + +### Decision 2: Scheduling as Task Operation + +**Options Considered:** +1. **Operation Endpoint** (chosen) - `POST /tasks/{id}/$schedule` +2. 
Separate Resource - `POST /schedules` with task_id reference + +**Rationale:** +- Consistent with existing `/$execute` pattern +- Simpler API surface (fewer endpoints) +- Scheduling is conceptually an operation on a task + +**Trade-offs:** +- Schedule CRUD requires task ID in path +- Listing all schedules requires iterating all tasks + +## Architecture + +### Component Overview + +``` +┌─────────────────────────────────────────────────────────┐ +│ TaskRouter │ +│ POST /tasks/{id}/$schedule │ +│ GET /tasks/{id}/$schedules │ +│ DELETE /tasks/{id}/$schedules/{schedule_id} │ +│ PATCH /tasks/{id}/$schedules/{schedule_id} │ +└─────────────────┬───────────────────────────────────────┘ + │ + v +┌─────────────────────────────────────────────────────────┐ +│ TaskManager │ +│ ┌──────────────────────────────────────────┐ │ +│ │ execute_task(task_id) │ │ +│ │ (handles both shell and python tasks) │ │ +│ └──────────────────────────────────────────┘ │ +│ ┌──────────────────────────────────────────┐ │ +│ │ schedule_task(task_id, schedule_config) │ │ +│ │ _scheduler_worker() [background loop] │ │ +│ │ _calculate_next_run(schedule) │ │ +│ └──────────────────────────────────────────┘ │ +└─────────────────┬───────────────────────────────────────┘ + │ + v + ┌──────────────────┐ + │ AIOJobScheduler │ + │ add_job() │ + │ get_status() │ + └──────────────────┘ +``` + +### Data Flow: Scheduled Task Execution + +``` +1. User schedules task: + POST /api/v1/tasks/{id}/$schedule + { + "schedule_type": "cron", + "cron_expression": "0 2 * * *" + } + +2. TaskManager.schedule_task(): + - Validate schedule params + - Calculate next_run_at + - Store in _schedules dict + - Ensure scheduler worker is running + +3. Background worker loop (every 60s): + - Check all enabled schedules + - If next_run_at <= now: + - Call execute_task(task_id) + - Update last_run_at + - Calculate new next_run_at + - Disable if schedule_type == "once" + +4. 
Execution flows through normal task execution path +``` + +## Detailed Design + +### 1. Schedule Models + +**File:** `src/chapkit/modules/task/schedule.py` + +```python +"""Task scheduling models and schemas.""" + +from datetime import datetime, timezone +from typing import Literal + +from pydantic import BaseModel, Field, model_validator +from ulid import ULID + + +class TaskSchedule(BaseModel): + """In-memory task schedule representation.""" + + id: ULID = Field(description="Unique schedule identifier") + task_id: ULID = Field(description="ID of task to execute") + schedule_type: Literal["once", "interval", "cron"] = Field( + description="Type of schedule" + ) + run_at: datetime | None = Field( + default=None, + description="Specific datetime for 'once' schedules (UTC)", + ) + interval_seconds: int | None = Field( + default=None, + description="Interval in seconds for 'interval' schedules", + ) + cron_expression: str | None = Field( + default=None, + description="Cron expression for 'cron' schedules", + ) + enabled: bool = Field( + default=True, + description="Whether schedule is active", + ) + next_run_at: datetime = Field( + description="Next scheduled execution time (UTC)" + ) + last_run_at: datetime | None = Field( + default=None, + description="Last execution time (UTC)", + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + description="When schedule was created", + ) + updated_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + description="When schedule was last updated", + ) + + +class ScheduleIn(BaseModel): + """Input schema for creating task schedules.""" + + schedule_type: Literal["once", "interval", "cron"] = Field( + description="Type of schedule to create" + ) + run_at: datetime | None = Field( + default=None, + description="Specific datetime for 'once' schedules (UTC)", + ) + interval_seconds: int | None = Field( + default=None, + ge=1, + description="Interval in seconds for 
'interval' schedules (minimum 1)", + ) + cron_expression: str | None = Field( + default=None, + description="Cron expression for 'cron' schedules (e.g., '0 2 * * *')", + ) + enabled: bool = Field( + default=True, + description="Whether schedule should be active initially", + ) + + @model_validator(mode="after") + def validate_schedule_params(self) -> "ScheduleIn": + """Ensure correct parameters for schedule type.""" + if self.schedule_type == "once": + if self.run_at is None: + raise ValueError("run_at required for 'once' schedules") + if self.run_at <= datetime.now(timezone.utc): + raise ValueError("run_at must be in the future") + elif self.schedule_type == "interval": + if self.interval_seconds is None: + raise ValueError("interval_seconds required for 'interval' schedules") + elif self.schedule_type == "cron": + if self.cron_expression is None: + raise ValueError("cron_expression required for 'cron' schedules") + # Validate cron expression + try: + from croniter import croniter + croniter(self.cron_expression, datetime.now(timezone.utc)) + except Exception as e: + raise ValueError(f"Invalid cron expression: {e}") + return self + + +class ScheduleOut(BaseModel): + """Output schema for task schedules.""" + + id: ULID + task_id: ULID + schedule_type: Literal["once", "interval", "cron"] + run_at: datetime | None = None + interval_seconds: int | None = None + cron_expression: str | None = None + enabled: bool + next_run_at: datetime + last_run_at: datetime | None = None + created_at: datetime + updated_at: datetime + + +class ScheduleUpdateIn(BaseModel): + """Input schema for updating schedule (enable/disable).""" + + enabled: bool = Field(description="Enable or disable the schedule") +``` + +### 2. 
TaskManager Changes + +**File:** `src/chapkit/modules/task/manager.py` + +Key additions for scheduling: + +```python +class TaskManager(BaseManager[Task, TaskIn, TaskOut, ULID]): + """Manager for Task template entities with artifact-based execution.""" + + def __init__(self, ...) -> None: + # Existing initialization + ... + # New: Schedule management + self._schedules: dict[ULID, TaskSchedule] = {} + self._scheduler_task: asyncio.Task | None = None + self._scheduler_lock = asyncio.Lock() + + # Note: execute_task() already exists and handles both shell and python tasks + # Scheduling methods (NEW) + + async def schedule_task( + self, task_id: ULID, schedule_in: ScheduleIn + ) -> ScheduleOut: + """Create a new schedule for a task.""" + # Verify task exists + task = await self.repo.find_by_id(task_id) + if task is None: + raise ValueError(f"Task {task_id} not found") + + # Create schedule + schedule_id = ULID() + now = datetime.now(timezone.utc) + + schedule = TaskSchedule( + id=schedule_id, + task_id=task_id, + schedule_type=schedule_in.schedule_type, + run_at=schedule_in.run_at, + interval_seconds=schedule_in.interval_seconds, + cron_expression=schedule_in.cron_expression, + enabled=schedule_in.enabled, + next_run_at=await self._calculate_next_run_from_input(schedule_in, now), + last_run_at=None, + created_at=now, + updated_at=now, + ) + + async with self._scheduler_lock: + self._schedules[schedule_id] = schedule + # Ensure scheduler worker is running + if self._scheduler_task is None or self._scheduler_task.done(): + self._scheduler_task = asyncio.create_task(self._scheduler_worker()) + + return ScheduleOut.model_validate(schedule) + + async def get_schedules_for_task(self, task_id: ULID) -> list[ScheduleOut]: + """Get all schedules for a specific task.""" + async with self._scheduler_lock: + schedules = [ + s for s in self._schedules.values() if s.task_id == task_id + ] + return [ScheduleOut.model_validate(s) for s in schedules] + + async def update_schedule( + 
self, schedule_id: ULID, update: ScheduleUpdateIn + ) -> ScheduleOut: + """Update schedule (currently only enable/disable).""" + async with self._scheduler_lock: + schedule = self._schedules.get(schedule_id) + if schedule is None: + raise KeyError(f"Schedule {schedule_id} not found") + + schedule.enabled = update.enabled + schedule.updated_at = datetime.now(timezone.utc) + + return ScheduleOut.model_validate(schedule) + + async def delete_schedule(self, schedule_id: ULID) -> None: + """Delete a schedule.""" + async with self._scheduler_lock: + if schedule_id not in self._schedules: + raise KeyError(f"Schedule {schedule_id} not found") + del self._schedules[schedule_id] + + async def _scheduler_worker(self) -> None: + """Background worker that checks and triggers scheduled tasks.""" + while True: + try: + await asyncio.sleep(60) # Check every minute + + now = datetime.now(timezone.utc) + schedules_to_run: list[TaskSchedule] = [] + + async with self._scheduler_lock: + for schedule in self._schedules.values(): + if schedule.enabled and schedule.next_run_at <= now: + schedules_to_run.append(schedule) + + # Execute tasks (outside lock to avoid blocking) + for schedule in schedules_to_run: + try: + await self.execute_task(schedule.task_id) + + # Update schedule + async with self._scheduler_lock: + schedule.last_run_at = now + + if schedule.schedule_type == "once": + schedule.enabled = False + else: + schedule.next_run_at = await self._calculate_next_run(schedule) + + schedule.updated_at = now + except Exception as e: + # Log error but continue with other schedules + print(f"Error executing scheduled task {schedule.task_id}: {e}") + + except Exception as e: + # Log error but keep worker running + print(f"Error in scheduler worker: {e}") + + async def _calculate_next_run(self, schedule: TaskSchedule) -> datetime: + """Calculate next run time based on schedule configuration.""" + now = datetime.now(timezone.utc) + + if schedule.schedule_type == "once": + # Should not be 
called for "once" schedules + return schedule.run_at or now + + elif schedule.schedule_type == "interval": + # Add interval to last_run or current time + base_time = schedule.last_run_at or now + return base_time + timedelta(seconds=schedule.interval_seconds) + + elif schedule.schedule_type == "cron": + from croniter import croniter + cron = croniter(schedule.cron_expression, now) + return cron.get_next(datetime) + + raise ValueError(f"Unknown schedule_type: {schedule.schedule_type}") + + async def _calculate_next_run_from_input( + self, schedule_in: ScheduleIn, base_time: datetime + ) -> datetime: + """Calculate initial next_run_at from schedule input.""" + if schedule_in.schedule_type == "once": + return schedule_in.run_at + + elif schedule_in.schedule_type == "interval": + return base_time + timedelta(seconds=schedule_in.interval_seconds) + + elif schedule_in.schedule_type == "cron": + from croniter import croniter + cron = croniter(schedule_in.cron_expression, base_time) + return cron.get_next(datetime) + + raise ValueError(f"Unknown schedule_type: {schedule_in.schedule_type}") +``` + +### 3. TaskRouter Changes + +**File:** `src/chapkit/modules/task/router.py` + +Add schedule endpoints: + +```python +def _register_routes(self) -> None: + """Register task CRUD routes and execution/scheduling operations.""" + super()._register_routes() + + manager_factory = self.manager_factory + + # Existing: /$execute endpoint + ... 
+ + # New: /$schedule endpoint + async def schedule_task( + entity_id: str, + schedule_in: ScheduleIn, + manager: TaskManager = Depends(manager_factory), + ) -> ScheduleOut: + """Schedule a task for execution.""" + task_id = self._parse_ulid(entity_id) + try: + return await manager.schedule_task(task_id, schedule_in) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + self.register_entity_operation( + "schedule", + schedule_task, + http_method="POST", + response_model=ScheduleOut, + status_code=201, + summary="Schedule task", + description="Create a schedule for task execution", + ) + + # New: Get schedules for task + async def get_task_schedules( + entity_id: str, + manager: TaskManager = Depends(manager_factory), + ) -> list[ScheduleOut]: + """Get all schedules for a task.""" + task_id = self._parse_ulid(entity_id) + return await manager.get_schedules_for_task(task_id) + + self.register_entity_operation( + "schedules", + get_task_schedules, + http_method="GET", + response_model=list[ScheduleOut], + summary="Get task schedules", + description="List all schedules for this task", + ) + + # New: Delete schedule + async def delete_task_schedule( + entity_id: str, + schedule_id: str, + manager: TaskManager = Depends(manager_factory), + ) -> None: + """Delete a task schedule.""" + try: + schedule_ulid = ULID.from_str(schedule_id) + await manager.delete_schedule(schedule_ulid) + except (ValueError, KeyError) as e: + raise HTTPException(status_code=404, detail=str(e)) + + # Custom route pattern for schedule operations + @self.router.delete( + "/{entity_id}/$schedules/{schedule_id}", + status_code=204, + summary="Delete schedule", + tags=self.tags, + ) + async def delete_schedule_route( + entity_id: str, + schedule_id: str, + manager: TaskManager = Depends(manager_factory), + ): + await delete_task_schedule(entity_id, schedule_id, manager) + + # New: Update schedule (enable/disable) + @self.router.patch( + 
"/{entity_id}/$schedules/{schedule_id}", + response_model=ScheduleOut, + summary="Update schedule", + tags=self.tags, + ) + async def update_schedule_route( + entity_id: str, + schedule_id: str, + update: ScheduleUpdateIn, + manager: TaskManager = Depends(manager_factory), + ): + try: + schedule_ulid = ULID.from_str(schedule_id) + return await manager.update_schedule(schedule_ulid, update) + except (ValueError, KeyError) as e: + raise HTTPException(status_code=404, detail=str(e)) +``` + +## API Reference (Phase 2) + +### Scheduling Endpoints + +#### POST /api/v1/tasks/{task_id}/$schedule +Create a schedule for a task. + +**Request (one-off):** +```json +{ + "schedule_type": "once", + "run_at": "2025-10-20T14:00:00Z", + "enabled": true +} +``` + +**Request (interval):** +```json +{ + "schedule_type": "interval", + "interval_seconds": 3600, + "enabled": true +} +``` + +**Request (cron):** +```json +{ + "schedule_type": "cron", + "cron_expression": "0 2 * * *", + "enabled": true +} +``` + +**Response (201):** +```json +{ + "id": "01SCHEDULE...", + "task_id": "01TASK...", + "schedule_type": "cron", + "cron_expression": "0 2 * * *", + "enabled": true, + "next_run_at": "2025-10-18T02:00:00Z", + "last_run_at": null, + "created_at": "2025-10-17T10:00:00Z", + "updated_at": "2025-10-17T10:00:00Z" +} +``` + +#### GET /api/v1/tasks/{task_id}/$schedules +List all schedules for a task. + +**Response (200):** +```json +[ + { + "id": "01SCHEDULE...", + "task_id": "01TASK...", + "schedule_type": "interval", + "interval_seconds": 3600, + "enabled": true, + "next_run_at": "2025-10-17T11:00:00Z", + "last_run_at": "2025-10-17T10:00:00Z", + "created_at": "2025-10-17T09:00:00Z", + "updated_at": "2025-10-17T10:00:00Z" + } +] +``` + +#### PATCH /api/v1/tasks/{task_id}/$schedules/{schedule_id} +Update a schedule (enable/disable). + +**Request:** +```json +{ + "enabled": false +} +``` + +**Response (200):** +```json +{ + "id": "01SCHEDULE...", + "enabled": false, + ... 
+} +``` + +#### DELETE /api/v1/tasks/{task_id}/$schedules/{schedule_id} +Delete a schedule. + +**Response (204):** No content + +## Usage Examples (Phase 2) + +### Example 1: Schedule Task with Cron + +```bash +# Create task (shell or python) +TASK_ID=$(curl -s -X POST http://localhost:8000/api/v1/tasks \ + -d '{"command": "backup_database", "task_type": "python"}' | jq -r '.id') + +# Schedule to run daily at 2 AM +SCHEDULE_ID=$(curl -s -X POST http://localhost:8000/api/v1/tasks/$TASK_ID/\$schedule \ + -d '{ + "schedule_type": "cron", + "cron_expression": "0 2 * * *" + }' | jq -r '.id') + +# List all schedules for task +curl http://localhost:8000/api/v1/tasks/$TASK_ID/\$schedules + +# Disable schedule temporarily +curl -X PATCH http://localhost:8000/api/v1/tasks/$TASK_ID/\$schedules/$SCHEDULE_ID \ + -d '{"enabled": false}' + +# Re-enable +curl -X PATCH http://localhost:8000/api/v1/tasks/$TASK_ID/\$schedules/$SCHEDULE_ID \ + -d '{"enabled": true}' + +# Delete schedule +curl -X DELETE http://localhost:8000/api/v1/tasks/$TASK_ID/\$schedules/$SCHEDULE_ID +``` + +### Example 2: Interval-Based Monitoring + +```python +# Register monitoring task +@TaskRegistry.register("health_check") +async def health_check() -> dict: + """Check system health.""" + import psutil + return { + "cpu_percent": psutil.cpu_percent(), + "memory_percent": psutil.virtual_memory().percent, + "disk_percent": psutil.disk_usage('/').percent, + "timestamp": datetime.now(timezone.utc).isoformat(), + } +``` + +```bash +# Create monitoring task +TASK_ID=$(curl -s -X POST http://localhost:8000/api/v1/tasks \ + -d '{"command": "health_check", "task_type": "python"}' | jq -r '.id') + +# Schedule to run every 5 minutes +curl -X POST http://localhost:8000/api/v1/tasks/$TASK_ID/\$schedule \ + -d '{ + "schedule_type": "interval", + "interval_seconds": 300 + }' + +# Monitor execution history via jobs/artifacts +curl http://localhost:8000/api/v1/jobs?page=1&size=20 +``` + +## Testing Strategy (Phase 2) + +### 
Unit Tests + +**test_schedule_validation.py:** +- Validate "once" schedule requires run_at +- Validate "interval" schedule requires interval_seconds +- Validate "cron" schedule requires valid cron_expression +- Reject invalid cron expressions +- Reject past timestamps for "once" schedules + +**test_next_run_calculation.py:** +- Calculate next run for "once" schedules +- Calculate next run for "interval" schedules +- Calculate next run for "cron" schedules +- Handle edge cases (month boundaries, DST, leap years for cron) + +### Integration Tests + +**test_task_scheduling.py:** +- Create schedule via API +- List schedules for task +- Update schedule (enable/disable) +- Delete schedule +- Verify scheduled execution occurs +- Verify "once" schedule disables after execution +- Verify "interval" schedule calculates next run correctly + +**test_scheduler_worker.py:** +- Worker executes due schedules +- Worker skips disabled schedules +- Worker continues after task failure +- Worker updates last_run_at and next_run_at +- Multiple schedules for same task execute correctly + +## Migration Path to Persistence + +When persistence is needed later: + +1. Create `ScheduledTask` ORM model (similar to current `TaskSchedule` Pydantic model) +2. Create `ScheduleRepository` with standard CRUD operations +3. Update `TaskManager._schedules` to load from database on startup +4. Update schedule CRUD methods to persist to database +5. Add database cleanup for completed "once" schedules +6. **No API changes required** - same endpoints, same request/response format + +## Open Questions (Phase 2) + +1. Should schedules be deleted when parent task is deleted? +2. Should we limit max number of schedules per task? +3. Should we expose scheduler worker health/status? +4. Should we support schedule "tags" for bulk enable/disable? 
+ +## References + +- Current task execution guide: `docs/guides/task-execution.md` +- Python task execution examples: `examples/python_task_execution_api.py` +- Job scheduler: `src/chapkit/core/scheduler.py` +- Task module: `src/chapkit/modules/task/` +- Croniter docs: https://github.com/kiorky/croniter + +--- + +## Summary + +**Phase 1 (IMPLEMENTED):** Python task execution with type-based dependency injection is complete with comprehensive testing (683 tests passing) and documentation. + +**Phase 2 (DRAFT):** Job scheduling design is ready for implementation when needed. + +Both phases integrate seamlessly with existing chapkit architecture and maintain backwards compatibility with shell task execution. diff --git a/docs/guides/task-execution.md b/docs/guides/task-execution.md index e4e88c4..58fa572 100644 --- a/docs/guides/task-execution.md +++ b/docs/guides/task-execution.md @@ -1,6 +1,6 @@ # Task Execution -Chapkit provides a task execution system for running shell commands asynchronously with artifact-based result storage. Tasks are reusable command templates that can be executed multiple times, with each execution creating a Job and storing results in an Artifact. +Chapkit provides a task execution system for running shell commands and Python functions asynchronously with artifact-based result storage. Tasks are reusable templates that can be executed multiple times, with each execution creating a Job and storing results in an Artifact. ## Quick Start @@ -85,6 +85,350 @@ The Job record links to the result artifact via `Job.artifact_id`. --- +## Python Task Execution + +In addition to shell commands, Chapkit supports executing registered Python functions as tasks. This provides type-safe, IDE-friendly task execution with parameter validation. + +### TaskRegistry + +Python functions must be registered before they can be executed as tasks. This prevents arbitrary code execution and ensures all callable functions are explicitly defined. 
+ +**Registration Methods:** + +**1. Decorator Registration:** +```python +from chapkit import TaskRegistry + +@TaskRegistry.register("calculate_sum") +async def calculate_sum(a: int, b: int) -> dict: + """Calculate sum of two numbers asynchronously.""" + await asyncio.sleep(0.1) # Simulate async work + return {"result": a + b, "operation": "sum"} + +@TaskRegistry.register("process_data") +def process_data(input_text: str, uppercase: bool = False) -> dict: + """Process text data synchronously.""" + result = input_text.upper() if uppercase else input_text.lower() + return {"processed": result, "original": input_text} +``` + +**2. Imperative Registration:** +```python +def my_function(param: str) -> dict: + return {"result": f"Processed {param}"} + +TaskRegistry.register_function("my_task", my_function) +``` + +### Creating Python Tasks + +Python tasks use `task_type="python"` and accept a `parameters` dict: + +```bash +curl -X POST http://localhost:8000/api/v1/tasks \ + -H "Content-Type: application/json" \ + -d '{ + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32} + }' +``` + +**Field Mapping:** +- `command` - Name of registered function (not the function body) +- `task_type` - Must be "python" +- `parameters` - Dict passed as kwargs to the function + +### Python Task Artifacts + +Python task results have a different structure than shell tasks: + +**Successful Execution:** +```json +{ + "task": { + "id": "01TASK...", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "created_at": "2025-10-17T...", + "updated_at": "2025-10-17T..." + }, + "result": { + "result": 42, + "operation": "sum" + }, + "error": null +} +``` + +**Failed Execution:** +```json +{ + "task": {...}, + "result": null, + "error": { + "type": "ValueError", + "message": "Invalid parameter value", + "traceback": "Traceback (most recent call last):\n..." 
+ } +} +``` + +**Comparison with Shell Tasks:** + +| Feature | Shell Tasks | Python Tasks | +|---------|-------------|--------------| +| Output fields | `stdout`, `stderr`, `exit_code` | `result`, `error` | +| Success indicator | `exit_code == 0` | `error == null` | +| Error info | `stderr` text | Full exception with traceback | +| Return value | Command output text | Any JSON-serializable Python object | + +### Sync vs Async Functions + +TaskRegistry supports both synchronous and asynchronous functions: + +```python +# Async function - awaited directly +@TaskRegistry.register("async_task") +async def async_task(param: str) -> dict: + await asyncio.sleep(1) + return {"result": param} + +# Sync function - executed in thread pool +@TaskRegistry.register("sync_task") +def sync_task(param: str) -> dict: + import time + time.sleep(1) # Blocking operation + return {"result": param} +``` + +Synchronous functions are executed in a thread pool via `asyncio.to_thread()` to prevent blocking the event loop. + +### Dependency Injection + +Python task functions support **type-based dependency injection** for framework services. The framework automatically injects dependencies based on parameter type hints, while user parameters come from `task.parameters`. 
+ +#### Injectable Types Reference + +| Type | Description | Use Case | +|------|-------------|----------| +| `AsyncSession` | SQLAlchemy async database session | Database queries, ORM operations | +| `Database` | chapkit Database instance | Creating sessions, database operations | +| `ArtifactManager` | Artifact management service | Saving/loading artifacts during execution | +| `JobScheduler` | Job scheduling service | Submitting child jobs, job management | + +**Location**: Defined in `src/chapkit/modules/task/manager.py` as `INJECTABLE_TYPES` + +#### Basic Injection + +Functions request framework services via type hints: + +```python +from sqlalchemy.ext.asyncio import AsyncSession +from chapkit import TaskRegistry + +@TaskRegistry.register("query_task_count") +async def query_task_count(session: AsyncSession) -> dict: + """Task that queries database using injected session.""" + from sqlalchemy import select, func + from chapkit.modules.task.models import Task + + # Use injected session + stmt = select(func.count()).select_from(Task) + result = await session.execute(stmt) + count = result.scalar() or 0 + + return { + "total_tasks": count, + "timestamp": datetime.now(timezone.utc).isoformat(), + } +``` + +**Execution** - No parameters needed: +```json +{ + "command": "query_task_count", + "task_type": "python", + "parameters": {} +} +``` + +#### Mixed Parameters + +Combine user parameters with injected dependencies: + +```python +@TaskRegistry.register("process_with_db") +async def process_with_db( + input_text: str, # From task.parameters + count: int, # From task.parameters + session: AsyncSession, # Injected by framework +) -> dict: + """Mix user params and framework injection.""" + # Perform database operations using session + # Process user-provided input_text and count + return {"processed": input_text, "count": count} +``` + +**Execution**: +```json +{ + "command": "process_with_db", + "task_type": "python", + "parameters": { + "input_text": "Hello", + 
"count": 42 + } +} +``` + +**Parameter Sources**: +- User parameters: Primitives (`str`, `int`, `dict`) and generic types (`pd.DataFrame`) +- Framework parameters: Injectable types from the table above + +#### Optional Injection + +Use Optional types for optional dependencies: + +```python +@TaskRegistry.register("optional_db_task") +async def optional_db_task( + data: dict, # From task.parameters (required) + session: AsyncSession | None = None, # Injected if available (optional) +) -> dict: + """Task with optional session injection.""" + if session: + # Use database if session available + pass + return {"processed": data} +``` + +#### Flexible Naming + +Parameter names don't matter - only types: + +```python +# All of these work - framework matches by type +async def task_a(session: AsyncSession) -> dict: ... +async def task_b(db_session: AsyncSession) -> dict: ... +async def task_c(conn: AsyncSession) -> dict: ... +``` + +This allows natural, readable parameter names in your functions. + +#### Multiple Injections + +Inject multiple framework services: + +```python +from chapkit import Database, ArtifactManager + +@TaskRegistry.register("complex_task") +async def complex_task( + input_data: dict, # From task.parameters + database: Database, # Injected + artifact_manager: ArtifactManager, # Injected + session: AsyncSession, # Injected +) -> dict: + """Task using multiple framework services.""" + # Use all injected services + return {"result": "processed"} +``` + +#### Error Handling + +Missing required user parameters raise clear errors: + +```python +@TaskRegistry.register("needs_param") +async def needs_param(name: str, session: AsyncSession) -> dict: + return {"name": name} + +# Executing without 'name' parameter: +{ + "command": "needs_param", + "task_type": "python", + "parameters": {} # Missing 'name' +} + +# Error captured in artifact: +{ + "error": { + "type": "ValueError", + "message": "Missing required parameter 'name' for task function. 
+ Parameter is not injectable and not provided in task.parameters."
+  }
+}
+```
+
+#### Best Practices
+
+**DO:**
+- Use type hints for all parameters
+- Request only needed framework services
+- Use descriptive parameter names
+- Combine user parameters with injections naturally
+
+**DON'T:**
+- Confuse parameter sources (primitives come from task.parameters; injectable types come from the framework)
+- Forget type hints (injection requires them)
+- Assume services are always available (use Optional for optional deps)
+
+#### Example: Database Query Task
+
+Complete example combining injection with user parameters:
+
+```python
+@TaskRegistry.register("search_tasks")
+async def search_tasks(
+    command_pattern: str,  # User parameter
+    session: AsyncSession,  # Injected (non-default params must precede defaults)
+    enabled_only: bool = True,  # User parameter with default
+) -> dict:
+    """Search for tasks matching a pattern."""
+    from sqlalchemy import select
+    from chapkit.modules.task.models import Task
+
+    # Build query using injected session
+    stmt = select(Task).where(Task.command.like(f"%{command_pattern}%"))
+
+    if enabled_only:
+        stmt = stmt.where(Task.enabled == True)
+
+    result = await session.execute(stmt)
+    tasks = result.scalars().all()
+
+    return {
+        "matches": len(tasks),
+        "tasks": [{"id": str(t.id), "command": t.command} for t in tasks],
+    }
+```
+
+**Usage**:
+```bash
+curl -X POST http://localhost:8000/api/v1/tasks \
+  -H "Content-Type: application/json" \
+  -d '{
+    "command": "search_tasks",
+    "task_type": "python",
+    "parameters": {
+      "command_pattern": "echo",
+      "enabled_only": true
+    }
+  }'
+```
+
+### Complete Example
+
+See `examples/python_task_execution_api.py` for a complete working example with:
+- Multiple registered functions (async and sync)
+- Error handling demonstrations
+- Mixed shell and Python tasks
+- Seeded example tasks
+
+---
+
 ## Task Lifecycle
 
 ```
@@ -155,20 +499,38 @@ app = (
 
 ### POST /api/v1/tasks
 
-Create a new task template.
+Create a new task template (shell or Python).
-**Request:** +**Request (Shell Task):** ```json { "command": "echo 'Hello World'" } ``` +**Request (Python Task):** +```json +{ + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32} +} +``` + +**Fields:** +- `command` (required) - Shell command or registered Python function name +- `task_type` (optional) - "shell" (default) or "python" +- `parameters` (optional) - Dict of parameters for Python tasks (ignored for shell tasks) +- `enabled` (optional) - Boolean to enable/disable task execution (default: true) + **Response (201):** ```json { "id": "01JCSEED0000000000000TASK1", - "command": "echo 'Hello World'", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "enabled": true, "created_at": "2025-10-14T10:30:00Z", "updated_at": "2025-10-14T10:30:00Z" } @@ -176,14 +538,21 @@ Create a new task template. ### GET /api/v1/tasks -List all task templates with optional pagination. +List all task templates with optional pagination and filtering. 
```bash # List all tasks curl http://localhost:8000/api/v1/tasks +# Filter by enabled status +curl http://localhost:8000/api/v1/tasks?enabled=true # Only enabled tasks +curl http://localhost:8000/api/v1/tasks?enabled=false # Only disabled tasks + # With pagination curl http://localhost:8000/api/v1/tasks?page=1&size=20 + +# Combine filters +curl http://localhost:8000/api/v1/tasks?enabled=true&page=1&size=10 ``` **Response:** @@ -192,6 +561,18 @@ curl http://localhost:8000/api/v1/tasks?page=1&size=20 { "id": "01JCSEED0000000000000TASK1", "command": "ls -la /tmp", + "task_type": "shell", + "parameters": null, + "enabled": true, + "created_at": "2025-10-14T10:30:00Z", + "updated_at": "2025-10-14T10:30:00Z" + }, + { + "id": "01JCSEED0000000000000TASK2", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "enabled": false, "created_at": "2025-10-14T10:30:00Z", "updated_at": "2025-10-14T10:30:00Z" } @@ -208,16 +589,17 @@ curl http://localhost:8000/api/v1/tasks/01JCSEED0000000000000TASK1 ### PUT /api/v1/tasks/{task_id} -Update a task template command. +Update a task template. **Request:** ```json { - "command": "echo 'Updated command'" + "command": "echo 'Updated command'", + "task_type": "shell" } ``` -**Note:** Updating a task does not affect previous execution artifacts. +**Note:** Updating a task does not affect previous execution artifacts. You can change task_type and parameters when updating. ### DELETE /api/v1/tasks/{task_id} @@ -248,9 +630,51 @@ curl -X POST http://localhost:8000/api/v1/tasks/01JCSEED0000000000000TASK1/\$exe ``` **Errors:** -- `400 Bad Request` - Task not found or invalid ID +- `400 Bad Request` - Task not found, invalid ID, or task is disabled - `409 Conflict` - Scheduler or artifact manager not configured +### Task Enable/Disable + +Tasks can be enabled or disabled to control execution. Disabled tasks cannot be executed but remain in the database for reference. 
+ +**Creating a Disabled Task:** +```bash +curl -X POST http://localhost:8000/api/v1/tasks \ + -H "Content-Type: application/json" \ + -d '{ + "command": "echo test", + "enabled": false + }' +``` + +**Disabling an Existing Task:** +```bash +curl -X PUT http://localhost:8000/api/v1/tasks/{task_id} \ + -H "Content-Type: application/json" \ + -d '{ + "command": "echo test", + "enabled": false + }' +``` + +**Attempting to Execute a Disabled Task:** +```bash +curl -X POST http://localhost:8000/api/v1/tasks/{disabled_task_id}/\$execute +``` + +**Response (400):** +```json +{ + "detail": "Cannot execute disabled task {task_id}" +} +``` + +**Use Cases:** +- Temporarily pause task execution without deletion +- Preserve task history while preventing new executions +- Automatically disable orphaned Python tasks (see Orphaned Tasks section) +- Soft-delete pattern for auditing and compliance + --- ## Artifact Integration @@ -329,7 +753,9 @@ echo "$artifacts" | jq --arg task_id "$TASK_ID" \ ## Examples -### Simple Commands +### Shell Task Examples + +**Simple Commands:** ```bash # Directory listing @@ -345,10 +771,10 @@ curl -X POST http://localhost:8000/api/v1/tasks \ -d '{"command": "echo \"Task execution works!\""}' | jq -r '.id' ``` -### Python Scripts +**Python One-liners (Shell Tasks):** ```bash -# Python one-liner +# Python one-liner as shell command curl -X POST http://localhost:8000/api/v1/tasks -d '{ "command": "python3 -c \"import sys; print(sys.version); print(2+2)\"" }' @@ -359,6 +785,100 @@ curl -X POST http://localhost:8000/api/v1/tasks -d '{ }' ``` +### Python Task Examples + +**Async Function Execution:** + +```bash +# Assuming you have registered this function: +# @TaskRegistry.register("calculate_sum") +# async def calculate_sum(a: int, b: int) -> dict: +# await asyncio.sleep(0.1) +# return {"result": a + b, "operation": "sum"} + +# Create Python task +TASK_ID=$(curl -s -X POST http://localhost:8000/api/v1/tasks \ + -H "Content-Type: application/json" \ + -d 
'{
+    "command": "calculate_sum",
+    "task_type": "python",
+    "parameters": {"a": 15, "b": 27}
+  }' | jq -r '.id')
+
+# Execute task
+JOB_ID=$(curl -s -X POST http://localhost:8000/api/v1/tasks/$TASK_ID/\$execute | jq -r '.job_id')
+
+# Wait and get result
+sleep 1
+ARTIFACT_ID=$(curl -s http://localhost:8000/api/v1/jobs/$JOB_ID | jq -r '.artifact_id')
+
+# View result
+curl -s http://localhost:8000/api/v1/artifacts/$ARTIFACT_ID | jq '.data.result'
+# Output: {"result": 42, "operation": "sum"}
+```
+
+**Sync Function with Parameters:**
+
+```bash
+# Assuming you have registered:
+# @TaskRegistry.register("process_data")
+# def process_data(input_text: str, uppercase: bool = False) -> dict:
+#     result = input_text.upper() if uppercase else input_text.lower()
+#     return {"processed": result, "original": input_text}
+
+curl -X POST http://localhost:8000/api/v1/tasks \
+  -H "Content-Type: application/json" \
+  -d '{
+    "command": "process_data",
+    "task_type": "python",
+    "parameters": {
+      "input_text": "Hello World",
+      "uppercase": true
+    }
+  }'
+```
+
+**Error Handling:**
+
+```bash
+# Assuming you have registered:
+# @TaskRegistry.register("failing_task")
+# async def failing_task(should_fail: bool = True) -> dict:
+#     if should_fail:
+#         raise ValueError("This task was designed to fail")
+#     return {"success": True}
+
+TASK_ID=$(curl -s -X POST http://localhost:8000/api/v1/tasks \
+  -H "Content-Type: application/json" \
+  -d '{
+    "command": "failing_task",
+    "task_type": "python",
+    "parameters": {"should_fail": true}
+  }' | jq -r '.id')
+
+# Execute and check artifact
+JOB_ID=$(curl -s -X POST http://localhost:8000/api/v1/tasks/$TASK_ID/\$execute | jq -r '.job_id')
+sleep 1
+
+# View error details
+curl -s http://localhost:8000/api/v1/jobs/$JOB_ID | jq -r '.artifact_id' | \
+  xargs -I {} curl -s http://localhost:8000/api/v1/artifacts/{} | jq '.data.error'
+
+# Output:
+# {
+#   "type": "ValueError",
+#   "message": "This task was designed to fail",
+#   "traceback": "Traceback (most recent call last):\n..."
+# } +``` + +**Complete Working Example:** + +See `examples/python_task_execution_api.py` for a full service with: +- Multiple registered functions (async and sync) +- Error handling demonstrations +- Mixed shell and Python tasks +- Integration with ServiceBuilder + ### Multi-line Commands ```bash @@ -748,29 +1268,83 @@ app = ( **Command Injection Prevention:** -Tasks execute arbitrary shell commands. Implement access controls: +Tasks execute arbitrary shell commands. Implement access controls using CRUD permissions: ```python from chapkit.core.api.crud import CrudPermissions +from chapkit.api import ServiceBuilder, ServiceInfo -# Restrict task creation/modification +# Read-only task API (tasks created only via code) task_permissions = CrudPermissions( - create=False, # Disable runtime task creation - read=True, - update=False, # Disable runtime updates - delete=False, # Disable deletion + allow_create=False, # Disable runtime task creation + allow_read=True, # Allow reading tasks + allow_update=False, # Disable runtime updates + allow_delete=False, # Disable deletion ) -# Apply at router level (requires custom router setup) +app = ( + ServiceBuilder(info=ServiceInfo(display_name="Task Service")) + .with_database("tasks.db") + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=5) + .with_tasks(permissions=task_permissions) # Apply permissions + .build() +) ``` +**Read-Only API Pattern:** + +With read-only permissions, all tasks are pre-seeded at startup: + +```python +from chapkit import TaskIn, TaskManager + +async def seed_tasks(app): + """Pre-seed task templates on startup.""" + task_manager = app.state.task_manager + + # Define tasks programmatically + tasks = [ + TaskIn(command="echo 'System health check'", enabled=True), + TaskIn(command="python3 /app/backup.py", enabled=True), + TaskIn(command="process_data", task_type="python", + parameters={"batch_size": 100}, enabled=True), + ] + + for task in tasks: + await task_manager.save(task) + 
+app = ( + ServiceBuilder(info=info) + .with_database("tasks.db") + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=5) + .with_tasks(permissions=CrudPermissions( + allow_create=False, + allow_read=True, + allow_update=False, + allow_delete=False, + )) + .on_startup(seed_tasks) + .build() +) +``` + +**Benefits:** +- Tasks defined in code (version controlled) +- No runtime command injection risk +- API users can only execute pre-defined tasks +- Tasks can be audited before deployment +- Enables GitOps workflow for task management + **Recommendations:** -- Pre-seed tasks at startup (read-only templates) -- Use authentication (`.with_auth()`) -- Validate commands before creating tasks +- Use read-only API for production (pre-seed tasks at startup) +- Apply authentication (`.with_auth()`) for execution endpoint +- Validate commands in seeding logic - Run service with limited OS user permissions - Use container security (no privileged mode) -- Monitor execution logs for suspicious commands +- Monitor execution logs for suspicious activity +- Use `validate_and_disable_orphaned_tasks` to prevent broken Python tasks ### Docker Deployment @@ -919,6 +1493,106 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* ``` +### Orphaned Python Tasks + +**Problem:** Python task references a function that was removed or renamed from the registry. + +**Cause:** Function was removed or renamed but task template still references the old name. 
+ +**Automatic Disabling (Recommended):** + +Chapkit provides a startup validation utility that automatically disables orphaned Python tasks: + +```python +from chapkit import validate_and_disable_orphaned_tasks +from chapkit.api import ServiceBuilder, ServiceInfo + +app = ( + ServiceBuilder(info=ServiceInfo(display_name="Task Service")) + .with_database("tasks.db") + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=5) + .with_tasks() + .on_startup(validate_and_disable_orphaned_tasks) + .build() +) +``` + +**Behavior:** +- Checks all Python tasks against `TaskRegistry` on startup +- Automatically disables tasks referencing unregistered functions +- Logs warnings for each orphaned task with task IDs and function names +- Preserves task history (soft-delete via `enabled=False`) +- Returns count of disabled tasks + +**Example Log Output:** +``` +WARNING Found orphaned Python tasks - disabling them + count: 2 + task_ids: ['01TASK1...', '01TASK2...'] + commands: ['old_function', 'removed_function'] +INFO Disabling orphaned task 01TASK1...: function 'old_function' not found in registry +INFO Disabling orphaned task 01TASK2...: function 'removed_function' not found in registry +WARNING Disabled 2 orphaned Python task(s) +``` + +**Filtering Disabled Tasks:** +```bash +# List all disabled tasks +curl http://localhost:8000/api/v1/tasks?enabled=false + +# List only enabled tasks +curl http://localhost:8000/api/v1/tasks?enabled=true +``` + +**Re-enabling Tasks:** +If you re-register the function, you can re-enable the task: + +```python +# Re-register the function +@TaskRegistry.register("old_function") +def old_function(**params) -> dict: + return {"result": "restored"} +``` + +```bash +# Re-enable the task +curl -X PUT http://localhost:8000/api/v1/tasks/{task_id} \ + -H "Content-Type: application/json" \ + -d '{ + "command": "old_function", + "task_type": "python", + "enabled": true + }' +``` + +**Alternative Solutions:** + +**Option 1: Keep deprecated 
functions with errors** +```python +@TaskRegistry.register("old_function") +def old_function(**params) -> dict: + """Deprecated - use new_function instead.""" + raise NotImplementedError("This function has been removed. Use new_function instead.") +``` + +**Option 2: Manual deletion** +```bash +# Find orphaned tasks +curl http://localhost:8000/api/v1/tasks?enabled=false | \ + jq '.[] | select(.task_type == "python")' + +# Delete specific task +curl -X DELETE http://localhost:8000/api/v1/tasks/{task_id} +``` + +**Best Practices:** +- Always use `validate_and_disable_orphaned_tasks` on startup (production ready) +- Monitor logs for orphaned task warnings +- Consider versioning function names (e.g., `process_data_v1`, `process_data_v2`) +- Document which tasks depend on which functions +- Periodically review disabled tasks for cleanup + --- ## Next Steps @@ -929,6 +1603,8 @@ RUN apt-get update && apt-get install -y \ - **Monitoring:** Track execution metrics with `.with_monitoring()` For more examples: -- `examples/task_execution_api.py` - Complete task execution service -- `tests/test_example_task_execution_api.py` - Comprehensive test suite +- `examples/task_execution_api.py` - Shell task execution service +- `examples/python_task_execution_api.py` - Python task execution with TaskRegistry +- `tests/test_example_task_execution_api.py` - Shell task test suite +- `tests/test_example_python_task_execution_api.py` - Python task test suite - `docs/guides/job-scheduler.md` - Job scheduler and SSE streaming diff --git a/examples/docs/task_python_execution.md b/examples/docs/task_python_execution.md new file mode 100644 index 0000000..ffce65d --- /dev/null +++ b/examples/docs/task_python_execution.md @@ -0,0 +1,543 @@ +# python_task_execution_api.py - Python Task Execution cURL Guide + +Task execution service demonstrating Python function registration, task parameters, enable/disable controls, and orphaned task validation. 
+ +## Quick Start + +```bash +# Start the service +fastapi dev examples/python_task_execution_api.py + +# Service available at: http://127.0.0.1:8000 +``` + +## Features + +- **Python Functions**: Register sync and async functions as executable tasks +- **Parameters**: Pass JSON parameters as kwargs to Python functions +- **Task Types**: Both "shell" and "python" tasks supported +- **Enable/Disable**: Control task execution with enabled flag +- **Orphaned Tasks**: Automatic validation and disabling on startup +- **Task Registry**: Type-safe function registration with @TaskRegistry.register() + +## Complete Workflow + +### 1. Check Service Health + +```bash +curl http://127.0.0.1:8000/health +``` + +**Response:** +```json +{ + "status": "healthy", + "checks": { + "database": "healthy" + } +} +``` + +### 2. List All Tasks + +```bash +curl http://127.0.0.1:8000/api/v1/tasks +``` + +**Response:** +```json +[ + { + "id": "01JCSEED0000000000000PYTH1", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "enabled": true, + "created_at": "2025-10-17T10:00:00Z", + "updated_at": "2025-10-17T10:00:00Z" + }, + { + "id": "01JCSEED0000000000000PYTH2", + "command": "process_data", + "task_type": "python", + "parameters": {"input_text": "Hello World", "uppercase": true}, + "enabled": true, + "created_at": "2025-10-17T10:00:00Z", + "updated_at": "2025-10-17T10:00:00Z" + } +] +``` + +### 3. Filter Tasks by Status + +```bash +# Only enabled tasks +curl "http://127.0.0.1:8000/api/v1/tasks?enabled=true" + +# Only disabled tasks +curl "http://127.0.0.1:8000/api/v1/tasks?enabled=false" +``` + +### 4. 
Get Specific Task + +```bash +curl http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH1 +``` + +**Response:** +```json +{ + "id": "01JCSEED0000000000000PYTH1", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "enabled": true, + "created_at": "2025-10-17T10:00:00Z", + "updated_at": "2025-10-17T10:00:00Z" +} +``` + +### 5. Execute Python Task (Async Function) + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH1/\$execute +``` + +**Response:** +```json +{ + "job_id": "01K79YAHJ7BR4E87VVTG8FNBMA", + "message": "Task submitted for execution. Job ID: 01K79YAHJ7BR4E87VVTG8FNBMA" +} +``` + +### 6. Poll Job Status + +```bash +# Poll every 1-2 seconds until status is "completed" +curl http://127.0.0.1:8000/api/v1/jobs/01K79YAHJ7BR4E87VVTG8FNBMA +``` + +**Response (pending):** +```json +{ + "id": "01K79YAHJ7BR4E87VVTG8FNBMA", + "status": "pending", + "artifact_id": null, + "created_at": "2025-10-17T10:01:00Z", + "updated_at": "2025-10-17T10:01:00Z" +} +``` + +**Response (completed):** +```json +{ + "id": "01K79YAHJ7BR4E87VVTG8FNBMA", + "status": "completed", + "artifact_id": "01K79YAHJ7BR4E87VVTG8FNBMB", + "created_at": "2025-10-17T10:01:00Z", + "updated_at": "2025-10-17T10:01:02Z" +} +``` + +### 7. Get Python Task Results + +```bash +curl http://127.0.0.1:8000/api/v1/artifacts/01K79YAHJ7BR4E87VVTG8FNBMB +``` + +**Response:** +```json +{ + "id": "01K79YAHJ7BR4E87VVTG8FNBMB", + "parent_id": null, + "level": 0, + "data": { + "task": { + "id": "01JCSEED0000000000000PYTH1", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "created_at": "2025-10-17T10:00:00Z", + "updated_at": "2025-10-17T10:00:00Z" + }, + "result": { + "result": 42, + "operation": "sum" + }, + "error": null + }, + "created_at": "2025-10-17T10:01:02Z", + "updated_at": "2025-10-17T10:01:02Z" +} +``` + +### 8. 
Execute Sync Python Task + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH2/\$execute +``` + +Wait for job completion, then get results: + +```bash +curl http://127.0.0.1:8000/api/v1/artifacts/ARTIFACT_ID +``` + +**Response:** +```json +{ + "data": { + "task": {...}, + "result": { + "original": "Hello World", + "processed": "HELLO WORLD", + "length": 11, + "timestamp": "2025-10-17T10:05:00Z" + }, + "error": null + } +} +``` + +### 9. Execute Shell Task (For Comparison) + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH5/\$execute +``` + +**Shell Task Artifact (Different Structure):** +```json +{ + "data": { + "task": { + "id": "01JCSEED0000000000000PYTH5", + "command": "echo \"This is a shell task\"", + "created_at": "2025-10-17T10:00:00Z", + "updated_at": "2025-10-17T10:00:00Z" + }, + "stdout": "This is a shell task\n", + "stderr": "", + "exit_code": 0 + } +} +``` + +### 10. Error Handling - Execute Failing Task + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH4/\$execute +``` + +**Artifact with Error:** +```json +{ + "data": { + "task": {...}, + "result": null, + "error": { + "type": "ValueError", + "message": "This task was designed to fail", + "traceback": "Traceback (most recent call last):\n File \"...\", line 48, in failing_task\n raise ValueError(\"This task was designed to fail\")\nValueError: This task was designed to fail" + } + } +} +``` + +### 11. 
Try to Execute Disabled Task + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH6/\$execute +``` + +**Response (400 Bad Request):** +```json +{ + "detail": "Cannot execute disabled task 01JCSEED0000000000000PYTH6" +} +``` + +## Creating Tasks + +### Create Python Task with Parameters + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks \ + -H "Content-Type: application/json" \ + -d '{ + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 25, "b": 17}, + "enabled": true + }' +``` + +**Response:** +```json +{ + "id": "01K79YAHJ7BR4E87VVTG8FNBMC", + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 25, "b": 17}, + "enabled": true, + "created_at": "2025-10-17T10:10:00Z", + "updated_at": "2025-10-17T10:10:00Z" +} +``` + +### Create Shell Task + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks \ + -H "Content-Type: application/json" \ + -d '{ + "command": "date", + "task_type": "shell", + "enabled": true + }' +``` + +### Create Disabled Task + +```bash +curl -X POST http://127.0.0.1:8000/api/v1/tasks \ + -H "Content-Type: application/json" \ + -d '{ + "command": "process_data", + "task_type": "python", + "parameters": {"input_text": "test", "uppercase": false}, + "enabled": false + }' +``` + +## Updating Tasks + +### Enable a Disabled Task + +```bash +curl -X PUT http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH6 \ + -H "Content-Type: application/json" \ + -d '{ + "command": "process_data", + "task_type": "python", + "parameters": {"input_text": "Disabled", "uppercase": false}, + "enabled": true + }' +``` + +### Disable an Enabled Task + +```bash +curl -X PUT http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH1 \ + -H "Content-Type: application/json" \ + -d '{ + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 10, "b": 32}, + "enabled": false + }' +``` + +### Update Task Parameters + +```bash +curl -X PUT 
http://127.0.0.1:8000/api/v1/tasks/01JCSEED0000000000000PYTH1 \ + -H "Content-Type: application/json" \ + -d '{ + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 100, "b": 200}, + "enabled": true + }' +``` + +## Deleting Tasks + +```bash +curl -X DELETE http://127.0.0.1:8000/api/v1/tasks/01K79YAHJ7BR4E87VVTG8FNBMC +``` + +**Response:** `204 No Content` + +**Note:** Deleting a task preserves all execution artifacts in the database. + +## Advanced Workflows + +### Stream Job Progress (SSE) + +```bash +curl -N http://127.0.0.1:8000/api/v1/jobs/01K79YAHJ7BR4E87VVTG8FNBMA/\$stream +``` + +**Response (Server-Sent Events):** +``` +data: {"id": "01K79YAHJ7BR4E87VVTG8FNBMA", "status": "pending", ...} + +data: {"id": "01K79YAHJ7BR4E87VVTG8FNBMA", "status": "running", ...} + +data: {"id": "01K79YAHJ7BR4E87VVTG8FNBMA", "status": "completed", ...} +``` + +### List All Jobs + +```bash +# All jobs +curl http://127.0.0.1:8000/api/v1/jobs + +# Filter by status +curl "http://127.0.0.1:8000/api/v1/jobs?status_filter=completed" +curl "http://127.0.0.1:8000/api/v1/jobs?status_filter=failed" +curl "http://127.0.0.1:8000/api/v1/jobs?status_filter=pending" +``` + +### Pagination + +```bash +# Paginate tasks +curl "http://127.0.0.1:8000/api/v1/tasks?page=1&size=10" + +# Paginate artifacts +curl "http://127.0.0.1:8000/api/v1/artifacts?page=1&size=20" +``` + +### Find Task Executions + +```bash +# Get all artifacts (includes task execution results) +curl http://127.0.0.1:8000/api/v1/artifacts + +# Filter by task ID in application code or jq +curl -s http://127.0.0.1:8000/api/v1/artifacts | \ + jq '[.[] | select(.data.task.id == "01JCSEED0000000000000PYTH1")]' +``` + +## Task Registry Examples + +These Python functions are pre-registered in the example: + +### 1. calculate_sum (Async) +- **Parameters**: `a: int, b: int` +- **Returns**: `{"result": , "operation": "sum"}` +- **Type**: Async function + +### 2. 
process_data (Sync) +- **Parameters**: `input_text: str, uppercase: bool = False` +- **Returns**: `{"original": str, "processed": str, "length": int, "timestamp": str}` +- **Type**: Sync function (runs in thread pool) + +### 3. slow_computation (Sync) +- **Parameters**: `seconds: int = 2` +- **Returns**: `{"completed": true, "duration_seconds": int}` +- **Type**: Sync function with blocking sleep + +### 4. failing_task (Async) +- **Parameters**: `should_fail: bool = True` +- **Returns**: `{"success": true}` or raises ValueError +- **Type**: Async function for error handling demo + +## Orphaned Task Behavior + +The service automatically validates Python tasks on startup: + +1. **Checks**: All Python tasks against TaskRegistry +2. **Detects**: Tasks referencing unregistered functions +3. **Disables**: Orphaned tasks automatically (enabled = false) +4. **Logs**: Warnings with task IDs and function names + +**Example Log Output:** +``` +WARNING Found orphaned Python tasks - disabling them + count: 1 + task_ids: ['01JCSEED0000000000000PYTH7'] + commands: ['nonexistent_function'] +INFO Disabling orphaned task 01JCSEED0000000000000PYTH7: function 'nonexistent_function' not found in registry +WARNING Disabled 1 orphaned Python task(s) +``` + +**Check Disabled Tasks:** +```bash +curl "http://127.0.0.1:8000/api/v1/tasks?enabled=false" +``` + +## Python vs Shell Tasks Comparison + +| Feature | Shell Tasks | Python Tasks | +|---------|-------------|--------------| +| **task_type** | "shell" | "python" | +| **command** | Shell command string | Registered function name | +| **parameters** | Not used | JSON dict passed as kwargs | +| **Success output** | stdout, stderr, exit_code | result (any JSON-serializable) | +| **Error output** | stderr text | Full exception with traceback | +| **Success check** | exit_code == 0 | error == null | +| **Execution** | asyncio.subprocess | Direct function call | +| **Registration** | Not required | Required via TaskRegistry | + +## Tips + +1. 
**Parameters**: Always passed as kwargs - function signature must match parameter keys +2. **Sync Functions**: Automatically run in thread pool via asyncio.to_thread() +3. **Error Handling**: Python exceptions captured with full traceback +4. **Task Snapshot**: Artifacts preserve task state at execution time (immutable) +5. **Orphaned Tasks**: Re-register function and re-enable task to fix +6. **Disabled Tasks**: Cannot execute but remain visible for auditing + +## Troubleshooting + +### "Python function 'xxx' not found in registry" + +**Problem:** Function not registered or service restarted without registration + +**Solution:** +```python +# Re-register the function +@TaskRegistry.register("xxx") +def xxx(**params) -> dict: + return {"result": "ok"} +``` + +### "Cannot execute disabled task" + +**Problem:** Task has `enabled: false` + +**Solution:** +```bash +# Re-enable the task +curl -X PUT http://127.0.0.1:8000/api/v1/tasks/TASK_ID \ + -H "Content-Type: application/json" \ + -d '{...task data..., "enabled": true}' +``` + +### TypeError on function execution + +**Problem:** Parameters don't match function signature + +**Solution:** Ensure parameter keys match function argument names exactly: +```python +# Function expects 'a' and 'b' +def calculate_sum(a: int, b: int) -> dict: ... + +# Parameters must use same names +{"a": 10, "b": 32} # Correct +{"x": 10, "y": 32} # Wrong - TypeError +``` + +### Job stays "pending" + +**Problem:** +1. Reached max_concurrency limit (default: 3) +2. 
Long-running jobs blocking queue + +**Solution:** +```bash +# Check running jobs +curl "http://127.0.0.1:8000/api/v1/jobs?status_filter=running" + +# Wait for jobs to complete or increase max_concurrency in code +``` + +## Next Steps + +- Try **[readonly_task_api.py](../readonly_task_api.py)** for read-only security pattern +- Read **[task-execution.md](../../docs/guides/task-execution.md)** for complete API reference +- Check **[../python_task_execution_api.py](../python_task_execution_api.py)** source code +- See **[../../CLAUDE.md](../../CLAUDE.md)** for architecture overview diff --git a/examples/docs/task_python_execution.postman_collection.json b/examples/docs/task_python_execution.postman_collection.json new file mode 100644 index 0000000..51da13a --- /dev/null +++ b/examples/docs/task_python_execution.postman_collection.json @@ -0,0 +1,958 @@ +{ + "info": { + "_postman_id": "01TASK000000000000000POST1", + "name": "Chapkit Task Execution - Python & Shell", + "description": "Task execution service demonstrating Python function registration, task parameters, dependency injection, enable/disable controls, and orphaned task validation.\n\n**Features:**\n- Python Functions: Register sync and async functions as executable tasks\n- Parameters: Pass JSON parameters as kwargs to Python functions\n- Dependency Injection: Automatic type-based injection (AsyncSession, Database, ArtifactManager, JobScheduler)\n- Task Types: Both \"shell\" and \"python\" tasks supported\n- Enable/Disable: Control task execution with enabled flag\n- Orphaned Tasks: Automatic validation and disabling on startup\n- Task Registry: Type-safe function registration\n\n**Quick Start:**\n```bash\nfastapi dev examples/python_task_execution_api.py\n```\n\nService available at: http://127.0.0.1:8000", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", + "_exporter_id": "chapkit-task-execution" + }, + "item": [ + { + "name": "1. 
Service Health", + "item": [ + { + "name": "Check Service Health", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/health", + "host": ["{{baseUrl}}"], + "path": ["health"] + }, + "description": "Check the health status of the service and database connection." + }, + "response": [ + { + "name": "Healthy", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/health", + "host": ["{{baseUrl}}"], + "path": ["health"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"status\": \"healthy\",\n \"checks\": {\n \"database\": \"healthy\"\n }\n}" + } + ] + } + ] + }, + { + "name": "2. List & Filter Tasks", + "item": [ + { + "name": "List All Tasks", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"] + }, + "description": "List all task templates (both enabled and disabled, Python and shell)." 
+ }, + "response": [ + { + "name": "Task List", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "[\n {\n \"id\": \"01JCSEED0000000000000PYTH1\",\n \"command\": \"calculate_sum\",\n \"task_type\": \"python\",\n \"parameters\": {\"a\": 10, \"b\": 32},\n \"enabled\": true,\n \"created_at\": \"2025-10-17T10:00:00Z\",\n \"updated_at\": \"2025-10-17T10:00:00Z\"\n },\n {\n \"id\": \"01JCSEED0000000000000PYTH2\",\n \"command\": \"process_data\",\n \"task_type\": \"python\",\n \"parameters\": {\"input_text\": \"Hello World\", \"uppercase\": true},\n \"enabled\": true,\n \"created_at\": \"2025-10-17T10:00:00Z\",\n \"updated_at\": \"2025-10-17T10:00:00Z\"\n }\n]" + } + ] + }, + { + "name": "List Only Enabled Tasks", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks?enabled=true", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"], + "query": [ + { + "key": "enabled", + "value": "true", + "description": "Filter by enabled status" + } + ] + }, + "description": "List only tasks that are enabled for execution." + }, + "response": [] + }, + { + "name": "List Only Disabled Tasks", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks?enabled=false", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"], + "query": [ + { + "key": "enabled", + "value": "false", + "description": "Filter by disabled status" + } + ] + }, + "description": "List only tasks that are disabled (cannot be executed)." 
+ }, + "response": [] + }, + { + "name": "Get Specific Task", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/{{taskId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "{{taskId}}"] + }, + "description": "Get a specific task by ID." + }, + "response": [] + } + ] + }, + { + "name": "3. Execute Python Tasks", + "item": [ + { + "name": "Execute Async Python Task (calculate_sum)", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "// Save job_id for polling", + "const response = pm.response.json();", + "pm.environment.set('jobId', response.job_id);" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH1/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH1", "$execute"] + }, + "description": "Execute async Python function with parameters {a: 10, b: 32}. Returns job_id for polling." + }, + "response": [ + { + "name": "Job Submitted", + "originalRequest": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH1/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH1", "$execute"] + } + }, + "status": "Accepted", + "code": 202, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"job_id\": \"01K79YAHJ7BR4E87VVTG8FNBMA\",\n \"message\": \"Task submitted for execution. 
Job ID: 01K79YAHJ7BR4E87VVTG8FNBMA\"\n}" + } + ] + }, + { + "name": "Execute Sync Python Task (process_data)", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "const response = pm.response.json();", + "pm.environment.set('jobId', response.job_id);" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH2/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH2", "$execute"] + }, + "description": "Execute sync Python function with parameters {input_text: 'Hello World', uppercase: true}." + }, + "response": [] + }, + { + "name": "Execute Slow Computation", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "const response = pm.response.json();", + "pm.environment.set('jobId', response.job_id);" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH3/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH3", "$execute"] + }, + "description": "Execute sync function with 1 second sleep to demonstrate blocking operations." + }, + "response": [] + }, + { + "name": "Execute Failing Task (Error Demo)", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "const response = pm.response.json();", + "pm.environment.set('jobId', response.job_id);" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH4/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH4", "$execute"] + }, + "description": "Execute task that raises ValueError to demonstrate error handling." 
+ }, + "response": [] + }, + { + "name": "Execute Dependency Injection Task (query_task_count)", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "const response = pm.response.json();", + "pm.environment.set('jobId', response.job_id);" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH7/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH7", "$execute"] + }, + "description": "Execute task with dependency injection. AsyncSession is automatically injected by the framework (no user parameters needed). Task queries the database using the injected session." + }, + "response": [ + { + "name": "Job Submitted", + "originalRequest": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH7/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH7", "$execute"] + } + }, + "status": "Accepted", + "code": 202, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"job_id\": \"01K79YAHJ7BR4E87VVTG8FNBME\",\n \"message\": \"Task submitted for execution. Job ID: 01K79YAHJ7BR4E87VVTG8FNBME\"\n}" + } + ] + } + ] + }, + { + "name": "4. 
Execute Shell Task", + "item": [ + { + "name": "Execute Shell Task (echo)", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "const response = pm.response.json();", + "pm.environment.set('jobId', response.job_id);" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH5/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH5", "$execute"] + }, + "description": "Execute shell command to demonstrate different artifact structure (stdout/stderr vs result/error)." + }, + "response": [] + } + ] + }, + { + "name": "5. Job Monitoring", + "item": [ + { + "name": "Poll Job Status", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/jobs/{{jobId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "jobs", "{{jobId}}"] + }, + "description": "Poll job status until completed. Check artifact_id when status is 'completed'." 
+ }, + "response": [ + { + "name": "Pending", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/jobs/{{jobId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "jobs", "{{jobId}}"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"id\": \"01K79YAHJ7BR4E87VVTG8FNBMA\",\n \"status\": \"pending\",\n \"artifact_id\": null,\n \"created_at\": \"2025-10-17T10:01:00Z\",\n \"updated_at\": \"2025-10-17T10:01:00Z\"\n}" + }, + { + "name": "Completed", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/jobs/{{jobId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "jobs", "{{jobId}}"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"id\": \"01K79YAHJ7BR4E87VVTG8FNBMA\",\n \"status\": \"completed\",\n \"artifact_id\": \"01K79YAHJ7BR4E87VVTG8FNBMB\",\n \"created_at\": \"2025-10-17T10:01:00Z\",\n \"updated_at\": \"2025-10-17T10:01:02Z\"\n}" + } + ] + }, + { + "name": "Stream Job Progress (SSE)", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/jobs/{{jobId}}/$stream", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "jobs", "{{jobId}}", "$stream"] + }, + "description": "Stream job status updates via Server-Sent Events (SSE). Use curl for better SSE support." + }, + "response": [] + }, + { + "name": "List All Jobs", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/jobs", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "jobs"] + }, + "description": "List all jobs (task executions)." 
+ }, + "response": [] + }, + { + "name": "Filter Jobs by Status", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/jobs?status_filter=completed", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "jobs"], + "query": [ + { + "key": "status_filter", + "value": "completed", + "description": "pending, running, completed, failed, or canceled" + } + ] + }, + "description": "Filter jobs by status (pending, running, completed, failed, canceled)." + }, + "response": [] + } + ] + }, + { + "name": "6. Get Task Results", + "item": [ + { + "name": "Get Python Task Results", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/artifacts/{{artifactId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "artifacts", "{{artifactId}}"] + }, + "description": "Get Python task execution results. Structure: task snapshot + result/error." + }, + "response": [ + { + "name": "Success Result", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/artifacts/{{artifactId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "artifacts", "{{artifactId}}"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"id\": \"01K79YAHJ7BR4E87VVTG8FNBMB\",\n \"parent_id\": null,\n \"level\": 0,\n \"data\": {\n \"task\": {\n \"id\": \"01JCSEED0000000000000PYTH1\",\n \"command\": \"calculate_sum\",\n \"task_type\": \"python\",\n \"parameters\": {\"a\": 10, \"b\": 32},\n \"created_at\": \"2025-10-17T10:00:00Z\",\n \"updated_at\": \"2025-10-17T10:00:00Z\"\n },\n \"result\": {\n \"result\": 42,\n \"operation\": \"sum\"\n },\n \"error\": null\n },\n \"created_at\": \"2025-10-17T10:01:02Z\",\n \"updated_at\": \"2025-10-17T10:01:02Z\"\n}" + }, + { + "name": "Error Result", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": 
"{{baseUrl}}/api/v1/artifacts/{{artifactId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "artifacts", "{{artifactId}}"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"id\": \"01K79YAHJ7BR4E87VVTG8FNBMC\",\n \"parent_id\": null,\n \"level\": 0,\n \"data\": {\n \"task\": {...},\n \"result\": null,\n \"error\": {\n \"type\": \"ValueError\",\n \"message\": \"This task was designed to fail\",\n \"traceback\": \"Traceback (most recent call last):\\n File \\\"...\\\", line 48, in failing_task\\n raise ValueError(\\\"This task was designed to fail\\\")\\nValueError: This task was designed to fail\"\n }\n },\n \"created_at\": \"2025-10-17T10:05:00Z\",\n \"updated_at\": \"2025-10-17T10:05:00Z\"\n}" + }, + { + "name": "Dependency Injection Result", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/artifacts/{{artifactId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "artifacts", "{{artifactId}}"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"id\": \"01K79YAHJ7BR4E87VVTG8FNBME\",\n \"parent_id\": null,\n \"level\": 0,\n \"data\": {\n \"task\": {\n \"id\": \"01JCSEED0000000000000PYTH7\",\n \"command\": \"query_task_count\",\n \"task_type\": \"python\",\n \"parameters\": {},\n \"created_at\": \"2025-10-17T10:00:00Z\",\n \"updated_at\": \"2025-10-17T10:00:00Z\"\n },\n \"result\": {\n \"total_tasks\": 8,\n \"timestamp\": \"2025-10-17T10:07:15.123456Z\",\n \"injected_session\": true\n },\n \"error\": null\n },\n \"created_at\": \"2025-10-17T10:07:15Z\",\n \"updated_at\": \"2025-10-17T10:07:15Z\"\n}" + } + ] + }, + { + "name": "Get Shell Task Results", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": 
"{{baseUrl}}/api/v1/artifacts/{{artifactId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "artifacts", "{{artifactId}}"] + }, + "description": "Get shell task execution results. Structure: task snapshot + stdout/stderr/exit_code." + }, + "response": [ + { + "name": "Shell Result", + "originalRequest": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/artifacts/{{artifactId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "artifacts", "{{artifactId}}"] + } + }, + "status": "OK", + "code": 200, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"id\": \"01K79YAHJ7BR4E87VVTG8FNBMD\",\n \"parent_id\": null,\n \"level\": 0,\n \"data\": {\n \"task\": {\n \"id\": \"01JCSEED0000000000000PYTH5\",\n \"command\": \"echo \\\"This is a shell task\\\"\",\n \"created_at\": \"2025-10-17T10:00:00Z\",\n \"updated_at\": \"2025-10-17T10:00:00Z\"\n },\n \"stdout\": \"This is a shell task\\n\",\n \"stderr\": \"\",\n \"exit_code\": 0\n },\n \"created_at\": \"2025-10-17T10:06:00Z\",\n \"updated_at\": \"2025-10-17T10:06:01Z\"\n}" + } + ] + }, + { + "name": "List All Artifacts", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/artifacts", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "artifacts"] + }, + "description": "List all artifacts (task execution results)." + }, + "response": [] + } + ] + }, + { + "name": "7. 
Create Tasks", + "item": [ + { + "name": "Create Python Task with Parameters", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "const response = pm.response.json();", + "pm.environment.set('taskId', response.id);" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"command\": \"calculate_sum\",\n \"task_type\": \"python\",\n \"parameters\": {\"a\": 25, \"b\": 17},\n \"enabled\": true\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/v1/tasks", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"] + }, + "description": "Create a Python task with custom parameters." + }, + "response": [ + { + "name": "Task Created", + "originalRequest": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"command\": \"calculate_sum\",\n \"task_type\": \"python\",\n \"parameters\": {\"a\": 25, \"b\": 17},\n \"enabled\": true\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/v1/tasks", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"] + } + }, + "status": "Created", + "code": 201, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"id\": \"01K79YAHJ7BR4E87VVTG8FNBMC\",\n \"command\": \"calculate_sum\",\n \"task_type\": \"python\",\n \"parameters\": {\"a\": 25, \"b\": 17},\n \"enabled\": true,\n \"created_at\": \"2025-10-17T10:10:00Z\",\n \"updated_at\": \"2025-10-17T10:10:00Z\"\n}" + } + ] + }, + { + "name": "Create Shell Task", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"command\": \"date\",\n \"task_type\": \"shell\",\n \"enabled\": true\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/v1/tasks", + 
"host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"] + }, + "description": "Create a shell task (parameters not used for shell tasks)." + }, + "response": [] + }, + { + "name": "Create Disabled Task", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"command\": \"process_data\",\n \"task_type\": \"python\",\n \"parameters\": {\"input_text\": \"test\", \"uppercase\": false},\n \"enabled\": false\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/v1/tasks", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks"] + }, + "description": "Create a task that is disabled and cannot be executed." + }, + "response": [] + } + ] + }, + { + "name": "8. Update Tasks", + "item": [ + { + "name": "Enable a Disabled Task", + "request": { + "method": "PUT", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"command\": \"process_data\",\n \"task_type\": \"python\",\n \"parameters\": {\"input_text\": \"Disabled\", \"uppercase\": false},\n \"enabled\": true\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH6", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH6"] + }, + "description": "Change enabled flag from false to true." + }, + "response": [] + }, + { + "name": "Disable an Enabled Task", + "request": { + "method": "PUT", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"command\": \"calculate_sum\",\n \"task_type\": \"python\",\n \"parameters\": {\"a\": 10, \"b\": 32},\n \"enabled\": false\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/{{taskId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "{{taskId}}"] + }, + "description": "Disable a task to prevent execution while preserving history." 
+ }, + "response": [] + }, + { + "name": "Update Task Parameters", + "request": { + "method": "PUT", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"command\": \"calculate_sum\",\n \"task_type\": \"python\",\n \"parameters\": {\"a\": 100, \"b\": 200},\n \"enabled\": true\n}" + }, + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/{{taskId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "{{taskId}}"] + }, + "description": "Update parameters for Python task. Previous execution results remain unchanged." + }, + "response": [] + } + ] + }, + { + "name": "9. Error Cases", + "item": [ + { + "name": "Try to Execute Disabled Task", + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH6/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH6", "$execute"] + }, + "description": "Attempt to execute a disabled task (will fail with 400)." 
+ }, + "response": [ + { + "name": "Cannot Execute Disabled", + "originalRequest": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH6/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH6", "$execute"] + } + }, + "status": "Bad Request", + "code": 400, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"detail\": \"Cannot execute disabled task 01JCSEED0000000000000PYTH6\"\n}" + } + ] + }, + { + "name": "Try to Execute Orphaned Task", + "request": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH8/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH8", "$execute"] + }, + "description": "Attempt to execute orphaned task (auto-disabled on startup because function 'nonexistent_function' is not registered)." + }, + "response": [ + { + "name": "Cannot Execute Disabled", + "originalRequest": { + "method": "POST", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/01JCSEED0000000000000PYTH8/$execute", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "01JCSEED0000000000000PYTH8", "$execute"] + } + }, + "status": "Bad Request", + "code": 400, + "_postman_previewlanguage": "json", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "cookie": [], + "body": "{\n \"detail\": \"Cannot execute disabled task 01JCSEED0000000000000PYTH8\"\n}" + } + ] + } + ] + }, + { + "name": "10. Delete Task", + "item": [ + { + "name": "Delete Task", + "request": { + "method": "DELETE", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/{{taskId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "{{taskId}}"] + }, + "description": "Delete a task template. Execution artifacts are preserved." 
+ }, + "response": [ + { + "name": "Deleted", + "originalRequest": { + "method": "DELETE", + "header": [], + "url": { + "raw": "{{baseUrl}}/api/v1/tasks/{{taskId}}", + "host": ["{{baseUrl}}"], + "path": ["api", "v1", "tasks", "{{taskId}}"] + } + }, + "status": "No Content", + "code": 204, + "_postman_previewlanguage": null, + "header": [], + "cookie": [], + "body": null + } + ] + } + ] + } + ], + "variable": [ + { + "key": "baseUrl", + "value": "http://127.0.0.1:8000", + "type": "string" + }, + { + "key": "taskId", + "value": "01JCSEED0000000000000PYTH1", + "type": "string" + }, + { + "key": "jobId", + "value": "", + "type": "string" + }, + { + "key": "artifactId", + "value": "", + "type": "string" + } + ] +} diff --git a/examples/python_task_execution_api.py b/examples/python_task_execution_api.py new file mode 100644 index 0000000..ec613f7 --- /dev/null +++ b/examples/python_task_execution_api.py @@ -0,0 +1,216 @@ +"""FastAPI service demonstrating Python task execution with TaskRegistry.""" + +from __future__ import annotations + +import asyncio +import time +from datetime import datetime, timezone + +from fastapi import FastAPI +from sqlalchemy.ext.asyncio import AsyncSession +from ulid import ULID + +from chapkit import ( + ArtifactHierarchy, + TaskIn, + TaskManager, + TaskRegistry, + TaskRepository, +) +from chapkit.api import ServiceBuilder, ServiceInfo +from chapkit.core import Database + + +# Register Python task functions +@TaskRegistry.register("calculate_sum") +async def calculate_sum(a: int, b: int) -> dict: + """Calculate sum of two numbers asynchronously.""" + await asyncio.sleep(0.1) # Simulate async work + return {"result": a + b, "operation": "sum"} + + +@TaskRegistry.register("process_data") +def process_data(input_text: str, uppercase: bool = False) -> dict: + """Process text data synchronously.""" + result = input_text.upper() if uppercase else input_text.lower() + return { + "original": input_text, + "processed": result, + "length": 
len(result), + "timestamp": datetime.now(timezone.utc).isoformat(), + } + + +@TaskRegistry.register("slow_computation") +def slow_computation(seconds: int = 2) -> dict: + """Simulate slow computation (sync function).""" + time.sleep(seconds) + return {"completed": True, "duration_seconds": seconds} + + +@TaskRegistry.register("failing_task") +async def failing_task(should_fail: bool = True) -> dict: + """Task that demonstrates error handling.""" + if should_fail: + raise ValueError("This task was designed to fail") + return {"success": True} + + +@TaskRegistry.register("query_task_count") +async def query_task_count(session: AsyncSession) -> dict: + """Query total task count using injected database session.""" + from sqlalchemy import func, select + + from chapkit.modules.task.models import Task + + # Use injected session to query database + stmt = select(func.count()).select_from(Task) + result = await session.execute(stmt) + count = result.scalar() or 0 + + return { + "total_tasks": count, + "timestamp": datetime.now(timezone.utc).isoformat(), + "injected_session": True, + } + + +async def seed_python_tasks(app: FastAPI) -> None: + """Seed example Python task templates with stable ULIDs.""" + database: Database | None = getattr(app.state, "database", None) + if database is None: + return + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Check if tasks already exist + existing_tasks = await task_manager.find_all() + if len(existing_tasks) > 0: + return # Skip seeding if tasks already exist + + # Example 1: Async Python function with parameters + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000PYTH1"), + command="calculate_sum", + task_type="python", + parameters={"a": 10, "b": 32}, + ) + ) + + # Example 2: Sync Python function with parameters + await task_manager.save( + TaskIn( + 
id=ULID.from_str("01JCSEED0000000000000PYTH2"), + command="process_data", + task_type="python", + parameters={"input_text": "Hello World", "uppercase": True}, + ) + ) + + # Example 3: Slow computation + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000PYTH3"), + command="slow_computation", + task_type="python", + parameters={"seconds": 1}, + ) + ) + + # Example 4: Error handling demonstration + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000PYTH4"), + command="failing_task", + task_type="python", + parameters={"should_fail": True}, + enabled=True, + ) + ) + + # Example 5: Traditional shell task (for comparison) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000PYTH5"), + command='echo "This is a shell task"', + task_type="shell", + enabled=True, + ) + ) + + # Example 6: Disabled task (won't execute) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000PYTH6"), + command="process_data", + task_type="python", + parameters={"input_text": "Disabled", "uppercase": False}, + enabled=False, + ) + ) + + # Example 7: Task with dependency injection (no parameters needed) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000PYTH7"), + command="query_task_count", + task_type="python", + parameters={}, # No parameters - session injected automatically + enabled=True, + ) + ) + + # Example 8: Orphaned task (function not registered - will be auto-disabled) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000PYTH8"), + command="nonexistent_function", + task_type="python", + parameters={}, + enabled=True, + ) + ) + + +info = ServiceInfo( + display_name="Python Task Execution Service", + summary="Example service demonstrating Python function execution via TaskRegistry", + version="1.0.0", + description=""" + This service demonstrates chapkit's Python task execution capabilities. 
+ + Features: + - Register Python functions with @TaskRegistry.register() + - Support both sync and async functions + - Pass parameters as dict to functions + - Type-based dependency injection (AsyncSession, Database, etc.) + - Capture results or exceptions in artifacts + - Mix Python and shell tasks in the same service + - Enable/disable tasks for execution control + - Automatic validation and disabling of orphaned tasks + """, +) + +# Simple hierarchy for task execution artifacts +TASK_HIERARCHY = ArtifactHierarchy( + name="task_executions", + level_labels={0: "execution"}, +) + +app = ( + ServiceBuilder(info=info) + .with_health() + .with_artifacts(hierarchy=TASK_HIERARCHY) # Required for task execution results + .with_jobs(max_concurrency=3) # Limit concurrent task execution + .with_tasks() # validate_on_startup=True by default + .on_startup(seed_python_tasks) + .build() +) + +if __name__ == "__main__": + from chapkit.api import run_app + + run_app("python_task_execution_api:app") diff --git a/examples/readonly_task_api.py b/examples/readonly_task_api.py new file mode 100644 index 0000000..29a2e9f --- /dev/null +++ b/examples/readonly_task_api.py @@ -0,0 +1,159 @@ +"""FastAPI service demonstrating read-only task API with pre-seeded tasks.""" + +from __future__ import annotations + +import asyncio + +from fastapi import FastAPI +from ulid import ULID + +from chapkit import ( + ArtifactHierarchy, + TaskIn, + TaskManager, + TaskRegistry, + TaskRepository, +) +from chapkit.api import ServiceBuilder, ServiceInfo +from chapkit.core import Database +from chapkit.core.api.crud import CrudPermissions + + +# Register Python task functions +@TaskRegistry.register("health_check") +async def health_check() -> dict: + """Perform system health check.""" + await asyncio.sleep(0.1) + return {"status": "healthy", "checks": ["database", "scheduler", "artifacts"]} + + +@TaskRegistry.register("cleanup_temp_files") +async def cleanup_temp_files(older_than_days: int = 7) -> dict: + 
"""Simulate cleanup of temporary files.""" + await asyncio.sleep(0.2) + return {"cleaned": 42, "criteria": f"older than {older_than_days} days"} + + +@TaskRegistry.register("backup_database") +def backup_database(destination: str = "/backups") -> dict: + """Simulate database backup operation.""" + return {"success": True, "destination": destination, "size_mb": 150} + + +async def seed_readonly_tasks(app: FastAPI) -> None: + """Seed predefined task templates - the only way to create tasks in this service.""" + database: Database | None = getattr(app.state, "database", None) + if database is None: + return + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Check if tasks already exist + existing_tasks = await task_manager.find_all() + if len(existing_tasks) > 0: + return # Skip seeding if tasks already exist + + # Task 1: Health check (enabled) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000READ1"), + command="health_check", + task_type="python", + parameters={}, + enabled=True, + ) + ) + + # Task 2: Cleanup temp files (enabled) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000READ2"), + command="cleanup_temp_files", + task_type="python", + parameters={"older_than_days": 7}, + enabled=True, + ) + ) + + # Task 3: Database backup (enabled) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000READ3"), + command="backup_database", + task_type="python", + parameters={"destination": "/backups"}, + enabled=True, + ) + ) + + # Task 4: Shell task (enabled) + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000READ4"), + command="echo 'System check complete'", + task_type="shell", + enabled=True, + ) + ) + + # Task 5: Disabled maintenance task + await task_manager.save( + TaskIn( + id=ULID.from_str("01JCSEED0000000000000READ5"), + 
command="backup_database", + task_type="python", + parameters={"destination": "/archive"}, + enabled=False, + ) + ) + + +info = ServiceInfo( + display_name="Read-Only Task Service", + summary="Secure task execution service with pre-defined tasks only", + version="1.0.0", + description=""" + This service demonstrates a read-only task API pattern for production use. + + Security Features: + - No runtime task creation (prevent command injection) + - No runtime task updates (prevent tampering) + - No runtime task deletion (preserve audit trail) + - All tasks defined in code (version controlled) + - API users can only view and execute pre-defined tasks + + This pattern is ideal for production deployments where tasks should be + managed through code/configuration rather than runtime APIs. + """, +) + +# Simple hierarchy for task execution artifacts +TASK_HIERARCHY = ArtifactHierarchy( + name="task_executions", + level_labels={0: "execution"}, +) + +# Read-only CRUD permissions (no create, update, or delete) +READONLY_PERMISSIONS = CrudPermissions( + create=False, # Tasks can only be created via seeding + read=True, # Users can list and view tasks + update=False, # No runtime modifications + delete=False, # No runtime deletions +) + +app = ( + ServiceBuilder(info=info) + .with_health() + .with_artifacts(hierarchy=TASK_HIERARCHY) + .with_jobs(max_concurrency=5) + .with_tasks(permissions=READONLY_PERMISSIONS) # Apply read-only permissions, validate_on_startup=True by default + .on_startup(seed_readonly_tasks) # Pre-seed tasks + .build() +) + +if __name__ == "__main__": + from chapkit.api import run_app + + run_app("readonly_task_api:app") diff --git a/src/chapkit/__init__.py b/src/chapkit/__init__.py index 05b963e..0e3e010 100644 --- a/src/chapkit/__init__.py +++ b/src/chapkit/__init__.py @@ -39,7 +39,15 @@ ) # Task feature -from chapkit.modules.task import Task, TaskIn, TaskManager, TaskOut, TaskRepository +from chapkit.modules.task import ( + Task, + TaskIn, + 
TaskManager, + TaskOut, + TaskRegistry, + TaskRepository, + validate_and_disable_orphaned_tasks, +) __all__ = [ # Core framework @@ -75,6 +83,8 @@ "Task", "TaskIn", "TaskOut", + "TaskRegistry", "TaskRepository", "TaskManager", + "validate_and_disable_orphaned_tasks", ] diff --git a/src/chapkit/api/service_builder.py b/src/chapkit/api/service_builder.py index 2eb0f83..55e3a36 100644 --- a/src/chapkit/api/service_builder.py +++ b/src/chapkit/api/service_builder.py @@ -23,7 +23,14 @@ ) from chapkit.modules.config import BaseConfig, ConfigIn, ConfigManager, ConfigOut, ConfigRepository, ConfigRouter from chapkit.modules.ml import MLManager, MLRouter, ModelRunnerProtocol -from chapkit.modules.task import TaskIn, TaskManager, TaskOut, TaskRepository, TaskRouter +from chapkit.modules.task import ( + TaskIn, + TaskManager, + TaskOut, + TaskRepository, + TaskRouter, + validate_and_disable_orphaned_tasks, +) from .dependencies import get_artifact_manager as default_get_artifact_manager from .dependencies import get_config_manager as default_get_config_manager @@ -84,6 +91,7 @@ class _TaskOptions: prefix: str = "/api/v1/tasks" tags: List[str] = field(default_factory=lambda: ["Tasks"]) permissions: CrudPermissions = field(default_factory=CrudPermissions) + validate_on_startup: bool = True @dataclass(slots=True) @@ -170,6 +178,7 @@ def with_tasks( prefix: str = "/api/v1/tasks", tags: List[str] | None = None, permissions: CrudPermissions | None = None, + validate_on_startup: bool = True, allow_create: bool | None = None, allow_read: bool | None = None, allow_update: bool | None = None, @@ -187,6 +196,7 @@ def with_tasks( prefix=prefix, tags=list(tags) if tags else ["Tasks"], permissions=perms, + validate_on_startup=validate_on_startup, ) return self @@ -291,6 +301,15 @@ def _register_module_routers(self, app: FastAPI) -> None: app.include_router(task_router) app.dependency_overrides[default_get_task_manager] = task_dep + # Register validation startup hook if enabled + if 
task_options.validate_on_startup: + + async def _validate_tasks_on_startup(app_instance: FastAPI) -> None: + """Validate and disable orphaned Python tasks on startup.""" + await validate_and_disable_orphaned_tasks(app_instance) + + self._startup_hooks.append(_validate_tasks_on_startup) + if self._ml_options: ml_options = self._ml_options ml_dep = self._build_ml_dependency() diff --git a/src/chapkit/core/__init__.py b/src/chapkit/core/__init__.py index 0c3f986..283e550 100644 --- a/src/chapkit/core/__init__.py +++ b/src/chapkit/core/__init__.py @@ -30,7 +30,7 @@ PaginatedResponse, ProblemDetail, ) -from .types import SerializableDict, ULIDType +from .types import JsonSafe, ULIDType __all__ = [ # Base infrastructure @@ -46,7 +46,7 @@ "Base", "Entity", "ULIDType", - "SerializableDict", + "JsonSafe", # Schemas "EntityIn", "EntityOut", diff --git a/src/chapkit/core/types.py b/src/chapkit/core/types.py index b85135a..ebecb5c 100644 --- a/src/chapkit/core/types.py +++ b/src/chapkit/core/types.py @@ -87,8 +87,8 @@ def _serialize_with_metadata(value: Any) -> Any: return _create_serialization_metadata(value, is_full_object=True) -SerializableDict = Annotated[ +JsonSafe = Annotated[ Any, PlainSerializer(_serialize_with_metadata, return_type=Any), ] -"""Pydantic type that serializes dicts, replacing non-JSON-serializable values with metadata.""" +"""Pydantic type for JSON-safe serialization with graceful handling of non-serializable values.""" diff --git a/src/chapkit/modules/artifact/schemas.py b/src/chapkit/modules/artifact/schemas.py index c71a362..800f1e3 100644 --- a/src/chapkit/modules/artifact/schemas.py +++ b/src/chapkit/modules/artifact/schemas.py @@ -9,7 +9,7 @@ from ulid import ULID from chapkit.core.schemas import EntityIn, EntityOut -from chapkit.core.types import SerializableDict +from chapkit.core.types import JsonSafe from chapkit.modules.config.schemas import BaseConfig, ConfigOut @@ -24,7 +24,7 @@ class ArtifactIn(EntityIn): class ArtifactOut(EntityOut): 
"""Output schema for artifact entities.""" - data: SerializableDict + data: JsonSafe parent_id: ULID | None = None level: int diff --git a/src/chapkit/modules/task/__init__.py b/src/chapkit/modules/task/__init__.py index 5117ed7..b90b774 100644 --- a/src/chapkit/modules/task/__init__.py +++ b/src/chapkit/modules/task/__init__.py @@ -2,15 +2,19 @@ from .manager import TaskManager from .models import Task +from .registry import TaskRegistry from .repository import TaskRepository from .router import TaskRouter from .schemas import TaskIn, TaskOut +from .validation import validate_and_disable_orphaned_tasks __all__ = [ "Task", "TaskIn", "TaskOut", + "TaskRegistry", "TaskRepository", "TaskManager", "TaskRouter", + "validate_and_disable_orphaned_tasks", ] diff --git a/src/chapkit/modules/task/manager.py b/src/chapkit/modules/task/manager.py index 59e15a5..51970fd 100644 --- a/src/chapkit/modules/task/manager.py +++ b/src/chapkit/modules/task/manager.py @@ -3,8 +3,12 @@ from __future__ import annotations import asyncio -from typing import Any +import inspect +import traceback +import types +from typing import Any, Union, get_origin, get_type_hints +from sqlalchemy.ext.asyncio import AsyncSession from ulid import ULID from chapkit.core import Database @@ -13,9 +17,18 @@ from chapkit.modules.artifact import ArtifactIn, ArtifactManager, ArtifactRepository from .models import Task +from .registry import TaskRegistry from .repository import TaskRepository from .schemas import TaskIn, TaskOut +# Framework-provided types that can be injected into Python task functions +INJECTABLE_TYPES = { + AsyncSession, + Database, + ArtifactManager, + JobScheduler, +} + class TaskManager(BaseManager[Task, TaskIn, TaskOut, ULID]): """Manager for Task template entities with artifact-based execution.""" @@ -34,6 +47,98 @@ def __init__( self.database = database self.artifact_manager = artifact_manager + async def find_all(self, *, enabled: bool | None = None) -> list[TaskOut]: + """Find all 
tasks, optionally filtered by enabled status.""" + tasks = await self.repo.find_all(enabled=enabled) + return [self._to_output_schema(task) for task in tasks] + + def _is_injectable_type(self, param_type: type | None) -> bool: + """Check if a parameter type should be injected by the framework.""" + if param_type is None: + return False + + # Handle Optional[Type] -> extract the non-None type + origin = get_origin(param_type) + if origin is types.UnionType or origin is Union: # Union type (both syntaxes) + # For Optional types, we still want to inject if the non-None type is injectable + # This allows Optional[AsyncSession] to work + args = getattr(param_type, "__args__", ()) + non_none_types = [arg for arg in args if arg is not type(None)] + if len(non_none_types) == 1: + param_type = non_none_types[0] + + # Check if type is in injectable set + return param_type in INJECTABLE_TYPES + + def _build_injection_map(self, task_id: ULID, session: AsyncSession | None) -> dict[type, Any]: + """Build map of injectable types to their instances.""" + return { + AsyncSession: session, + Database: self.database, + ArtifactManager: self.artifact_manager, + JobScheduler: self.scheduler, + } + + def _inject_parameters( + self, func: Any, user_params: dict[str, Any], task_id: ULID, session: AsyncSession | None + ) -> dict[str, Any]: + """Merge user parameters with framework injections based on function signature.""" + sig = inspect.signature(func) + type_hints = get_type_hints(func) + + # Build injection map + injection_map = self._build_injection_map(task_id, session) + + # Start with user parameters + final_params = dict(user_params) + + # Inspect each parameter in function signature + for param_name, param in sig.parameters.items(): + # Skip self, *args, **kwargs + if param.kind in (param.VAR_POSITIONAL, param.VAR_KEYWORD): + continue + + # Get type hint for this parameter + param_type = type_hints.get(param_name) + + # Check if this type should be injected + if 
self._is_injectable_type(param_type): + # Get the actual type (handle Optional) + actual_type = param_type + origin = get_origin(param_type) + if origin is types.UnionType or origin is Union: + args = getattr(param_type, "__args__", ()) + non_none_types = [arg for arg in args if arg is not type(None)] + if non_none_types: + actual_type = non_none_types[0] + + # Inject if we have an instance of this type + if actual_type in injection_map: + injectable_value = injection_map[actual_type] + # For required parameters, inject even if None + # For optional parameters, only inject if not None + if param.default is param.empty: + # Required parameter - inject whatever we have (even None) + final_params[param_name] = injectable_value + elif injectable_value is not None: + # Optional parameter - only inject if we have a value + final_params[param_name] = injectable_value + continue + + # Not injectable - must come from user parameters + if param_name not in final_params: + # Check if parameter has a default value + if param.default is not param.empty: + continue # Will use default + + # Required parameter missing + raise ValueError( + f"Missing required parameter '{param_name}' for task function. " + f"Parameter is not injectable and not provided in task.parameters." 
+ ) + + return final_params + async def execute_task(self, task_id: ULID) -> ULID: """Execute a task by submitting it to the scheduler and return the job ID.""" if self.scheduler is None: @@ -48,8 +153,15 @@ async def execute_task(self, task_id: ULID) -> ULID: if task is None: raise ValueError(f"Task {task_id} not found") - # Submit job to scheduler - job_id = await self.scheduler.add_job(self._execute_command, task_id) + # Check if task is enabled + if not task.enabled: + raise ValueError(f"Cannot execute disabled task {task_id}") + + # Route based on task type + if task.task_type == "python": + job_id = await self.scheduler.add_job(self._execute_python, task_id) + else: # shell + job_id = await self.scheduler.add_job(self._execute_command, task_id) return job_id @@ -110,3 +222,79 @@ async def _execute_command(self, task_id: ULID) -> ULID: ) return artifact_out.id + + async def _execute_python(self, task_id: ULID) -> ULID: + """Execute Python function and return artifact_id containing results.""" + if self.database is None: + raise RuntimeError("Database instance required for task execution") + + if self.artifact_manager is None: + raise RuntimeError("ArtifactManager instance required for task execution") + + # Create a database session for potential injection + session_context = self.database.session() + session = await session_context.__aenter__() + + try: + # Fetch task and serialize snapshot + task_repo = TaskRepository(session) + task = await task_repo.find_by_id(task_id) + if task is None: + raise ValueError(f"Task {task_id} not found") + + # Capture task snapshot + task_snapshot = { + "id": str(task.id), + "command": task.command, + "task_type": task.task_type, + "parameters": task.parameters, + "created_at": task.created_at.isoformat(), + "updated_at": task.updated_at.isoformat(), + } + + # Get function from registry + try: + func = TaskRegistry.get(task.command) + except KeyError: + raise ValueError(f"Python function '{task.command}' not found in 
registry") + + # Execute function with type-based injection + result_data: dict[str, Any] + try: + user_params = task.parameters or {} + + # Inject framework dependencies based on function signature + final_params = self._inject_parameters(func, user_params, task_id, session) + + # Handle sync/async functions + if inspect.iscoroutinefunction(func): + result = await func(**final_params) + else: + result = await asyncio.to_thread(func, **final_params) + + result_data = { + "task": task_snapshot, + "result": result, + "error": None, + } + except Exception as e: + result_data = { + "task": task_snapshot, + "result": None, + "error": { + "type": type(e).__name__, + "message": str(e), + "traceback": traceback.format_exc(), + }, + } + finally: + # Always close the session + await session_context.__aexit__(None, None, None) + + # Create artifact (with a new session) + async with self.database.session() as artifact_session: + artifact_repo = ArtifactRepository(artifact_session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact_out = await artifact_mgr.save(ArtifactIn(data=result_data, parent_id=None)) + + return artifact_out.id diff --git a/src/chapkit/modules/task/models.py b/src/chapkit/modules/task/models.py index 11a69de..d14428c 100644 --- a/src/chapkit/modules/task/models.py +++ b/src/chapkit/modules/task/models.py @@ -2,6 +2,8 @@ from __future__ import annotations +from sqlalchemy import Boolean +from sqlalchemy.dialects.sqlite import JSON from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.types import Text @@ -14,3 +16,6 @@ class Task(Entity): __tablename__ = "tasks" command: Mapped[str] = mapped_column(Text, nullable=False) + task_type: Mapped[str] = mapped_column(Text, nullable=False, default="shell", server_default="shell") + parameters: Mapped[dict | None] = mapped_column(JSON, nullable=True) + enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True, server_default="1") diff --git a/src/chapkit/modules/task/registry.py 
b/src/chapkit/modules/task/registry.py new file mode 100644 index 0000000..d661892 --- /dev/null +++ b/src/chapkit/modules/task/registry.py @@ -0,0 +1,46 @@ +"""Global registry for Python task functions.""" + +from collections.abc import Callable +from typing import Any + + +class TaskRegistry: + """Global registry for Python task functions.""" + + _registry: dict[str, Callable[..., Any]] = {} + + @classmethod + def register(cls, name: str) -> Callable[[Callable[..., Any]], Callable[..., Any]]: + """Decorator to register a task function with support for type-based dependency injection.""" + + def decorator(func: Callable[..., Any]) -> Callable[..., Any]: + if name in cls._registry: + raise ValueError(f"Task '{name}' already registered") + cls._registry[name] = func + return func + + return decorator + + @classmethod + def register_function(cls, name: str, func: Callable[..., Any]) -> None: + """Imperatively register a task function.""" + if name in cls._registry: + raise ValueError(f"Task '{name}' already registered") + cls._registry[name] = func + + @classmethod + def get(cls, name: str) -> Callable[..., Any]: + """Retrieve a registered task function.""" + if name not in cls._registry: + raise KeyError(f"Task '{name}' not found in registry") + return cls._registry[name] + + @classmethod + def list_all(cls) -> list[str]: + """List all registered task names.""" + return sorted(cls._registry.keys()) + + @classmethod + def clear(cls) -> None: + """Clear all registered tasks (useful for testing).""" + cls._registry.clear() diff --git a/src/chapkit/modules/task/repository.py b/src/chapkit/modules/task/repository.py index 32ede3c..6b4bd83 100644 --- a/src/chapkit/modules/task/repository.py +++ b/src/chapkit/modules/task/repository.py @@ -2,6 +2,7 @@ from __future__ import annotations +from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from ulid import ULID @@ -16,3 +17,16 @@ class TaskRepository(BaseRepository[Task, ULID]): def __init__(self, 
session: AsyncSession) -> None: """Initialize task repository with database session.""" super().__init__(session, Task) + + async def find_by_enabled(self, enabled: bool) -> list[Task]: + """Find all tasks by enabled status.""" + stmt = select(Task).where(Task.enabled == enabled).order_by(Task.created_at.desc()) + result = await self.s.execute(stmt) + return list(result.scalars().all()) + + async def find_all(self, *, enabled: bool | None = None) -> list[Task]: + """Find all tasks, optionally filtered by enabled status.""" + if enabled is None: + result = await super().find_all() + return list(result) + return await self.find_by_enabled(enabled) diff --git a/src/chapkit/modules/task/router.py b/src/chapkit/modules/task/router.py index 418708e..af48652 100644 --- a/src/chapkit/modules/task/router.py +++ b/src/chapkit/modules/task/router.py @@ -5,10 +5,13 @@ from collections.abc import Sequence from typing import Any -from fastapi import Depends, HTTPException, status +from fastapi import Depends, HTTPException, Query, status from pydantic import BaseModel, Field +from ulid import ULID from chapkit.core.api.crud import CrudPermissions, CrudRouter +from chapkit.core.manager import Manager +from chapkit.core.schemas import PaginatedResponse from .manager import TaskManager from .schemas import TaskIn, TaskOut @@ -45,6 +48,33 @@ def __init__( **kwargs, ) + def _register_find_all_route(self, manager_dependency: Any, manager_annotation: Any) -> None: + """Register find all route with enabled filtering support.""" + entity_out_annotation: Any = self.entity_out_type + collection_response_model: Any = list[entity_out_annotation] | PaginatedResponse[entity_out_annotation] + + @self.router.get("", response_model=collection_response_model) + async def find_all( + page: int | None = None, + size: int | None = None, + enabled: bool | None = Query(None, description="Filter by enabled status"), + manager: Manager[TaskIn, TaskOut, ULID] = manager_dependency, + ) -> list[TaskOut] | 
PaginatedResponse[TaskOut]: + from chapkit.core.api.pagination import create_paginated_response + + # Pagination is opt-in: both page and size must be provided + if page is not None and size is not None: + items, total = await manager.find_paginated(page, size) + return create_paginated_response(items, total, page, size) + + # Use TaskRepository's find_all with enabled filtering + # Cast manager to access repository with enabled parameter + task_manager = manager # TaskManager with TaskRepository + return await task_manager.find_all(enabled=enabled) # type: ignore[call-arg] + + self._annotate_manager(find_all, manager_annotation) + find_all.__annotations__["return"] = list[entity_out_annotation] | PaginatedResponse[entity_out_annotation] + def _register_routes(self) -> None: """Register task CRUD routes and execution operation.""" super()._register_routes() diff --git a/src/chapkit/modules/task/schemas.py b/src/chapkit/modules/task/schemas.py index 28a80b3..a5b2236 100644 --- a/src/chapkit/modules/task/schemas.py +++ b/src/chapkit/modules/task/schemas.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Any, Literal + from pydantic import Field from chapkit.core.schemas import EntityIn, EntityOut @@ -10,10 +12,18 @@ class TaskIn(EntityIn): """Input schema for creating or updating task templates.""" - command: str = Field(description="Shell command to execute") + command: str = Field(description="Shell command or Python function name to execute") + task_type: Literal["shell", "python"] = Field(default="shell", description="Type of task: 'shell' or 'python'") + parameters: dict[str, Any] | None = Field( + default=None, description="Parameters to pass to Python function (ignored for shell tasks)" + ) + enabled: bool = Field(default=True, description="Whether task is enabled for execution") class TaskOut(EntityOut): """Output schema for task template entities.""" - command: str = Field(description="Shell command to execute") + command: str = 
Field(description="Shell command or Python function name to execute") + task_type: str = Field(description="Type of task: 'shell' or 'python'") + parameters: dict[str, Any] | None = Field(default=None, description="Parameters to pass to Python function") + enabled: bool = Field(description="Whether task is enabled for execution") diff --git a/src/chapkit/modules/task/validation.py b/src/chapkit/modules/task/validation.py new file mode 100644 index 0000000..31bffd7 --- /dev/null +++ b/src/chapkit/modules/task/validation.py @@ -0,0 +1,77 @@ +"""Task validation utilities for detecting orphaned Python tasks.""" + +from __future__ import annotations + +import logging + +from fastapi import FastAPI + +from chapkit.core import Database + +from .manager import TaskManager +from .registry import TaskRegistry +from .repository import TaskRepository +from .schemas import TaskIn + +logger = logging.getLogger(__name__) + + +async def validate_and_disable_orphaned_tasks(app: FastAPI) -> int: + """Validate Python tasks and disable orphaned ones that reference missing functions.""" + database: Database | None = getattr(app.state, "database", None) + if database is None: + logger.debug("No database configured, skipping task validation") + return 0 + + disabled_count = 0 + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Get all tasks + all_tasks = await task_manager.find_all() + + # Get registered function names + registered_functions = set(TaskRegistry.list_all()) + + # Find orphaned Python tasks + orphaned_tasks = [ + task for task in all_tasks if task.task_type == "python" and task.command not in registered_functions + ] + + if orphaned_tasks: + logger.warning( + "Found orphaned Python tasks - disabling them", + extra={ + "count": len(orphaned_tasks), + "task_ids": [str(task.id) for task in orphaned_tasks], + "commands": [task.command for task in 
orphaned_tasks], + }, + ) + + # Disable each orphaned task + for task in orphaned_tasks: + logger.info( + f"Disabling orphaned task {task.id}: function '{task.command}' not found in registry", + extra={"task_id": str(task.id), "command": task.command, "task_type": task.task_type}, + ) + + # Create TaskIn with enabled=False + task_type_value = task.task_type if task.task_type in ("shell", "python") else "shell" + task_in = TaskIn( + id=task.id, + command=task.command, + task_type=task_type_value, # type: ignore[arg-type] + parameters=task.parameters, + enabled=False, + ) + await task_manager.save(task_in) + disabled_count += 1 + + if disabled_count > 0: + logger.warning(f"Disabled {disabled_count} orphaned Python task(s)") + else: + logger.debug("No orphaned Python tasks found") + + return disabled_count diff --git a/tests/test_example_python_task_execution_api.py b/tests/test_example_python_task_execution_api.py new file mode 100644 index 0000000..4ac2dc0 --- /dev/null +++ b/tests/test_example_python_task_execution_api.py @@ -0,0 +1,278 @@ +"""Tests for python_task_execution_api example with Python function execution.""" + +from __future__ import annotations + +import time +from collections.abc import Generator +from typing import Any, cast + +import pytest +from fastapi.testclient import TestClient + +from examples.python_task_execution_api import app + + +@pytest.fixture(scope="module") +def client() -> Generator[TestClient, None, None]: + """Create FastAPI TestClient for testing with lifespan context.""" + with TestClient(app) as test_client: + yield test_client + + +def wait_for_job_completion(client: TestClient, job_id: str, timeout: float = 5.0) -> dict[Any, Any]: + """Poll job status until completion or timeout.""" + start_time = time.time() + while time.time() - start_time < timeout: + job_response = client.get(f"/api/v1/jobs/{job_id}") + assert job_response.status_code == 200 + job = cast(dict[Any, Any], job_response.json()) + + if job["status"] in 
["completed", "failed", "canceled"]: + return job + + time.sleep(0.1) + + raise TimeoutError(f"Job {job_id} did not complete within {timeout}s") + + +def test_health_endpoint(client: TestClient) -> None: + """Test health check returns healthy status.""" + response = client.get("/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + + +def test_list_python_tasks(client: TestClient) -> None: + """Test listing tasks shows seeded Python tasks.""" + response = client.get("/api/v1/tasks") + assert response.status_code == 200 + data = response.json() + + # Should have at least 5 seeded tasks + assert isinstance(data, list) + assert len(data) >= 5 + + # Check for Python tasks + python_tasks = [task for task in data if task["task_type"] == "python"] + assert len(python_tasks) >= 4 + + # Check for specific Python task names + commands = [task["command"] for task in python_tasks] + assert "calculate_sum" in commands + assert "process_data" in commands + assert "failing_task" in commands + + +def test_create_python_task(client: TestClient) -> None: + """Test creating a Python task with parameters.""" + new_task = { + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 5, "b": 10}, + } + + response = client.post("/api/v1/tasks", json=new_task) + assert response.status_code == 201 + created = response.json() + + assert "id" in created + assert created["command"] == "calculate_sum" + assert created["task_type"] == "python" + assert created["parameters"] == {"a": 5, "b": 10} + + +def test_execute_async_python_task(client: TestClient) -> None: + """Test executing an async Python function and retrieving results.""" + # Create a Python task + new_task = { + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 15, "b": 27}, + } + create_response = client.post("/api/v1/tasks", json=new_task) + task = create_response.json() + task_id = task["id"] + + # Execute the task + execute_response 
= client.post(f"/api/v1/tasks/{task_id}/$execute") + assert execute_response.status_code == 202 + execute_data = execute_response.json() + job_id = execute_data["job_id"] + + # Wait for completion + job = wait_for_job_completion(client, job_id) + assert job["status"] == "completed" + assert job["artifact_id"] is not None + + # Get artifact with results + artifact_response = client.get(f"/api/v1/artifacts/{job['artifact_id']}") + assert artifact_response.status_code == 200 + artifact = artifact_response.json() + + # Check artifact structure for Python tasks + data = artifact["data"] + assert "task" in data + assert "result" in data + assert "error" in data + + # Verify task snapshot + assert data["task"]["command"] == "calculate_sum" + assert data["task"]["task_type"] == "python" + assert data["task"]["parameters"] == {"a": 15, "b": 27} + + # Verify result + assert data["error"] is None + assert data["result"] is not None + assert data["result"]["result"] == 42 # 15 + 27 + assert data["result"]["operation"] == "sum" + + +def test_execute_sync_python_task(client: TestClient) -> None: + """Test executing a sync Python function.""" + # Create a task + new_task = { + "command": "process_data", + "task_type": "python", + "parameters": {"input_text": "Test String", "uppercase": True}, + } + create_response = client.post("/api/v1/tasks", json=new_task) + task = create_response.json() + task_id = task["id"] + + # Execute + execute_response = client.post(f"/api/v1/tasks/{task_id}/$execute") + job_id = execute_response.json()["job_id"] + + # Wait for completion + job = wait_for_job_completion(client, job_id) + assert job["status"] == "completed" + + # Get artifact + artifact_response = client.get(f"/api/v1/artifacts/{job['artifact_id']}") + artifact = artifact_response.json() + data = artifact["data"] + + # Verify result + assert data["error"] is None + assert data["result"]["original"] == "Test String" + assert data["result"]["processed"] == "TEST STRING" + assert 
data["result"]["length"] == 11 + + +def test_execute_python_task_with_error(client: TestClient) -> None: + """Test that Python task exceptions are captured in artifacts.""" + # Create a failing task + new_task = { + "command": "failing_task", + "task_type": "python", + "parameters": {"should_fail": True}, + } + create_response = client.post("/api/v1/tasks", json=new_task) + task = create_response.json() + task_id = task["id"] + + # Execute + execute_response = client.post(f"/api/v1/tasks/{task_id}/$execute") + job_id = execute_response.json()["job_id"] + + # Wait for completion + job = wait_for_job_completion(client, job_id) + # Job completes even if Python function raised exception + assert job["status"] == "completed" + + # Get artifact + artifact_response = client.get(f"/api/v1/artifacts/{job['artifact_id']}") + artifact = artifact_response.json() + data = artifact["data"] + + # Verify error was captured + assert data["result"] is None + assert data["error"] is not None + assert data["error"]["type"] == "ValueError" + assert "designed to fail" in data["error"]["message"] + assert "traceback" in data["error"] + + +def test_execute_seeded_python_tasks(client: TestClient) -> None: + """Test executing pre-seeded Python tasks.""" + # Get list of tasks + response = client.get("/api/v1/tasks") + tasks = response.json() + + # Find a seeded Python task + python_tasks = [t for t in tasks if t["task_type"] == "python"] + assert len(python_tasks) > 0 + + # Execute one + task = python_tasks[0] + execute_response = client.post(f"/api/v1/tasks/{task['id']}/$execute") + assert execute_response.status_code == 202 + job_id = execute_response.json()["job_id"] + + # Wait for completion + job = wait_for_job_completion(client, job_id) + assert job["status"] in ["completed", "failed"] + + +def test_python_task_without_parameters(client: TestClient) -> None: + """Test Python task can be executed without parameters.""" + # Create a task without parameters + new_task = { + "command": 
"slow_computation", + "task_type": "python", + # No parameters field + } + create_response = client.post("/api/v1/tasks", json=new_task) + task = create_response.json() + task_id = task["id"] + + # Execute + execute_response = client.post(f"/api/v1/tasks/{task_id}/$execute") + job_id = execute_response.json()["job_id"] + + # Wait for completion + job = wait_for_job_completion(client, job_id, timeout=5.0) + assert job["status"] == "completed" + + # Get artifact + artifact_response = client.get(f"/api/v1/artifacts/{job['artifact_id']}") + artifact = artifact_response.json() + data = artifact["data"] + + # Should use default parameters + assert data["error"] is None + assert data["result"]["completed"] is True + + +def test_python_task_artifact_structure(client: TestClient) -> None: + """Test Python task execution and artifact structure.""" + # Create a Python task + python_task = { + "command": "calculate_sum", + "task_type": "python", + "parameters": {"a": 1, "b": 1}, + } + python_response = client.post("/api/v1/tasks", json=python_task) + python_task_data = python_response.json() + + # Execute the task + python_exec = client.post(f"/api/v1/tasks/{python_task_data['id']}/$execute") + python_job = wait_for_job_completion(client, python_exec.json()["job_id"]) + + # Should complete successfully + assert python_job["status"] == "completed" + assert python_job["artifact_id"] is not None + + # Get artifact and verify Python task structure + python_artifact = client.get(f"/api/v1/artifacts/{python_job['artifact_id']}").json() + + # Python artifact has result/error structure (not stdout/stderr) + assert "result" in python_artifact["data"] + assert "error" in python_artifact["data"] + assert "task" in python_artifact["data"] + + # Verify the result + assert python_artifact["data"]["error"] is None + assert python_artifact["data"]["result"]["result"] == 2 + assert python_artifact["data"]["result"]["operation"] == "sum" diff --git a/tests/test_manager_task.py 
b/tests/test_manager_task.py index 8dd5b2e..5b85be3 100644 --- a/tests/test_manager_task.py +++ b/tests/test_manager_task.py @@ -1,11 +1,12 @@ """Tests for TaskManager error handling and edge cases.""" +from datetime import datetime, timezone from unittest.mock import AsyncMock, Mock, patch import pytest from ulid import ULID -from chapkit import ArtifactManager, Task, TaskManager, TaskRepository +from chapkit import ArtifactManager, Task, TaskManager, TaskRegistry, TaskRepository from chapkit.core import Database, JobScheduler @@ -153,3 +154,246 @@ async def test_execute_command_task_not_found() -> None: with pytest.raises(ValueError, match=f"Task {task_id} not found"): await manager._execute_command(task_id) + + +@pytest.mark.asyncio +async def test_execute_task_routes_to_python() -> None: + """Test execute_task routes Python tasks to _execute_python.""" + task_id = ULID() + job_id = ULID() + + mock_task = Mock(spec=Task) + mock_task.id = task_id + mock_task.command = "test_func" + mock_task.task_type = "python" + + mock_repo = Mock(spec=TaskRepository) + mock_repo.find_by_id = AsyncMock(return_value=mock_task) + + mock_scheduler = Mock(spec=JobScheduler) + mock_scheduler.add_job = AsyncMock(return_value=job_id) + + mock_artifact_manager = Mock(spec=ArtifactManager) + + manager = TaskManager( + repo=mock_repo, + scheduler=mock_scheduler, + database=None, + artifact_manager=mock_artifact_manager, + ) + + result = await manager.execute_task(task_id) + + assert result == job_id + # Verify _execute_python was passed to scheduler + call_args = mock_scheduler.add_job.call_args + assert call_args[0][0].__name__ == "_execute_python" + + +@pytest.mark.asyncio +async def test_execute_task_routes_to_shell() -> None: + """Test execute_task routes shell tasks to _execute_command.""" + task_id = ULID() + job_id = ULID() + + mock_task = Mock(spec=Task) + mock_task.id = task_id + mock_task.command = "echo test" + mock_task.task_type = "shell" + + mock_repo = 
Mock(spec=TaskRepository) + mock_repo.find_by_id = AsyncMock(return_value=mock_task) + + mock_scheduler = Mock(spec=JobScheduler) + mock_scheduler.add_job = AsyncMock(return_value=job_id) + + mock_artifact_manager = Mock(spec=ArtifactManager) + + manager = TaskManager( + repo=mock_repo, + scheduler=mock_scheduler, + database=None, + artifact_manager=mock_artifact_manager, + ) + + result = await manager.execute_task(task_id) + + assert result == job_id + # Verify _execute_command was passed to scheduler + call_args = mock_scheduler.add_job.call_args + assert call_args[0][0].__name__ == "_execute_command" + + +@pytest.mark.asyncio +async def test_execute_python_without_database() -> None: + """Test _execute_python raises error when database not configured.""" + mock_repo = Mock(spec=TaskRepository) + manager = TaskManager(repo=mock_repo, scheduler=None, database=None, artifact_manager=None) + + with pytest.raises(RuntimeError, match="Database instance required"): + await manager._execute_python(ULID()) + + +@pytest.mark.asyncio +async def test_execute_python_without_artifact_manager() -> None: + """Test _execute_python raises error when artifact manager not configured.""" + mock_repo = Mock(spec=TaskRepository) + mock_database = Mock(spec=Database) + + manager = TaskManager( + repo=mock_repo, + scheduler=None, + database=mock_database, + artifact_manager=None, + ) + + with pytest.raises(RuntimeError, match="ArtifactManager instance required"): + await manager._execute_python(ULID()) + + +@pytest.mark.asyncio +async def test_execute_python_task_not_found() -> None: + """Test _execute_python raises error for non-existent task.""" + task_id = ULID() + + # Mock session context manager + mock_session = Mock() + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock(return_value=None) + + # Mock task repo that returns None + mock_task_repo = Mock(spec=TaskRepository) + mock_task_repo.find_by_id = AsyncMock(return_value=None) + + 
# Mock database + mock_database = Mock(spec=Database) + mock_database.session = Mock(return_value=mock_session) + + # Patch TaskRepository to return our mock + with patch( + "chapkit.modules.task.manager.TaskRepository", + return_value=mock_task_repo, + ): + mock_repo = Mock(spec=TaskRepository) + mock_artifact_manager = Mock(spec=ArtifactManager) + + manager = TaskManager( + repo=mock_repo, + scheduler=None, + database=mock_database, + artifact_manager=mock_artifact_manager, + ) + + with pytest.raises(ValueError, match=f"Task {task_id} not found"): + await manager._execute_python(task_id) + + +@pytest.mark.asyncio +async def test_execute_python_function_not_in_registry() -> None: + """Test _execute_python raises error when function not in registry.""" + task_id = ULID() + + # Clear registry + TaskRegistry.clear() + + # Mock task + mock_task = Mock(spec=Task) + mock_task.id = task_id + mock_task.command = "missing_function" + mock_task.task_type = "python" + mock_task.parameters = {} + mock_task.created_at = datetime.now(timezone.utc) + mock_task.updated_at = datetime.now(timezone.utc) + + # Mock session context manager + mock_session = Mock() + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock(return_value=None) + + # Mock task repo + mock_task_repo = Mock(spec=TaskRepository) + mock_task_repo.find_by_id = AsyncMock(return_value=mock_task) + + # Mock database + mock_database = Mock(spec=Database) + mock_database.session = Mock(return_value=mock_session) + + # Patch TaskRepository + with patch( + "chapkit.modules.task.manager.TaskRepository", + return_value=mock_task_repo, + ): + mock_repo = Mock(spec=TaskRepository) + mock_artifact_manager = Mock(spec=ArtifactManager) + + manager = TaskManager( + repo=mock_repo, + scheduler=None, + database=mock_database, + artifact_manager=mock_artifact_manager, + ) + + with pytest.raises(ValueError, match="Python function 'missing_function' not found in registry"): + await 
manager._execute_python(task_id) + + +@pytest.mark.asyncio +async def test_execute_task_disabled() -> None: + """Test execute_task raises error when task is disabled.""" + task_id = ULID() + + mock_task = Mock(spec=Task) + mock_task.id = task_id + mock_task.command = "echo test" + mock_task.enabled = False # Disabled task + + mock_repo = Mock(spec=TaskRepository) + mock_repo.find_by_id = AsyncMock(return_value=mock_task) + + mock_scheduler = Mock(spec=JobScheduler) + mock_artifact_manager = Mock(spec=ArtifactManager) + + manager = TaskManager( + repo=mock_repo, + scheduler=mock_scheduler, + database=None, + artifact_manager=mock_artifact_manager, + ) + + with pytest.raises(ValueError, match=f"Cannot execute disabled task {task_id}"): + await manager.execute_task(task_id) + + mock_repo.find_by_id.assert_called_once_with(task_id) + + +@pytest.mark.asyncio +async def test_execute_task_enabled() -> None: + """Test execute_task successfully executes enabled task.""" + task_id = ULID() + job_id = ULID() + + mock_task = Mock(spec=Task) + mock_task.id = task_id + mock_task.command = "echo test" + mock_task.enabled = True # Enabled task + + mock_repo = Mock(spec=TaskRepository) + mock_repo.find_by_id = AsyncMock(return_value=mock_task) + + mock_scheduler = Mock(spec=JobScheduler) + mock_scheduler.add_job = AsyncMock(return_value=job_id) + + mock_artifact_manager = Mock(spec=ArtifactManager) + + manager = TaskManager( + repo=mock_repo, + scheduler=mock_scheduler, + database=None, + artifact_manager=mock_artifact_manager, + ) + + result = await manager.execute_task(task_id) + + assert result == job_id + mock_repo.find_by_id.assert_called_once_with(task_id) + mock_scheduler.add_job.assert_called_once() diff --git a/tests/test_task_injection.py b/tests/test_task_injection.py new file mode 100644 index 0000000..f043293 --- /dev/null +++ b/tests/test_task_injection.py @@ -0,0 +1,382 @@ +"""Tests for type-based dependency injection in Python tasks.""" + +from __future__ import 
annotations + +from typing import Any + +import pytest +from sqlalchemy.ext.asyncio import AsyncSession + +from chapkit import ArtifactManager, TaskIn, TaskManager, TaskRepository +from chapkit.core import AIOJobScheduler, Database, SqliteDatabaseBuilder +from chapkit.modules.artifact import ArtifactRepository +from chapkit.modules.task.registry import TaskRegistry + + +@pytest.fixture +async def database() -> Database: + """Create in-memory database for testing.""" + db = SqliteDatabaseBuilder().in_memory().build() + await db.init() + return db + + +@pytest.fixture +async def task_manager(database: Database) -> TaskManager: + """Create task manager with all dependencies.""" + async with database.session() as session: + task_repo = TaskRepository(session) + scheduler = AIOJobScheduler() + artifact_repo = ArtifactRepository(session) + artifact_manager = ArtifactManager(artifact_repo) + + return TaskManager( + repo=task_repo, + scheduler=scheduler, + database=database, + artifact_manager=artifact_manager, + ) + + +@pytest.mark.asyncio +async def test_inject_async_session(database: Database, task_manager: TaskManager) -> None: + """Test AsyncSession injection into Python task.""" + + # Register task that uses AsyncSession + @TaskRegistry.register("test_session_injection") + async def task_with_session(session: AsyncSession) -> dict[str, Any]: + """Task that uses injected session.""" + assert session is not None + assert isinstance(session, AsyncSession) + return {"session_injected": True} + + try: + # Create task + async with database.session() as session: + task_repo = TaskRepository(session) + task_mgr = TaskManager(task_repo, None, database, None) + task = await task_mgr.save( + TaskIn( + command="test_session_injection", + task_type="python", + parameters={}, # No user parameters + ) + ) + + # Execute with full manager (has scheduler) + job_id = await task_manager.execute_task(task.id) + + # Wait for completion + scheduler = task_manager.scheduler + assert 
scheduler is not None + await scheduler.wait(job_id) + + # Verify result + job_record = await scheduler.get_record(job_id) + assert job_record is not None + assert job_record.status == "completed" + assert job_record.artifact_id is not None + + # Check artifact + async with database.session() as session: + artifact_repo = ArtifactRepository(session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact = await artifact_mgr.find_by_id(job_record.artifact_id) + assert artifact is not None + assert artifact.data["error"] is None + assert artifact.data["result"]["session_injected"] is True + finally: + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_inject_database(database: Database, task_manager: TaskManager) -> None: + """Test Database injection into Python task.""" + + @TaskRegistry.register("test_database_injection") + async def task_with_database(db: Database) -> dict[str, Any]: + """Task that uses injected database.""" + assert db is not None + assert isinstance(db, Database) + return {"database_injected": True} + + try: + # Create task + async with database.session() as session: + task_repo = TaskRepository(session) + task_mgr = TaskManager(task_repo, None, database, None) + task = await task_mgr.save( + TaskIn( + command="test_database_injection", + task_type="python", + parameters={}, + ) + ) + + # Execute + job_id = await task_manager.execute_task(task.id) + + # Wait and verify + scheduler = task_manager.scheduler + assert scheduler is not None + await scheduler.wait(job_id) + job_record = await scheduler.get_record(job_id) + assert job_record is not None + assert job_record.status == "completed" + + # Check result + assert job_record.artifact_id is not None + async with database.session() as session: + artifact_repo = ArtifactRepository(session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact = await artifact_mgr.find_by_id(job_record.artifact_id) + assert artifact is not None + assert artifact.data["error"] is None + assert 
artifact.data["result"]["database_injected"] is True + finally: + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_inject_artifact_manager(database: Database, task_manager: TaskManager) -> None: + """Test ArtifactManager injection into Python task.""" + + @TaskRegistry.register("test_artifact_injection") + async def task_with_artifacts(artifact_manager: ArtifactManager) -> dict[str, Any]: + """Task that uses injected artifact manager.""" + assert artifact_manager is not None + assert isinstance(artifact_manager, ArtifactManager) + return {"artifact_manager_injected": True} + + try: + # Create task + async with database.session() as session: + task_repo = TaskRepository(session) + task_mgr = TaskManager(task_repo, None, database, None) + task = await task_mgr.save( + TaskIn( + command="test_artifact_injection", + task_type="python", + parameters={}, + ) + ) + + # Execute + job_id = await task_manager.execute_task(task.id) + + # Wait and verify + scheduler = task_manager.scheduler + assert scheduler is not None + await scheduler.wait(job_id) + job_record = await scheduler.get_record(job_id) + assert job_record is not None + assert job_record.status == "completed" + + # Check result + assert job_record.artifact_id is not None + async with database.session() as session: + artifact_repo = ArtifactRepository(session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact = await artifact_mgr.find_by_id(job_record.artifact_id) + assert artifact is not None + assert artifact.data["error"] is None + assert artifact.data["result"]["artifact_manager_injected"] is True + finally: + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_inject_with_user_parameters(database: Database, task_manager: TaskManager) -> None: + """Test mixing injected types with user parameters.""" + + @TaskRegistry.register("test_mixed_params") + async def task_with_mixed( + name: str, # From user parameters + count: int, # From user parameters + session: AsyncSession, # 
Injected + ) -> dict[str, Any]: + """Task that mixes user and injected parameters.""" + assert name == "test" + assert count == 42 + assert session is not None + return {"name": name, "count": count, "has_session": True} + + try: + # Create task with user parameters + async with database.session() as session: + task_repo = TaskRepository(session) + task_mgr = TaskManager(task_repo, None, database, None) + task = await task_mgr.save( + TaskIn( + command="test_mixed_params", + task_type="python", + parameters={"name": "test", "count": 42}, + ) + ) + + # Execute + job_id = await task_manager.execute_task(task.id) + + # Wait and verify + scheduler = task_manager.scheduler + assert scheduler is not None + await scheduler.wait(job_id) + job_record = await scheduler.get_record(job_id) + assert job_record is not None + assert job_record.status == "completed" + + # Check result + assert job_record.artifact_id is not None + async with database.session() as session: + artifact_repo = ArtifactRepository(session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact = await artifact_mgr.find_by_id(job_record.artifact_id) + assert artifact is not None + assert artifact.data["error"] is None + result = artifact.data["result"] + assert result["name"] == "test" + assert result["count"] == 42 + assert result["has_session"] is True + finally: + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_optional_injection(database: Database, task_manager: TaskManager) -> None: + """Test Optional type injection.""" + + @TaskRegistry.register("test_optional_injection") + async def task_with_optional(session: AsyncSession | None = None) -> dict[str, Any]: + """Task with optional injected parameter.""" + # Verify session was injected (not None) + return {"session_provided": session is not None} + + try: + # Create task + async with database.session() as session: + task_repo = TaskRepository(session) + task_mgr = TaskManager(task_repo, None, database, None) + task = await 
task_mgr.save( + TaskIn( + command="test_optional_injection", + task_type="python", + parameters={}, + ) + ) + + # Execute + job_id = await task_manager.execute_task(task.id) + + # Wait and verify + scheduler = task_manager.scheduler + assert scheduler is not None + await scheduler.wait(job_id) + job_record = await scheduler.get_record(job_id) + assert job_record is not None + assert job_record.status == "completed" + + # Check result + assert job_record.artifact_id is not None + async with database.session() as session: + artifact_repo = ArtifactRepository(session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact = await artifact_mgr.find_by_id(job_record.artifact_id) + assert artifact is not None + assert artifact.data["error"] is None + assert artifact.data["result"]["session_provided"] is True + finally: + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_missing_required_user_parameter(database: Database, task_manager: TaskManager) -> None: + """Test error when required user parameter is missing.""" + + @TaskRegistry.register("test_missing_param") + async def task_with_required(name: str, session: AsyncSession) -> dict[str, Any]: + """Task with required user parameter.""" + return {"name": name} + + try: + # Create task WITHOUT required parameter + async with database.session() as session: + task_repo = TaskRepository(session) + task_mgr = TaskManager(task_repo, None, database, None) + task = await task_mgr.save( + TaskIn( + command="test_missing_param", + task_type="python", + parameters={}, # Missing 'name' + ) + ) + + # Execute - should capture error + job_id = await task_manager.execute_task(task.id) + + # Wait for completion + scheduler = task_manager.scheduler + assert scheduler is not None + await scheduler.wait(job_id) + job_record = await scheduler.get_record(job_id) + assert job_record is not None + assert job_record.status == "completed" # Job completes but captures error + + # Check error in artifact + assert 
job_record.artifact_id is not None + async with database.session() as session: + artifact_repo = ArtifactRepository(session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact = await artifact_mgr.find_by_id(job_record.artifact_id) + assert artifact is not None + assert artifact.data["error"] is not None + assert "Missing required parameter 'name'" in artifact.data["error"]["message"] + finally: + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_sync_function_injection(database: Database, task_manager: TaskManager) -> None: + """Test injection works with sync functions too.""" + + @TaskRegistry.register("test_sync_injection") + def sync_task_with_injection(value: int, database: Database) -> dict[str, Any]: + """Sync task with injection.""" + assert database is not None + return {"value": value * 2, "has_database": True} + + try: + # Create task + async with database.session() as session: + task_repo = TaskRepository(session) + task_mgr = TaskManager(task_repo, None, database, None) + task = await task_mgr.save( + TaskIn( + command="test_sync_injection", + task_type="python", + parameters={"value": 21}, + ) + ) + + # Execute + job_id = await task_manager.execute_task(task.id) + + # Wait and verify + scheduler = task_manager.scheduler + assert scheduler is not None + await scheduler.wait(job_id) + job_record = await scheduler.get_record(job_id) + assert job_record is not None + assert job_record.status == "completed" + + # Check result + assert job_record.artifact_id is not None + async with database.session() as session: + artifact_repo = ArtifactRepository(session) + artifact_mgr = ArtifactManager(artifact_repo) + artifact = await artifact_mgr.find_by_id(job_record.artifact_id) + assert artifact is not None + assert artifact.data["error"] is None + assert artifact.data["result"]["value"] == 42 + assert artifact.data["result"]["has_database"] is True + finally: + TaskRegistry.clear() diff --git a/tests/test_task_registry.py 
b/tests/test_task_registry.py new file mode 100644 index 0000000..9f69e07 --- /dev/null +++ b/tests/test_task_registry.py @@ -0,0 +1,151 @@ +"""Tests for TaskRegistry functionality.""" + +import pytest + +from chapkit import TaskRegistry + + +@pytest.fixture(autouse=True) +def clear_registry(): + """Clear registry before and after each test.""" + TaskRegistry.clear() + yield + TaskRegistry.clear() + + +def test_register_decorator(): + """Test registering a function using the decorator.""" + + @TaskRegistry.register("test_func") + def test_func(): + return "test" + + assert "test_func" in TaskRegistry.list_all() + func = TaskRegistry.get("test_func") + assert func() == "test" + + +def test_register_function_imperative(): + """Test registering a function imperatively.""" + + def my_func(): + return "my result" + + TaskRegistry.register_function("my_func", my_func) + + assert "my_func" in TaskRegistry.list_all() + func = TaskRegistry.get("my_func") + assert func() == "my result" + + +def test_register_async_function(): + """Test registering an async function.""" + + @TaskRegistry.register("async_func") + async def async_func(): + return "async result" + + assert "async_func" in TaskRegistry.list_all() + func = TaskRegistry.get("async_func") + assert callable(func) + + +def test_duplicate_registration_decorator(): + """Test that duplicate registration raises ValueError.""" + + @TaskRegistry.register("dup_func") + def func1(): + return "first" + + with pytest.raises(ValueError, match="Task 'dup_func' already registered"): + + @TaskRegistry.register("dup_func") + def func2(): + return "second" + + +def test_duplicate_registration_imperative(): + """Test that duplicate imperative registration raises ValueError.""" + + def func1(): + return "first" + + def func2(): + return "second" + + TaskRegistry.register_function("dup_func", func1) + + with pytest.raises(ValueError, match="Task 'dup_func' already registered"): + TaskRegistry.register_function("dup_func", func2) + + 
+def test_get_missing_function(): + """Test that getting a missing function raises KeyError.""" + with pytest.raises(KeyError, match="Task 'missing' not found in registry"): + TaskRegistry.get("missing") + + +def test_list_all_empty(): + """Test listing all tasks when registry is empty.""" + assert TaskRegistry.list_all() == [] + + +def test_list_all_multiple(): + """Test listing all registered tasks.""" + + @TaskRegistry.register("func_a") + def func_a(): + pass + + @TaskRegistry.register("func_c") + def func_c(): + pass + + @TaskRegistry.register("func_b") + def func_b(): + pass + + tasks = TaskRegistry.list_all() + assert tasks == ["func_a", "func_b", "func_c"] # Should be sorted + + +def test_clear(): + """Test clearing the registry.""" + + @TaskRegistry.register("func1") + def func1(): + pass + + @TaskRegistry.register("func2") + def func2(): + pass + + assert len(TaskRegistry.list_all()) == 2 + + TaskRegistry.clear() + + assert TaskRegistry.list_all() == [] + + +def test_register_with_parameters(): + """Test registering function that accepts parameters.""" + + @TaskRegistry.register("add_numbers") + def add_numbers(a: int, b: int) -> int: + return a + b + + func = TaskRegistry.get("add_numbers") + assert func(5, 3) == 8 + assert func(a=10, b=20) == 30 + + +def test_register_with_default_parameters(): + """Test registering function with default parameters.""" + + @TaskRegistry.register("greet") + def greet(name: str, greeting: str = "Hello") -> str: + return f"{greeting}, {name}!" + + func = TaskRegistry.get("greet") + assert func("World") == "Hello, World!" + assert func("World", greeting="Hi") == "Hi, World!" 
diff --git a/tests/test_task_repository.py b/tests/test_task_repository.py new file mode 100644 index 0000000..c9d03ed --- /dev/null +++ b/tests/test_task_repository.py @@ -0,0 +1,139 @@ +"""Tests for TaskRepository enabled filtering.""" + +import pytest +from ulid import ULID + +from chapkit import TaskIn, TaskManager, TaskRepository +from chapkit.core import SqliteDatabaseBuilder + + +@pytest.mark.asyncio +async def test_find_by_enabled_true() -> None: + """Test finding only enabled tasks.""" + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create enabled and disabled tasks + await task_manager.save(TaskIn(id=ULID(), command="enabled1", task_type="shell", enabled=True)) + await task_manager.save(TaskIn(id=ULID(), command="enabled2", task_type="shell", enabled=True)) + await task_manager.save(TaskIn(id=ULID(), command="disabled1", task_type="shell", enabled=False)) + + # Find only enabled tasks + enabled_tasks = await task_repo.find_by_enabled(True) + + assert len(enabled_tasks) == 2 + assert all(task.enabled for task in enabled_tasks) + assert {task.command for task in enabled_tasks} == {"enabled1", "enabled2"} + + +@pytest.mark.asyncio +async def test_find_by_enabled_false() -> None: + """Test finding only disabled tasks.""" + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create enabled and disabled tasks + await task_manager.save(TaskIn(id=ULID(), command="enabled1", task_type="shell", enabled=True)) + await task_manager.save(TaskIn(id=ULID(), command="disabled1", task_type="shell", enabled=False)) + await 
task_manager.save(TaskIn(id=ULID(), command="disabled2", task_type="shell", enabled=False)) + + # Find only disabled tasks + disabled_tasks = await task_repo.find_by_enabled(False) + + assert len(disabled_tasks) == 2 + assert all(not task.enabled for task in disabled_tasks) + assert {task.command for task in disabled_tasks} == {"disabled1", "disabled2"} + + +@pytest.mark.asyncio +async def test_find_all_with_enabled_filter_true() -> None: + """Test find_all with enabled=True filter.""" + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create mixed tasks + await task_manager.save(TaskIn(id=ULID(), command="enabled1", task_type="shell", enabled=True)) + await task_manager.save(TaskIn(id=ULID(), command="disabled1", task_type="shell", enabled=False)) + + # Filter for enabled only + enabled_tasks = await task_repo.find_all(enabled=True) + + assert len(enabled_tasks) == 1 + assert enabled_tasks[0].command == "enabled1" + assert enabled_tasks[0].enabled is True + + +@pytest.mark.asyncio +async def test_find_all_with_enabled_filter_false() -> None: + """Test find_all with enabled=False filter.""" + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create mixed tasks + await task_manager.save(TaskIn(id=ULID(), command="enabled1", task_type="shell", enabled=True)) + await task_manager.save(TaskIn(id=ULID(), command="disabled1", task_type="shell", enabled=False)) + + # Filter for disabled only + disabled_tasks = await task_repo.find_all(enabled=False) + + assert len(disabled_tasks) == 1 + assert disabled_tasks[0].command == "disabled1" + assert 
disabled_tasks[0].enabled is False + + +@pytest.mark.asyncio +async def test_find_all_without_filter() -> None: + """Test find_all returns all tasks when enabled=None.""" + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create mixed tasks + await task_manager.save(TaskIn(id=ULID(), command="enabled1", task_type="shell", enabled=True)) + await task_manager.save(TaskIn(id=ULID(), command="disabled1", task_type="shell", enabled=False)) + await task_manager.save(TaskIn(id=ULID(), command="enabled2", task_type="shell", enabled=True)) + + # Get all tasks (no filter) + all_tasks = await task_repo.find_all(enabled=None) + + assert len(all_tasks) == 3 + commands = {task.command for task in all_tasks} + assert commands == {"enabled1", "disabled1", "enabled2"} + + +@pytest.mark.asyncio +async def test_find_by_enabled_empty() -> None: + """Test find_by_enabled returns empty list when no matches.""" + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create only enabled tasks + await task_manager.save(TaskIn(id=ULID(), command="enabled1", task_type="shell", enabled=True)) + + # Find disabled tasks (should be empty) + disabled_tasks = await task_repo.find_by_enabled(False) + + assert len(disabled_tasks) == 0 diff --git a/tests/test_task_router.py b/tests/test_task_router.py index 6965dcc..259c431 100644 --- a/tests/test_task_router.py +++ b/tests/test_task_router.py @@ -1,5 +1,6 @@ """Tests for TaskRouter error handling.""" +from datetime import datetime, timezone from unittest.mock import AsyncMock, Mock from fastapi import FastAPI @@ -96,3 +97,170 @@ def 
manager_factory() -> TaskManager: data = response.json() assert data["job_id"] == str(job_id) assert "submitted for execution" in data["message"] + + +def test_list_tasks_with_enabled_filter_true() -> None: + """Test GET /tasks?enabled=true returns only enabled tasks.""" + # Create mock manager + mock_manager = Mock(spec=TaskManager) + + now = datetime.now(timezone.utc) + + enabled_task1 = TaskOut( + id=ULID(), + command="echo enabled1", + task_type="shell", + parameters=None, + enabled=True, + created_at=now, + updated_at=now, + ) + enabled_task2 = TaskOut( + id=ULID(), + command="echo enabled2", + task_type="shell", + parameters=None, + enabled=True, + created_at=now, + updated_at=now, + ) + + mock_manager.find_all = AsyncMock(return_value=[enabled_task1, enabled_task2]) + + def manager_factory() -> TaskManager: + return mock_manager + + # Create app with router + app = FastAPI() + router = TaskRouter.create( + prefix="/api/v1/tasks", + tags=["Tasks"], + entity_in_type=TaskIn, + entity_out_type=TaskOut, + manager_factory=manager_factory, + ) + app.include_router(router) + + client = TestClient(app) + + response = client.get("/api/v1/tasks?enabled=true") + + assert response.status_code == 200 + data = response.json() + assert len(data) == 2 + assert all(task["enabled"] for task in data) + # Verify find_all was called with enabled=True + mock_manager.find_all.assert_called_once() + call_kwargs = mock_manager.find_all.call_args.kwargs + assert call_kwargs.get("enabled") is True + + +def test_list_tasks_with_enabled_filter_false() -> None: + """Test GET /tasks?enabled=false returns only disabled tasks.""" + # Create mock manager + mock_manager = Mock(spec=TaskManager) + + now = datetime.now(timezone.utc) + + disabled_task1 = TaskOut( + id=ULID(), + command="echo disabled1", + task_type="shell", + parameters=None, + enabled=False, + created_at=now, + updated_at=now, + ) + disabled_task2 = TaskOut( + id=ULID(), + command="echo disabled2", + task_type="shell", + 
parameters=None, + enabled=False, + created_at=now, + updated_at=now, + ) + + mock_manager.find_all = AsyncMock(return_value=[disabled_task1, disabled_task2]) + + def manager_factory() -> TaskManager: + return mock_manager + + # Create app with router + app = FastAPI() + router = TaskRouter.create( + prefix="/api/v1/tasks", + tags=["Tasks"], + entity_in_type=TaskIn, + entity_out_type=TaskOut, + manager_factory=manager_factory, + ) + app.include_router(router) + + client = TestClient(app) + + response = client.get("/api/v1/tasks?enabled=false") + + assert response.status_code == 200 + data = response.json() + assert len(data) == 2 + assert all(not task["enabled"] for task in data) + # Verify find_all was called with enabled=False + mock_manager.find_all.assert_called_once() + call_kwargs = mock_manager.find_all.call_args.kwargs + assert call_kwargs.get("enabled") is False + + +def test_list_tasks_without_enabled_filter() -> None: + """Test GET /tasks returns all tasks when enabled parameter not provided.""" + # Create mock manager + mock_manager = Mock(spec=TaskManager) + + now = datetime.now(timezone.utc) + + task1 = TaskOut( + id=ULID(), + command="echo enabled", + task_type="shell", + parameters=None, + enabled=True, + created_at=now, + updated_at=now, + ) + task2 = TaskOut( + id=ULID(), + command="echo disabled", + task_type="shell", + parameters=None, + enabled=False, + created_at=now, + updated_at=now, + ) + + mock_manager.find_all = AsyncMock(return_value=[task1, task2]) + + def manager_factory() -> TaskManager: + return mock_manager + + # Create app with router + app = FastAPI() + router = TaskRouter.create( + prefix="/api/v1/tasks", + tags=["Tasks"], + entity_in_type=TaskIn, + entity_out_type=TaskOut, + manager_factory=manager_factory, + ) + app.include_router(router) + + client = TestClient(app) + + response = client.get("/api/v1/tasks") + + assert response.status_code == 200 + data = response.json() + assert len(data) == 2 + # Verify find_all was called 
with enabled=None + mock_manager.find_all.assert_called_once() + call_kwargs = mock_manager.find_all.call_args.kwargs + assert call_kwargs.get("enabled") is None diff --git a/tests/test_task_validation.py b/tests/test_task_validation.py new file mode 100644 index 0000000..0c6271c --- /dev/null +++ b/tests/test_task_validation.py @@ -0,0 +1,242 @@ +"""Tests for validate_and_disable_orphaned_tasks utility.""" + +import pytest +from fastapi import FastAPI +from ulid import ULID + +from chapkit import TaskIn, TaskManager, TaskRegistry, TaskRepository, validate_and_disable_orphaned_tasks +from chapkit.core import SqliteDatabaseBuilder + + +@pytest.mark.asyncio +async def test_validate_no_database() -> None: + """Test validation returns 0 when no database configured.""" + app = FastAPI() + # No database in app.state + + disabled_count = await validate_and_disable_orphaned_tasks(app) + + assert disabled_count == 0 + + +@pytest.mark.asyncio +async def test_validate_no_orphaned_tasks() -> None: + """Test validation returns 0 when all Python tasks are valid.""" + # Register a function + TaskRegistry.clear() + TaskRegistry.register_function("valid_func", lambda: {"result": "ok"}) + + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create valid Python task + await task_manager.save(TaskIn(id=ULID(), command="valid_func", task_type="python", enabled=True)) + + # Create shell task (should be ignored) + await task_manager.save(TaskIn(id=ULID(), command="echo test", task_type="shell", enabled=True)) + + # Setup app with database + app = FastAPI() + app.state.database = database + + disabled_count = await validate_and_disable_orphaned_tasks(app) + + assert disabled_count == 0 + + # Verify tasks are still enabled + async with database.session() as session: + task_repo = 
TaskRepository(session) + all_tasks = await task_repo.find_all() + assert len(all_tasks) == 2 + assert all(task.enabled for task in all_tasks) + + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_validate_disables_orphaned_tasks() -> None: + """Test validation disables orphaned Python tasks.""" + TaskRegistry.clear() + # Don't register "missing_func" + + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create orphaned Python task + orphaned_task_id = ULID() + await task_manager.save(TaskIn(id=orphaned_task_id, command="missing_func", task_type="python", enabled=True)) + + # Create shell task (should not be affected) + shell_task_id = ULID() + await task_manager.save(TaskIn(id=shell_task_id, command="echo test", task_type="shell", enabled=True)) + + # Setup app with database + app = FastAPI() + app.state.database = database + + disabled_count = await validate_and_disable_orphaned_tasks(app) + + assert disabled_count == 1 + + # Verify orphaned task is disabled + async with database.session() as session: + task_repo = TaskRepository(session) + + orphaned_task = await task_repo.find_by_id(orphaned_task_id) + assert orphaned_task is not None + assert orphaned_task.enabled is False + + shell_task = await task_repo.find_by_id(shell_task_id) + assert shell_task is not None + assert shell_task.enabled is True # Still enabled + + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_validate_multiple_orphaned_tasks() -> None: + """Test validation disables multiple orphaned Python tasks.""" + TaskRegistry.clear() + # Register only one function + TaskRegistry.register_function("valid_func", lambda: {"result": "ok"}) + + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + 
task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create valid task + valid_task_id = ULID() + await task_manager.save(TaskIn(id=valid_task_id, command="valid_func", task_type="python", enabled=True)) + + # Create orphaned tasks + orphaned1_id = ULID() + await task_manager.save(TaskIn(id=orphaned1_id, command="missing_func1", task_type="python", enabled=True)) + + orphaned2_id = ULID() + await task_manager.save(TaskIn(id=orphaned2_id, command="missing_func2", task_type="python", enabled=True)) + + # Setup app with database + app = FastAPI() + app.state.database = database + + disabled_count = await validate_and_disable_orphaned_tasks(app) + + assert disabled_count == 2 + + # Verify correct tasks are disabled + async with database.session() as session: + task_repo = TaskRepository(session) + + valid_task = await task_repo.find_by_id(valid_task_id) + assert valid_task is not None + assert valid_task.enabled is True + + orphaned1 = await task_repo.find_by_id(orphaned1_id) + assert orphaned1 is not None + assert orphaned1.enabled is False + + orphaned2 = await task_repo.find_by_id(orphaned2_id) + assert orphaned2 is not None + assert orphaned2.enabled is False + + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_validate_already_disabled_orphaned_task() -> None: + """Test validation handles already disabled orphaned tasks.""" + TaskRegistry.clear() + # Don't register "missing_func" + + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create orphaned task that's already disabled + orphaned_task_id = ULID() + await task_manager.save(TaskIn(id=orphaned_task_id, command="missing_func", task_type="python", enabled=False)) + + # Setup app with database + app = 
FastAPI() + app.state.database = database + + # Run validation - should still disable it (idempotent) + disabled_count = await validate_and_disable_orphaned_tasks(app) + + assert disabled_count == 1 + + # Verify task is still disabled + async with database.session() as session: + task_repo = TaskRepository(session) + orphaned_task = await task_repo.find_by_id(orphaned_task_id) + assert orphaned_task is not None + assert orphaned_task.enabled is False + + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_validate_no_tasks() -> None: + """Test validation returns 0 when there are no tasks.""" + TaskRegistry.clear() + + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + # Setup app with empty database + app = FastAPI() + app.state.database = database + + disabled_count = await validate_and_disable_orphaned_tasks(app) + + assert disabled_count == 0 + + TaskRegistry.clear() + + +@pytest.mark.asyncio +async def test_validate_only_shell_tasks() -> None: + """Test validation ignores shell tasks.""" + TaskRegistry.clear() + + database = SqliteDatabaseBuilder().in_memory().build() + await database.init() + + async with database.session() as session: + task_repo = TaskRepository(session) + task_manager = TaskManager(task_repo, scheduler=None, database=None, artifact_manager=None) + + # Create only shell tasks + await task_manager.save(TaskIn(id=ULID(), command="echo test1", task_type="shell", enabled=True)) + await task_manager.save(TaskIn(id=ULID(), command="echo test2", task_type="shell", enabled=True)) + + # Setup app with database + app = FastAPI() + app.state.database = database + + disabled_count = await validate_and_disable_orphaned_tasks(app) + + assert disabled_count == 0 + + # Verify all tasks still enabled + async with database.session() as session: + task_repo = TaskRepository(session) + all_tasks = await task_repo.find_all() + assert len(all_tasks) == 2 + assert all(task.enabled for task in all_tasks) + + 
TaskRegistry.clear()