diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000..4395724 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,25 @@ +changelog: + exclude: + labels: + - skip-changelog + categories: + - title: Breaking Changes + labels: + - breaking + - breaking-change + - title: Features + labels: + - enhancement + - feature + - title: Fixes + labels: + - bug + - fix + - title: CI and Tooling + labels: + - ci + - dependencies + - tooling + - title: Other Changes + labels: + - "*" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..afe63f8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,70 @@ +name: CI + +on: + push: + branches: + - dev + pull_request: + +permissions: + contents: read + +jobs: + tests: + name: Tests (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv + run: python -m pip install uv + + - name: Install dependencies + run: uv sync --group dev --locked + + - name: Run tests + run: uv run pytest + + build: + name: Build distribution + runs-on: ubuntu-latest + needs: + - tests + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install uv + run: python -m pip install uv + + - name: Install dependencies + run: uv sync --group dev --locked + + - name: Build distributions + run: uv run python -m build + + - name: Validate distribution metadata + run: uv run python -m twine check dist/* + + - name: Upload distributions + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ diff --git a/.github/workflows/publish-testpypi.yml 
b/.github/workflows/publish-testpypi.yml new file mode 100644 index 0000000..e5c5a02 --- /dev/null +++ b/.github/workflows/publish-testpypi.yml @@ -0,0 +1,65 @@ +name: Publish to TestPyPI + +on: + workflow_dispatch: + +permissions: + contents: read + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install uv + run: python -m pip install uv + + - name: Install dependencies + run: uv sync --group dev --locked + + - name: Run tests + run: uv run pytest + + - name: Build distributions + run: uv run python -m build + + - name: Validate distribution metadata + run: uv run python -m twine check dist/* + + - name: Upload distributions + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish: + name: Publish to TestPyPI + runs-on: ubuntu-latest + needs: + - build + environment: + name: testpypi + url: https://test.pypi.org/p/adagio-cli + permissions: + id-token: write + + steps: + - name: Download distributions + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish distribution to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..996c225 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,75 @@ +name: Publish to PyPI + +on: + push: + tags: + - "v*" + +permissions: + contents: read + +jobs: + build: + name: Build distribution + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install uv + run: python -m pip install uv + + - name: Install dependencies + 
run: uv sync --group dev --locked + + - name: Run tests + run: uv run pytest + + - name: Verify tag matches package version + run: | + PACKAGE_VERSION=$(grep '^version = ' pyproject.toml | head -n 1 | cut -d '"' -f 2) + TAG_VERSION="${GITHUB_REF_NAME#v}" + + if [ "$PACKAGE_VERSION" != "$TAG_VERSION" ]; then + echo "Tag version $TAG_VERSION does not match pyproject version $PACKAGE_VERSION" + exit 1 + fi + + - name: Build distributions + run: uv run python -m build + + - name: Validate distribution metadata + run: uv run python -m twine check dist/* + + - name: Upload distributions + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish: + name: Publish to PyPI + runs-on: ubuntu-latest + needs: + - build + environment: + name: pypi + url: https://pypi.org/p/adagio-cli + permissions: + id-token: write + + steps: + - name: Download distributions + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + - name: Publish distribution to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 0fe94b7..c6b7524 100644 --- a/.gitignore +++ b/.gitignore @@ -9,14 +9,13 @@ venv/ # Distribution / packaging *.egg-info/ +build/ +dist/ # Local test / coverage files .coverage .pytest_cache/ -# Project lock file -uv.lock - # OS junk .DS_Store Thumbs.db @@ -30,4 +29,5 @@ Thumbs.db .ipynb_checkpoints/ .mypy_cache/ -runinfo/ \ No newline at end of file +.uv-cache/ +runinfo/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..6df60c8 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,15 @@ +# Changelog + +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html) +using [PEP 440](https://packaging.python.org/en/latest/specifications/version-specifiers/) version strings for Python releases. + +## [Unreleased] + +### Added + +- GitHub Actions CI for linting, tests, and build verification. +- Trusted Publishing workflows for manual TestPyPI validation and tagged PyPI releases. +- A release playbook covering changelog, tags, and publish steps. diff --git a/Dockerfile b/Dockerfile index 19b34af..7e4a90a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,36 +1,22 @@ -ARG QIIME_BASE_IMAGE=quay.io/qiime2/amplicon:2024.5 -FROM ${QIIME_BASE_IMAGE} as base +ARG QIIME_BASE_IMAGE=quay.io/qiime2/amplicon:2026.1 +FROM ${QIIME_BASE_IMAGE} AS base ENV PYTHONUNBUFFERED=1 WORKDIR /app +FROM base AS dev -COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /uvx /bin/ - -ENV UV_COMPILE_BYTECODE=1 -ENV UV_LINK_MODE=copy - -# Development stage -FROM base as dev - -RUN --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=uv.lock,target=uv.lock \ - --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ - uv sync --frozen --no-install-project - -ENV PYTHONPATH=/app/src - -COPY ./pyproject.toml ./uv.lock /app/ +COPY ./pyproject.toml /app/ COPY ./README.md /app/ COPY ./src /app/src -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --frozen +# Skip UV and use QIIME conda env +RUN pip install . + +RUN pip uninstall pyOpenSSL -y || true -# is this needed still? 
-RUN pip3 uninstall pyOpenSSL -y || true -# We can extend this if needed -FROM dev as production +FROM dev AS production +WORKDIR /app + +CMD ["adagio", "--help"] -# Set default command -CMD ["uv", "run", "adagio", "--help"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ed4b99d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Cymis + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index 1ac2213..b28475d 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,278 @@ -# Adagio +# Adagio CLI + +Command-line runner for Adagio pipeline files + +## Requirements + +- Python 3.10+ +- `uv` (recommended for development) +- Docker for the default task runtime +- Apptainer or Singularity when using `kind = "apptainer"` with local `.sif` images ## Installation +Install from PyPI: + +```bash +pip install adagio-cli +``` + +Install a prerelease from TestPyPI while validating a release candidate: + +```bash +pip install \ + --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple \ + adagio-cli +``` + +Install from the current checkout: + ```bash pip install . ``` -## Usage +Or with `uv`: + +```bash +uv pip install . +``` + +Verify install: ```bash adagio --version +adagio --help +``` + +## Usage + +### Run a pipeline + +Show command help: + +```bash +adagio run --help +``` + +Run with a pipeline file: + +```bash +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache +``` + +`adagio run` executes each plugin task in its own task environment. +The default task environment is a Docker image in GHCR derived from the plugin +name in the pipeline spec, for example `dada2` -> `ghcr.io/cymis/qiime2-plugin-dada2:2026.1`. +Runtime config can override that per default/plugin/task and switch selected work to +Apptainer/Singularity with a local `.sif` image path. The cache directory is required +and is reused across reruns by default so unchanged successful tasks can be replayed. 
+ +Equivalent positional form: + +```bash +adagio run path/to/pipeline.json --cache-dir /path/to/cache +``` + +Use an arguments file: + +```bash +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --arguments path/to/arguments.json +``` + +Use a runtime config file with defaults, plugin-level overrides, and optional task overrides: + +```bash +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --config path/to/runtime.toml +``` + +Control which dynamic flags are shown in help: + +```bash +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --show-params required +# choices: all | missing | required +``` + +Disable reuse for a run while still writing outputs into the selected cache directory: + +```bash +adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache --no-reuse +``` + +The same boolean pair is available as `--reuse` / `--no-reuse`. `--reuse` is the default. + +Clear an existing cache directory: + +```bash +adagio cache clear --cache-dir /path/to/cache +``` + +### Inspect a pipeline + +Print a dependency-ordered summary of the plugin actions in a pipeline: + +```bash +adagio pipeline show path/to/pipeline.json +``` + +### Arguments file format + +`--arguments` can be downloaded from Adagio directly in the "Run" workflow : + +```json +{ + "version": 1, + "inputs": { + "input_name": "/path/to/input.qza" + }, + "parameters": { + "param_name": "value" + }, + "outputs": "/path/to/output-dir" +} +``` + +`outputs` may also be a map keyed by output name (WIP: not currently generated by Adagio): + +```json +{ + "outputs": { + "output_a": "/path/to/output-a", + "output_b": "/path/to/output-b" + } +} +``` + +If outputs are omitted, defaults are generated under `./adagio-outputs`. + +### Runtime config format + +`--config` accepts TOML. 
Defaults apply first, then plugin keys, then task keys: + +```toml +version = 1 + +[defaults] +platform = "linux/amd64" + +[plugins] +dada2 = { image = "ghcr.io/cymis/qiime2-plugin-dada2:2026.1" } +demux = { image = "ghcr.io/cymis/qiime2-plugin-demux:2026.1" } + +[tasks] +"dada2.denoise_single" = { image = "registry.internal/custom-dada2:1.0", platform = "linux/amd64" } +``` + +`kind`, `image`, and `platform` are all optional on defaults, plugin entries, and task entries. +`kind` may be `docker` or `apptainer`. `image` remains the environment reference: +for Docker it is the container image, and for Apptainer it must be a local `.sif` path. + +Precedence is `task override > plugin override > defaults > default resolver`. + +Task lookup supports graph node `id`, optional task `name` when present in the +pipeline, and `plugin.action` as a fallback. Plugin lookup uses the pipeline's +plugin name. If `platform` is omitted all the way through, Adagio uses normal +Docker platform resolution with no implicit fallback. Anything not listed in the +config uses the default plugin image resolver. + +Concrete Apptainer example: + +```toml +version = 1 + +[defaults] +kind = "docker" + +[plugins] +bowtie2 = { kind = "apptainer", image = "/shared/qiime-images/q2-bowtie2-test.sif" } +``` + +For `kind = "apptainer"`, Adagio prefers the `apptainer` executable and falls back to +`singularity`. The current implementation supports only local `.sif` paths and runs +tasks serially; no scheduler submission or remote image pull behavior is included. 
+ +### QAPI generation/submission + +Generate and submit plugin metadata from the active QIIME environment: + +```bash +adagio qapi build --action-url http://localhost:81/api/v1 +``` + +Submit to a protected deployment such as `adagio.run` with a scoped submission token: + +```bash +export ACTION_URL=https://adagio.run/api/v1 +export QAPI_SUBMISSION_TOKEN= +uv run adagio qapi build +``` + +You can also pass `--submission-token`, but the environment variable is safer because it does +not end up in shell history. + +Write payload to disk without submitting: + +```bash +adagio qapi build --output qapi.json --dry-run +``` + +Submit selected plugins only: + +```bash +adagio qapi build --plugin dada2 --plugin feature-table ``` ## Development +### Setup + +Install runtime and dev dependencies: + +```bash +uv sync --group dev +``` + +Run commands inside the project environment: + +```bash +uv run adagio --help +``` + +### Linting + +```bash +uv run ruff check . +uv run ruff format --check . +uv run ruff format . +``` + +### Tests + +```bash +uv run pytest +``` + +### Build distributions + +```bash +uv run python -m build +uv run python -m twine check dist/* +``` + +### Running locally during development + +```bash +uv run adagio run --pipeline path/to/pipeline.json --cache-dir /path/to/cache +``` + +### Runtime entrypoint (container/integration use) + +The `runtime` subcommand is intended for runtime-adapter jobs: + ```bash -uv sync -source .venv/bin/activate +uv run adagio runtime --spec spec.json --config runtime.toml --arguments arguments.json --cache-dir /path/to/cache ``` + +### Releasing + +See [RELEASING.md](RELEASING.md) for the one-time PyPI/GitHub setup, version/tag workflow, and TestPyPI/PyPI publishing steps. 
diff --git a/RELEASING.md b/RELEASING.md new file mode 100644 index 0000000..b24391f --- /dev/null +++ b/RELEASING.md @@ -0,0 +1,86 @@ +# Releasing `adagio-cli` + +This repository publishes the Python distribution as `adagio-cli` while keeping the installed CLI command as `adagio`. + +## One-time setup + +1. Make the GitHub repository public before the first Trusted Publishing run, or confirm your GitHub plan supports protected environments for private repositories. +2. Create a PyPI account at <https://pypi.org/account/register/>. +3. Create a separate TestPyPI account at <https://test.pypi.org/account/register/>. +4. Enable two-factor authentication on both accounts. +5. In GitHub, create two repository environments named `testpypi` and `pypi`. +6. Add a required reviewer to the `pypi` environment so production publishes need manual approval. +7. In PyPI Trusted Publishers, register this workflow: + - Project name: `adagio-cli` + - Owner: `cymis` + - Repository: `adagio-cli` + - Workflow file: `publish.yml` + - Environment: `pypi` +8. In TestPyPI Trusted Publishers, register this workflow: + - Project name: `adagio-cli` + - Owner: `cymis` + - Repository: `adagio-cli` + - Workflow file: `publish-testpypi.yml` + - Environment: `testpypi` + +## Before a release + +1. Update `CHANGELOG.md`. +2. Update the version in `pyproject.toml`. +3. Run the local checks: + +```bash +uv sync --group dev +uv run pytest +uv run python -m build +uv run python -m twine check dist/* +``` + +4. Commit the changelog and version bump to `dev`. +5. Push the branch and confirm the `CI` workflow passes. + +## TestPyPI validation + +Run the `Publish to TestPyPI` workflow manually from GitHub Actions after bumping to a fresh prerelease version. 
+ +After it succeeds, validate installation in a clean environment: + +```bash +python -m venv /tmp/adagio-cli-testpypi +source /tmp/adagio-cli-testpypi/bin/activate +python -m pip install \ + --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple \ + adagio-cli +adagio --version +``` + +## Production release + +1. Create an annotated tag that matches the package version: + +```bash +git tag -a v0.1.0a1 -m "adagio-cli 0.1.0a1" +``` + +2. Push the tag: + +```bash +git push origin v0.1.0a1 +``` + +3. Approve the pending `pypi` environment deployment in GitHub Actions. +4. Confirm the package appears on PyPI and installs cleanly: + +```bash +python -m venv /tmp/adagio-cli-pypi +source /tmp/adagio-cli-pypi/bin/activate +python -m pip install adagio-cli +adagio --version +``` + +## Versioning and tags + +- Use PEP 440 versions in `pyproject.toml`, for example `0.1.0a1`, `0.1.0`, `0.1.1`. +- Use Git tags prefixed with `v`, for example `v0.1.0a1`, `v0.1.0`, `v0.1.1`. +- The tagged version and `pyproject.toml` version must match exactly. 
diff --git a/examples/simple.json b/examples/simple.json deleted file mode 100644 index 7600d0e..0000000 --- a/examples/simple.json +++ /dev/null @@ -1,377 +0,0 @@ -{ - "type": "pipeline", - "meta": { - "version": "1.0.0rc" - }, - "signature": { - "inputs": [ - { - "id": "9a586cf8-272f-43de-b137-3fc36729f2c7", - "name": "sample_metadata", - "type": "Metadata", - "ast": { - "name": "Metadata", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - }, - "required": true - }, - { - "id": "aead4a07-1cd3-4034-9e20-80536b8c2264", - "name": "table", - "type": "FeatureTable[Frequency | PresenceAbsence]", - "ast": { - "name": "FeatureTable", - "type": "expression", - "fields": [ - { - "type": "union", - "members": [ - { - "name": "Frequency", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - }, - { - "name": "PresenceAbsence", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - ] - } - ], - "builtin": false, - "predicate": null - }, - "required": true - } - ], - "parameters": [ - { - "id": "c2051f90-5128-4197-a430-e1be6e0ace56", - "name": "metric", - "required": false, - "default": "canberra", - "type": "Str % Choices('aitchison', 'braycurtis', 'canberra', 'canberra_adkins', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'jensenshannon', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule')", - "ast": { - "name": "Str", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": { - "name": "Choices", - "type": "predicate", - "choices": [ - "aitchison", - "braycurtis", - "canberra", - "canberra_adkins", - "chebyshev", - "cityblock", - "correlation", - "cosine", - "dice", - "euclidean", - "hamming", - "jaccard", - "jensenshannon", - "matching", - "minkowski", - "rogerstanimoto", - "russellrao", - "seuclidean", - "sokalmichener", - "sokalsneath", - 
"sqeuclidean", - "yule" - ] - } - } - }, - { - "id": "66b84a2e-9ad9-4774-ad8a-438ae54bf3da", - "name": "compare", - "required": true, - "default": null, - "type": "MetadataColumn[Categorical]", - "ast": { - "name": "MetadataColumn", - "type": "expression", - "fields": [ - { - "name": "Categorical", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - ], - "builtin": true, - "predicate": null - } - } - ], - "outputs": [ - { - "id": "a264bb78-3496-4324-ad75-a5aa933dd8f2", - "name": "summary", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - }, - { - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572", - "name": "distance_matrix1", - "type": "DistanceMatrix", - "ast": { - "name": "DistanceMatrix", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "4cb5be9f-964e-42e2-af86-ca75e72c0fcf", - "name": "beta-group", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - }, - { - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7", - "name": "distance_matrix2", - "type": "DistanceMatrix", - "ast": { - "name": "DistanceMatrix", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "332931ea-4a96-42ad-bf32-81713781e29b", - "name": "pcoa", - "type": "PCoAResults", - "ast": { - "name": "PCoAResults", - "type": "expression", - "fields": [], - "builtin": false, - "predicate": null - } - }, - { - "id": "ceb812ba-793b-4387-bdd0-4b91569e25b9", - "name": "emperor", - "type": "Visualization", - "ast": { - "name": "Visualization", - "type": "expression", - "fields": [], - "builtin": true, - "predicate": null - } - } - ] - }, - "graph": [ - { - "id": "febcf6e0-90ed-44a6-8b1b-f7beb53223a2", - "kind": "built-in", - "name": "root-input", - "inputs": { - "sample_metadata": { - "kind": 
"archive", - "id": "9a586cf8-272f-43de-b137-3fc36729f2c7" - } - }, - "parameters": {}, - "outputs": { - "sample_metadata": { - "kind": "archive", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - } - }, - { - "id": "a0ec2ae8-6736-4fab-9b07-9e718a7194f1", - "kind": "built-in", - "name": "root-input", - "inputs": { - "table": { - "kind": "archive", - "id": "aead4a07-1cd3-4034-9e20-80536b8c2264" - } - }, - "parameters": {}, - "outputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - } - }, - { - "id": "31efdf49-e06b-4ad5-8c97-47a2a323120f", - "kind": "plugin-action", - "plugin": "feature_table", - "action": "summarize", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - }, - "sample_metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": {}, - "outputs": { - "visualization": { - "kind": "archive", - "id": "a264bb78-3496-4324-ad75-a5aa933dd8f2" - } - } - }, - { - "id": "cfd580cd-7278-45b4-84f6-f879b1f94b24", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - }, - "parameters": { - "metric": { - "kind": "promoted", - "id": "c2051f90-5128-4197-a430-e1be6e0ace56" - } - }, - "outputs": { - "distance_matrix": { - "kind": "archive", - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572" - } - } - }, - { - "id": "22a5a5d7-d532-4251-914a-ecf34df945fd", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta_group_significance", - "inputs": { - "distance_matrix": { - "kind": "archive", - "id": "ee42b860-8d75-4120-b56c-b9d5c12bb572" - }, - "metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": { - "metadata": { - "kind": "metadata", - "column": { - "kind": "promoted", - "id": "66b84a2e-9ad9-4774-ad8a-438ae54bf3da" - } - } - }, - "outputs": { - "visualization": { 
- "kind": "archive", - "id": "4cb5be9f-964e-42e2-af86-ca75e72c0fcf" - } - } - }, - { - "id": "68a5d6a8-7d24-40d7-8197-3294951f5cd6", - "kind": "plugin-action", - "plugin": "diversity", - "action": "beta", - "inputs": { - "table": { - "kind": "archive", - "id": "63981d12-7beb-4443-aed4-a5e7b29ab796" - } - }, - "parameters": { - "metric": { - "kind": "promoted", - "id": "c2051f90-5128-4197-a430-e1be6e0ace56" - } - }, - "outputs": { - "distance_matrix": { - "kind": "archive", - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7" - } - } - }, - { - "id": "ef9ed04d-bb5e-40ab-9292-b5c0573be32d", - "kind": "plugin-action", - "plugin": "diversity", - "action": "pcoa", - "inputs": { - "distance_matrix": { - "kind": "archive", - "id": "7667756f-4c8a-4090-8ffd-200e37b6e4a7" - } - }, - "parameters": {}, - "outputs": { - "pcoa": { - "kind": "archive", - "id": "332931ea-4a96-42ad-bf32-81713781e29b" - } - } - }, - { - "id": "464fc7c0-a91b-4d66-a304-8668c0377867", - "kind": "plugin-action", - "plugin": "emperor", - "action": "plot", - "inputs": { - "pcoa": { - "kind": "archive", - "id": "332931ea-4a96-42ad-bf32-81713781e29b" - }, - "metadata": { - "kind": "metadata", - "id": "990f2bbb-dda6-4f83-87fa-5a56025d2095" - } - }, - "parameters": {}, - "outputs": { - "visualization": { - "kind": "archive", - "id": "ceb812ba-793b-4387-bdd0-4b91569e25b9" - } - } - } - ] -} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a58ad72..a62f17a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,46 +1,83 @@ [project] -name = "adagio" -version = "0.0.0" -description = "Adagio command line tool" +name = "adagio-cli" +version = "0.1.0a1" +description = "Command-line runner for Adagio pipeline files." 
readme = "README.md" -requires-python = ">=3.10,<3.11" +requires-python = ">=3.10" +authors = [{ name = "Cymis" }] +license = "MIT" +license-files = ["LICENSE"] +keywords = ["adagio", "bioinformatics", "cli", "pipelines", "qiime2", "workflow"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Software Development :: Libraries :: Python Modules", +] dependencies = [ - "rich>=14.1.0", - "typer>=0.17.4", + "cyclopts>=4.5.3", + "pydantic>=2.12.5", + "rich>=14.1.0", + "parsl>=2024.12.16", + "tomli>=2.2.1; python_version < '3.11'", ] +[project.urls] +Homepage = "https://github.com/cymis/adagio-cli" +Repository = "https://github.com/cymis/adagio-cli" +Issues = "https://github.com/cymis/adagio-cli/issues" +Changelog = "https://github.com/cymis/adagio-cli/blob/dev/CHANGELOG.md" [dependency-groups] dev = [ - "ruff>=0.13.0", + "build>=1.2.2", + "pytest>=8.4.0", + "ruff>=0.13.0", + "twine>=6.2.0", ] [project.scripts] -adagio = "adagio.cli:app" +adagio = "adagio.cli.main:main" [build-system] requires = ["uv_build>=0.8.17,<0.9.0"] build-backend = "uv_build" +[tool.uv.build-backend] +module-name = "adagio" + [tool.ruff.format] quote-style = "double" indent-style = "space" docstring-code-format = true [tool.ruff.lint.isort] + + known-first-party = ["adagio"] section-order = [ "future", "standard-library", "third-party", "first-party", - "local-folder" + "local-folder", ] - - [tool.mypy] plugins = ['pydantic.mypy'] [[tool.mypy.overrides]] + module = ["qiime2.*"] -ignore_missing_imports = true \ No newline at end of file +ignore_missing_imports 
def _extract_signature(data: Any) -> dict[str, Any]:
    """Return the ``signature`` mapping from a parsed pipeline document.

    Two layouts are accepted:

    - ``{"spec": {"signature": {...}}}``
    - ``{"signature": {...}}``

    The nested ``spec.signature`` wins when it is present and truthy;
    otherwise the top-level ``signature`` is used.

    Args:
        data: The decoded pipeline JSON (normally a ``dict``).

    Returns:
        The signature dictionary.

    Raises:
        ValueError: If no dictionary-valued signature can be found.
    """
    signature: Any = None
    if isinstance(data, dict):
        spec = data.get("spec")
        # ``spec`` may be present but not a mapping (e.g. ``"spec": null``);
        # only descend into it when it actually is one, so a valid top-level
        # ``signature`` is still honoured instead of raising AttributeError.
        nested = spec.get("signature") if isinstance(spec, dict) else None
        signature = nested or data.get("signature")

    if not isinstance(signature, dict):
        raise ValueError(
            "Invalid pipeline: missing 'signature' section in pipeline JSON."
        )

    return signature
+ + We currently accept either: + - {"spec": {"signature": {"parameters": [...]}}} + - {"signature": {"parameters": [...]} } + """ + signature = _extract_signature(data) + + raw_parameters = signature.get("parameters") + if not isinstance(raw_parameters, list): + raise ValueError( + "Invalid pipeline: missing 'signature.parameters' list in pipeline JSON." + ) + + return [Parameter(**param) for param in raw_parameters] + + +def parse_inputs(data: Any) -> List[Input]: + """Parse promoted inputs from supported pipeline JSON layouts.""" + signature = _extract_signature(data) + + raw_inputs = signature.get("inputs") + if not isinstance(raw_inputs, list): + raise ValueError( + "Invalid pipeline: missing 'signature.inputs' list in pipeline JSON." + ) + + return [Input(**input_item) for input_item in raw_inputs] + + +def parse_outputs(data: Any) -> List[Output]: + """Parse pipeline outputs from supported pipeline JSON layouts.""" + signature = _extract_signature(data) + + raw_outputs = signature.get("outputs") + if not isinstance(raw_outputs, list): + raise ValueError( + "Invalid pipeline: missing 'signature.outputs' list in pipeline JSON." + ) + + return [Output(**output_item) for output_item in raw_outputs] diff --git a/src/adagio/cli.py b/src/adagio/cli.py deleted file mode 100644 index 58ebead..0000000 --- a/src/adagio/cli.py +++ /dev/null @@ -1,125 +0,0 @@ -from typing import Annotated -import typer -from pathlib import Path - -from rich.console import Console - -import time -import itertools -from rich.live import Live -from .execute import parse_spec, parse_config, process_job - - -app = typer.Typer( - help="Adagio command line tool for processing pipelines created with the Adagio GUI." 
-) -console = Console() - - -@app.command("execute") -def execute_cmd( - pipeline: Annotated[ - Path, - typer.Option( - "--input", - "-i", - help="Adagio created pipeline", - exists=False, - file_okay=True, - dir_okay=False, - readable=True, - ), - ], - config: Annotated[ - Path, - typer.Option( - "--config", - "-c", - help="Configuration file for the pipeline", - exists=False, - file_okay=True, - dir_okay=False, - readable=True, - ), - ], -): - """Execute an Adagio created pipeline""" - spec = parse_spec(pipeline) - config = parse_config(config) - - process_job(spec, config) - - -@app.command("chicken") -def animate_big_chicken( - laps: int = typer.Option(1, help="How many times to go left→right→left."), - speed: float = typer.Option(0.08, help="Seconds between steps (lower = faster)."), -): - """Animate a multi-line chicken walking across the screen.""" - - # Two frames to fake wing flaps - frames = [ - [ - " __", - " <(o )___", - " ( ._>", - " `---'", - ], - [ - " __", - " <( -)___", - " (o ._>", - " `---'", - ], - ] - flap = itertools.cycle(frames) - - width = console.size.width - rightmost = max(10, width - 12) - - def render(pos: int, art: list[str]) -> str: - # Shift each line horizontally by pos - shifted = [" " * pos + line for line in art] - # Pad lines so Live keeps height/width stable - padded = [line.ljust(width) for line in shifted] - return "\n".join(padded) - - with Live( - render(0, next(flap)), console=console, refresh_per_second=30, transient=True - ) as live: - for _ in range(laps): - # Left → Right - for x in range(0, rightmost): - live.update(render(x, next(flap))) - time.sleep(speed) - # Right → Left - for x in range(rightmost, 0, -1): - live.update(render(x, next(flap))) - time.sleep(speed) - - console.print("[bold yellow]🐔 Big chicken says cluck![/bold yellow]") - - -@app.callback(invoke_without_command=True) -def main_callback( - version: Annotated[bool, typer.Option("--version", help="Show version")] = False, -): - """Adagio command line 
tool version.""" - if version: - try: - from importlib.metadata import PackageNotFoundError - from importlib.metadata import version as get_version - except ImportError: - from importlib_metadata import PackageNotFoundError # type: ignore - from importlib_metadata import version as get_version # type: ignore - try: - package_version = get_version("adagio") - console.print(f"Adagio {package_version}") - except PackageNotFoundError: - console.print("Adagio version unknown (not installed as a package)") - - raise typer.Exit() - - -if __name__ == "__main__": - app() diff --git a/src/adagio/cli/__init__.py b/src/adagio/cli/__init__.py new file mode 100644 index 0000000..c9c2ef6 --- /dev/null +++ b/src/adagio/cli/__init__.py @@ -0,0 +1 @@ +__all__: list[str] = [] diff --git a/src/adagio/cli/args.py b/src/adagio/cli/args.py new file mode 100644 index 0000000..3a7246e --- /dev/null +++ b/src/adagio/cli/args.py @@ -0,0 +1,62 @@ +import re +from enum import Enum + +try: + from enum import StrEnum +except ImportError: # pragma: no cover - Python < 3.11 + class StrEnum(str, Enum): + pass + + +class ParamType(StrEnum): + INPUT = "input" + PARAM = "param" + OUTPUT = "output" + + +class ShowParamsMode(StrEnum): + ALL = "all" + MISSING = "missing" + REQUIRED = "required" + + +def promote_positional_pipeline(argv: list[str]) -> tuple[list[str], str | None]: + """Allow `adagio run ` by rewriting it to `--pipeline `.""" + if len(argv) < 2 or argv[0] != "run": + return argv, None + + candidate = argv[1] + if candidate.startswith("-"): + return argv, None + + rewritten = ["run", "--pipeline", candidate, *argv[2:]] + return rewritten, candidate + + +def extract_flag_value(argv: list[str], *flags: str) -> str | None: + """Supports: --flag value, -f value, --flag=value.""" + flag_set = set(flags) + for i, tok in enumerate(argv): + if tok in flag_set: + return argv[i + 1] if i + 1 < len(argv) else None + for flag in flags: + if tok.startswith(flag + "="): + return tok.split("=", 1)[1] + 
return None + + +def to_identifier(name: str, prefix: str | None = None) -> str: + """Turn arbitrary names into valid Python identifiers for kwargs keys.""" + clean = (name or "").strip() + clean = re.sub(r"[^0-9a-zA-Z_]+", "_", clean) + if not clean: + raise ValueError("Empty parameter name in pipeline file.") + if clean[0].isdigit(): + clean = "_" + clean + if prefix: + return f"{prefix}_{clean}" + return clean + + +def dynamic_opt(name: str, param_type: ParamType) -> str: + return f"--{param_type}-{name.replace('_', '-')}" diff --git a/src/adagio/cli/cache.py b/src/adagio/cli/cache.py new file mode 100644 index 0000000..83f99f0 --- /dev/null +++ b/src/adagio/cli/cache.py @@ -0,0 +1,82 @@ +import re +import shutil +from pathlib import Path +from typing import Annotated + +from cyclopts import App, Group, Parameter +from rich.console import Console + +from ..executors.cache_support import CACHE_DIR_HELP, resolve_cache_dir_path + +QIIME_CACHE_CONTENTS = {"VERSION", "data", "keys", "pools", "processes"} +QIIME_CACHE_LINE_RE = re.compile(r"cache: v?\d+\Z") + + +def run_cache(argv: list[str], *, console: Console) -> None: + app = App( + name="adagio cache", + help="Manage Adagio's shared QIIME cache directory.", + ) + command_group = Group("Command Options", sort_key=0) + + @app.command + def clear( + *, + cache_dir: Annotated[ + Path, + Parameter( + name=("--cache-dir",), + group=command_group, + help=CACHE_DIR_HELP, + ), + ], + ) -> None: + """Delete an existing QIIME cache directory.""" + resolved_cache_dir = resolve_cache_dir_path( + cwd=Path.cwd().resolve(), + raw_value=str(cache_dir), + ) + _clear_cache(cache_dir=resolved_cache_dir, console=console) + + app(argv) + + +def _clear_cache(*, cache_dir: Path, console: Console) -> None: + _require_qiime_cache(cache_dir) + shutil.rmtree(cache_dir) + console.print(f"Cleared cache directory: {cache_dir}") + + +def _require_qiime_cache(cache_dir: Path) -> None: + if not cache_dir.exists(): + raise SystemExit(f"Cache 
directory does not exist: {cache_dir}") + if not cache_dir.is_dir(): + raise SystemExit(f"Cache path is not a directory: {cache_dir}") + + contents = set(item.name for item in cache_dir.iterdir()) + if not contents.issuperset(QIIME_CACHE_CONTENTS): + raise SystemExit(f"Path is not a QIIME cache: {cache_dir}") + + version_file = cache_dir / "VERSION" + try: + version_text = version_file.read_text(encoding="utf-8").strip() + except OSError as exc: + raise SystemExit(f"Could not read cache version file: {version_file}") from exc + + if not _looks_like_qiime_cache_version(version_text): + raise SystemExit(f"Path is not a QIIME cache: {cache_dir}") + + +def _looks_like_qiime_cache_version(version_text: str) -> bool: + lines = version_text.splitlines() + if len(lines) != 3: + return False + + if lines[0] != "QIIME 2": + return False + + if not QIIME_CACHE_LINE_RE.fullmatch(lines[1]): + return False + + framework_prefix = "framework: " + return lines[2].startswith(framework_prefix) and bool(lines[2][len(framework_prefix) :].strip()) diff --git a/src/adagio/cli/config.py b/src/adagio/cli/config.py new file mode 100644 index 0000000..5c09c71 --- /dev/null +++ b/src/adagio/cli/config.py @@ -0,0 +1,39 @@ +from pathlib import Path +from typing import Literal + +from pydantic import BaseModel, Field + +try: + import tomllib +except ModuleNotFoundError: # pragma: no cover + import tomli as tomllib + + +class ImageOverride(BaseModel): + kind: Literal["docker", "apptainer"] | None = None + image: str | None = None + platform: str | None = None + + +class DefaultOverride(BaseModel): + kind: Literal["docker", "apptainer"] | None = None + image: str | None = None + platform: str | None = None + + +class AdagioRunConfig(BaseModel): + version: int = 1 + defaults: DefaultOverride = Field(default_factory=DefaultOverride) + plugins: dict[str, ImageOverride] = Field(default_factory=dict) + tasks: dict[str, ImageOverride] = Field(default_factory=dict) + + +def load_run_config(path: Path | 
None) -> AdagioRunConfig | None: + if path is None: + return None + + data = tomllib.loads(path.read_text(encoding="utf-8")) + if not isinstance(data, dict): + raise SystemExit("Invalid config file: expected a TOML table.") + + return AdagioRunConfig.model_validate(data) diff --git a/src/adagio/cli/dynamic.py b/src/adagio/cli/dynamic.py new file mode 100644 index 0000000..3cc81b2 --- /dev/null +++ b/src/adagio/cli/dynamic.py @@ -0,0 +1,618 @@ +import inspect +import math +import re +import types +from pathlib import Path +from typing import Any, Annotated, Callable, Union, get_args, get_origin + +from cyclopts import Group +from cyclopts import Parameter as CliParameter + +from ..app.parsers.pipeline import Input as InputSpec +from ..app.parsers.pipeline import Output as OutputSpec +from ..app.parsers.pipeline import Parameter as ParamSpec +from ..executors.cache_support import ( + CACHE_DIR_HELP, + REUSE_HELP, +) +from .args import ParamType, ShowParamsMode, dynamic_opt, to_identifier + + +class _PipelineGroupFormatter: + """Render pipeline options in one aligned table.""" + + def __init__(self, entry_metadata: dict[str, dict[str, Any]]): + self.entry_metadata = entry_metadata + + def __call__(self, console: Any, options: Any, panel: Any) -> None: + from rich.console import Group as RichGroup + from rich.console import NewLine + + from cyclopts.help.specs import PanelSpec, TableSpec + + renderables: list[Any] = [] + + if panel.description: + renderables.append(panel.description) + if not panel.entries: + return + + if renderables: + renderables.append(NewLine()) + columns = _get_pipeline_parameter_columns( + console, panel.entries, self.entry_metadata + ) + renderables.append(TableSpec().build(columns, panel.entries)) + console.print(PanelSpec().build(RichGroup(*renderables), title=panel.title)) + + +def _entry_key(entry: Any) -> str: + options = entry.all_options if hasattr(entry, "all_options") else () + return next((name for name in options if 
name.startswith("--")), "") + + +def _unwrap_optional_type(type_hint: Any) -> Any: + origin = get_origin(type_hint) + if origin not in (types.UnionType, Union): + return type_hint + + args = [arg for arg in get_args(type_hint) if arg is not type(None)] + return args[0] if len(args) == 1 else type_hint + + +def _pipeline_type_label(type_hint: Any) -> str: + type_hint = _unwrap_optional_type(type_hint) + if type_hint is bool: + return "BOOLEAN" + if type_hint is int: + return "INTEGER" + if type_hint is float: + return "NUMBER" + if type_hint is Path: + return "PATH" + return "TEXT" + + +def _display_type_label(*, spec_type: str | None, type_hint: Any, is_input: bool) -> str: + if is_input: + return "PATH" + + if spec_type: + compact = _compact_type_text(spec_type) + if compact.startswith("["): + return compact + + return _pipeline_type_label(type_hint) + + +def _output_path_help(description: str | None) -> str: + cleaned = (description or "").strip() + if cleaned: + return f"{cleaned} Overrides --output-dir for this output." + return "Overrides --output-dir for this output." 
+ + +def _render_pipeline_type( + entry: Any, entry_metadata: dict[str, dict[str, Any]], width: int +) -> Any: + from rich.text import Text + + label = entry_metadata.get(_entry_key(entry), {}).get("type_label", "TEXT") + return Text(_wrap_type_label(label, width), style="bold yellow") + + +def _compact_type_text(type_text: str) -> str: + cleaned = type_text.strip() + if "Choices(" not in cleaned: + return f"({cleaned})" + + match = re.search(r"Choices\((.*)\)", cleaned) + if match is None: + return f"({cleaned})" + + choices = [ + choice.strip().strip("'\"") + for choice in match.group(1).split(",") + if choice.strip() + ] + if not choices: + return f"({cleaned})" + return "[" + "|".join(choices) + "]" + + +def _wrap_type_label(label: str, width: int) -> str: + if len(label) <= width or not (label.startswith("[") and label.endswith("]")): + return label + + choices = [choice for choice in label[1:-1].split("|") if choice] + if not choices: + return label + + lines: list[str] = [] + current = "[" + + for index, choice in enumerate(choices): + is_last = index == len(choices) - 1 + separator = "" if current in ("[", " |") else "|" + suffix = "]" if is_last else "" + candidate = current + separator + choice + suffix + + if len(candidate) <= width or current in ("[", " |"): + current = candidate + else: + lines.append(current) + current = " |" + choice + suffix + + if not current.endswith("]"): + current += "]" + lines.append(current) + return "\n".join(lines) + + +def _render_pipeline_description( + entry: Any, entry_metadata: dict[str, dict[str, Any]] +) -> Any: + from rich.text import Text + + from cyclopts.help.inline_text import InlineText + + metadata = entry_metadata.get(_entry_key(entry), {}) + description = entry.description + if description is None: + description = InlineText(Text()) + elif not isinstance(description, InlineText): + if hasattr(description, "__rich_console__"): + description = InlineText(description) + else: + description = 
InlineText(Text(str(description))) + + default = metadata.get("default") + if default is not None: + description.append(Text(f"[default: {default}]", "dim")) + + if metadata.get("required"): + description.append(Text("[required]", "dim red")) + + return description + + +def _get_pipeline_parameter_columns( + console: Any, + entries: list[Any], + entry_metadata: dict[str, dict[str, Any]], +) -> tuple[Any, ...]: + from cyclopts.help.specs import ( + ColumnSpec, + NameRenderer, + ) + + max_width = math.ceil(console.width * 0.35) + type_width = max( + 8, + min( + max( + len(entry_metadata.get(_entry_key(entry), {}).get("type_label", "TEXT")) + for entry in entries + ), + max(22, min(34, math.ceil(console.width * 0.3))), + ), + ) + name_column = ColumnSpec( + renderer=NameRenderer(max_width=max_width), + header="Option", + justify="left", + style="cyan", + max_width=max_width, + ) + type_column = ColumnSpec( + renderer=lambda entry: _render_pipeline_type(entry, entry_metadata, type_width), + header="Type", + justify="left", + no_wrap=True, + width=type_width, + min_width=type_width, + max_width=type_width, + ) + description_column = ColumnSpec( + renderer=lambda entry: _render_pipeline_description(entry, entry_metadata), + header="Description", + justify="left", + overflow="fold", + ) + + return (name_column, type_column, description_column) + + +def _spec_py_type(type_name: str) -> type: + """Map pipeline type text to a Python type.""" + normalized = re.sub(r"[^a-z0-9]+", " ", (type_name or "").lower()).strip() + tokens = set(normalized.split()) + + if {"bool", "boolean"} & tokens or "bool" in normalized: + return bool + if {"int", "integer"} & tokens or "int" in normalized: + return int + if {"float", "double", "number", "numeric", "real"} & tokens: + return float + if {"str", "string", "text"} & tokens: + return str + return str + + +def _default_py_type(default: Any) -> type | None: + """Infer a Python type from a default value.""" + if isinstance(default, bool): + 
return bool + if isinstance(default, int): + return int + if isinstance(default, float): + return float + if isinstance(default, str): + return str + return None + + +def _resolve_param_type(type_name: str, default: Any) -> type: + """Resolve the CLI parameter type from type text and default.""" + declared = _spec_py_type(type_name) + inferred = _default_py_type(default) + if inferred is None: + return declared + if declared is str and inferred is not str: + return inferred + return declared + + +def _format_help_text( + *, + description: str | None = None, +) -> str: + """Return plain description text for pipeline help rows.""" + return (description or "").strip() + + +def _is_required_param(spec: ParamSpec) -> bool: + return bool(spec.required and spec.default is None) + + +def build_dynamic_run( + *, + input_specs: list[InputSpec], + param_specs: list[ParamSpec], + output_specs: list[OutputSpec], + argument_inputs: dict[str, Any] | None = None, + argument_params: dict[str, Any] | None = None, + run_handler: Callable[ + [ + Path, + Path | None, + Path | None, + dict[str, Any], + list[tuple[str, str]], + list[tuple[str, str]], + list[tuple[str, str]], + str, + list[str], + list[str], + ], + None, + ], +): + """Build a dynamic run command from pipeline input, parameter, and output specs.""" + input_bindings: list[tuple[str, str]] = [] + param_bindings: list[tuple[str, str]] = [] + output_bindings: list[tuple[str, str]] = [] + required_inputs: list[str] = [] + required_params: list[str] = [] + seen_idents: set[str] = set() + entry_metadata: dict[str, dict[str, Any]] = {} + seen_opts: set[str] = { + "--pipeline", + "-p", + "--arguments", + "--config", + "--show-params", + "--cache-dir", + "--reuse", + "--no-reuse", + "--output-dir", + } + argument_inputs = argument_inputs or {} + argument_params = argument_params or {} + command_group = Group("Command Options", sort_key=0) + pipeline_group = Group( + "Pipeline", + sort_key=1, + 
help_formatter=_PipelineGroupFormatter(entry_metadata), + ) + + annotations: dict[str, Any] = { + "pipeline": Annotated[ + Path, + CliParameter( + name=("--pipeline", "-p"), + group=command_group, + help="Path to the pipeline JSON file.", + ), + ] + } + + annotations["arguments_file"] = Annotated[ + Path | None, + CliParameter( + name=("--arguments",), + group=command_group, + help="Path to a JSON arguments file. Values are applied before CLI overrides.", + ), + ] + annotations["show_params"] = Annotated[ + ShowParamsMode, + CliParameter( + name=("--show-params",), + group=command_group, + help="Parameter display mode: all, missing, or required.", + ), + ] + annotations["config_file"] = Annotated[ + Path | None, + CliParameter( + name=("--config",), + group=command_group, + help="Path to a TOML runtime config file.", + ), + ] + annotations["cache_dir"] = Annotated[ + Path, + CliParameter( + name=("--cache-dir",), + group=command_group, + help=CACHE_DIR_HELP, + ), + ] + annotations["reuse"] = Annotated[ + bool, + CliParameter( + name=("--reuse",), + negative=("--no-reuse",), + group=command_group, + help=REUSE_HELP, + ), + ] + annotations["output_dir"] = Annotated[ + Path | None, + CliParameter( + name=("--output-dir",), + group=command_group, + help="Directory for all pipeline outputs.", + ), + ] + + parameters: list[inspect.Parameter] = [ + inspect.Parameter( + name="pipeline", + kind=inspect.Parameter.KEYWORD_ONLY, + annotation=annotations["pipeline"], + ), + inspect.Parameter( + name="cache_dir", + kind=inspect.Parameter.KEYWORD_ONLY, + annotation=annotations["cache_dir"], + ), + inspect.Parameter( + name="arguments_file", + kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["arguments_file"], + ), + inspect.Parameter( + name="show_params", + kind=inspect.Parameter.KEYWORD_ONLY, + default=ShowParamsMode.REQUIRED, + annotation=annotations["show_params"], + ), + inspect.Parameter( + name="config_file", + 
kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["config_file"], + ), + inspect.Parameter( + name="reuse", + kind=inspect.Parameter.KEYWORD_ONLY, + default=True, + annotation=annotations["reuse"], + ), + inspect.Parameter( + name="output_dir", + kind=inspect.Parameter.KEYWORD_ONLY, + default=None, + annotation=annotations["output_dir"], + ), + ] + + def add_dynamic_option( + *, + ident: str, + opt: str, + required: bool, + py_type: Any, + help_text: str, + default: Any, + group: Group | tuple[Group, ...], + ) -> None: + if opt in seen_opts: + raise ValueError(f"Conflicting CLI option generated: {opt!r}.") + seen_opts.add(opt) + + annotation_type = py_type | None if default is None else py_type + annotations[ident] = Annotated[ + annotation_type, + CliParameter( + name=(opt,), + group=group, + help=help_text, + required=required, + ), + ] + parameters.append( + inspect.Parameter( + name=ident, + kind=inspect.Parameter.KEYWORD_ONLY, + default=default, + annotation=annotations[ident], + ) + ) + + required_input_specs = [spec for spec in input_specs if spec.required] + optional_input_specs = [spec for spec in input_specs if not spec.required] + required_param_specs = [spec for spec in param_specs if _is_required_param(spec)] + optional_param_specs = [spec for spec in param_specs if not _is_required_param(spec)] + + def add_input_spec(spec: InputSpec) -> None: + original = spec.name + ident = to_identifier(original, "input") + if ident in seen_idents: + raise ValueError( + f"Duplicate pipeline input name after normalization: {original!r}." 
+ ) + seen_idents.add(ident) + input_bindings.append((ident, original)) + argument_value = argument_inputs.get(original) + display_required = bool(spec.required and _is_missing(argument_value)) + if spec.required: + required_inputs.append(original) + + type_text = spec.type + opt = dynamic_opt(original, ParamType.INPUT) + entry_metadata[opt] = { + "type_label": _display_type_label( + spec_type=type_text, type_hint=str, is_input=True + ), + "default": None, + "required": display_required, + } + add_dynamic_option( + ident=ident, + opt=opt, + required=False, + py_type=str, + help_text=_format_help_text( + description=spec.description, + ), + default=None, + group=pipeline_group, + ) + + def add_param_spec(spec: ParamSpec) -> None: + original = spec.name + ident = to_identifier(original, "param") + if ident in seen_idents: + raise ValueError( + f"Duplicate pipeline parameter name after normalization: {original!r}." + ) + seen_idents.add(ident) + param_bindings.append((ident, original)) + + default = spec.default + is_required = _is_required_param(spec) + argument_value = argument_params.get(original) + has_argument_default = not _is_missing(argument_value) + display_default = ( + default if default is not None else (argument_value if has_argument_default else None) + ) + display_required = is_required and display_default is None + param_default = None + param_type: Any = _resolve_param_type(spec.type, default) + opt = dynamic_opt(original, ParamType.PARAM) + if is_required: + required_params.append(original) + entry_metadata[opt] = { + "type_label": _display_type_label( + spec_type=spec.type, type_hint=param_type, is_input=False + ), + "default": display_default, + "required": display_required, + } + add_dynamic_option( + ident=ident, + opt=opt, + required=False, + py_type=param_type, + help_text=_format_help_text( + description=spec.description, + ), + default=param_default, + group=pipeline_group, + ) + + for spec in required_input_specs: + add_input_spec(spec) + 
for spec in required_param_specs: + add_param_spec(spec) + for spec in optional_input_specs: + add_input_spec(spec) + for spec in optional_param_specs: + add_param_spec(spec) + + for spec in output_specs: + original = spec.name + ident = to_identifier(original, "output") + if ident in seen_idents: + raise ValueError( + f"Duplicate pipeline output name after normalization: {original!r}." + ) + seen_idents.add(ident) + output_bindings.append((ident, original)) + opt = dynamic_opt(original, ParamType.OUTPUT) + entry_metadata[opt] = { + "type_label": "PATH", + "default": None, + "required": False, + } + add_dynamic_option( + ident=ident, + opt=opt, + required=False, + py_type=str, + help_text=_format_help_text( + description=_output_path_help(spec.description), + ), + default=None, + group=pipeline_group, + ) + + def run( + pipeline: Path, + arguments_file: Path | None = None, + show_params: ShowParamsMode = ShowParamsMode.REQUIRED, + config_file: Path | None = None, + output_dir: Path | None = None, + **kwargs: Any, + ) -> None: + _ = show_params + kwargs["output_dir"] = output_dir + run_handler( + pipeline, + arguments_file, + config_file, + kwargs, + input_bindings, + param_bindings, + output_bindings, + "output_dir", + required_inputs, + required_params, + ) + + run.__annotations__ = annotations + run.__signature__ = inspect.Signature(parameters) + run.__doc__ = ( + "Run an Adagio pipeline.\n\n" + "Dynamic inputs, parameters, and outputs are loaded from the pipeline file and exposed as CLI options.\n" + "Use: adagio run --pipeline PATH --help" + ) + return run + + +def _is_missing(value: Any) -> bool: + return value is None or value == "" diff --git a/src/adagio/cli/main.py b/src/adagio/cli/main.py new file mode 100644 index 0000000..8df40ef --- /dev/null +++ b/src/adagio/cli/main.py @@ -0,0 +1,263 @@ +import json +import sys +from functools import partial +from pathlib import Path +from typing import Annotated, Any + +from cyclopts import App, Group, Parameter 
+from cyclopts.panel import CycloptsPanel +from rich.console import Console + +from .. import __version__ +from ..app.parsers.pipeline import Input as InputSpec +from ..app.parsers.pipeline import Output as OutputSpec +from ..app.parsers.pipeline import Parameter as ParamSpec +from ..app.parsers.pipeline import parse_inputs, parse_outputs, parse_parameters +from ..executors.cache_support import CACHE_DIR_HELP, REUSE_HELP +from .args import ShowParamsMode, extract_flag_value, promote_positional_pipeline +from .config import load_run_config +from .dynamic import build_dynamic_run +from .pipeline import run_pipeline_cli +from .qapi import run_qapi +from .runner import run_pipeline_from_kwargs + + +console = Console() + + +def main(argv: list[str] | None = None) -> None: + argv = sys.argv[1:] if argv is None else argv + + if argv and argv[0] == "exec-task": + from .task_exec import run_task_exec + + run_task_exec(argv[1:]) + return + + if argv and argv[0] == "cache": + from .cache import run_cache + + run_cache(argv[1:], console=console) + return + + if argv and argv[0] == "runtime": + from .runtime import run_runtime + + run_runtime(argv[1:], console=console) + return + + if argv and argv[0] == "qapi": + run_qapi(argv[1:]) + return + + if argv and argv[0] == "pipeline": + run_pipeline_cli(argv[1:]) + return + + argv, positional_pipeline = promote_positional_pipeline(argv) + pipeline_str = extract_flag_value(argv, "--pipeline", "-p") + show_mode_str = extract_flag_value(argv, "--show-params") + try: + show_mode = ( + ShowParamsMode(show_mode_str) if show_mode_str else ShowParamsMode.REQUIRED + ) + except ValueError: + console.print(CycloptsPanel("Invalid --show-params value. 
Use one of: all, missing, required.")) + sys.exit(1) + if pipeline_str is None: + pipeline_str = positional_pipeline + + app = App( + name="adagio", + help="Adagio command line tool for processing pipelines created with the Adagio GUI.", + help_format="rich", + version=__version__, + ) + @app.command + def cache() -> None: + """Manage the shared QIIME cache directory.""" + console.print(CycloptsPanel("Try: adagio cache --help")) + sys.exit(1) + + @app.command + def runtime() -> None: + """Execute a pipeline from spec/config/arguments files.""" + console.print(CycloptsPanel("Try: adagio runtime --help")) + sys.exit(1) + + @app.command + def qapi() -> None: + """Generate and submit QAPI payloads.""" + console.print(CycloptsPanel("Try: adagio qapi --help")) + sys.exit(1) + + @app.command + def pipeline() -> None: + """Inspect pipeline definitions.""" + console.print(CycloptsPanel("Try: adagio pipeline --help")) + sys.exit(1) + + if not pipeline_str: + command_group = Group("Command Options", sort_key=0) + + @app.command + def run( + *, + pipeline: Annotated[ + Path, + Parameter( + name=("--pipeline", "-p"), + group=command_group, + help="Path to the pipeline JSON file.", + ), + ], + arguments: Annotated[ + Path | None, + Parameter( + name=("--arguments",), + group=command_group, + help="Path to a JSON arguments file.", + ), + ] = None, + config: Annotated[ + Path | None, + Parameter( + name=("--config",), + group=command_group, + help="Path to a TOML runtime config file.", + ), + ] = None, + show_params: Annotated[ + ShowParamsMode, + Parameter( + name=("--show-params",), + group=command_group, + help="Parameter display mode: all, missing, or required.", + ), + ] = ShowParamsMode.REQUIRED, + cache_dir: Annotated[ + Path, + Parameter( + name=("--cache-dir",), + group=command_group, + help=CACHE_DIR_HELP, + ), + ], + reuse: Annotated[ + bool, + Parameter( + name=("--reuse",), + negative=("--no-reuse",), + group=command_group, + help=REUSE_HELP, + ), + ] = True, + ): + 
"""Run a pipeline (requires --pipeline; dynamic options come from that file).""" + _ = (config, show_params, cache_dir, reuse) + console.print(CycloptsPanel("Missing --pipeline. Try:\n adagio run --pipeline pipeline.json --help")) + sys.exit(1) + + app(argv) + return + + pipeline_path = Path(pipeline_str) + data = json.loads(pipeline_path.read_text(encoding="utf-8")) + input_specs = parse_inputs(data) + param_specs = parse_parameters(data) + output_specs = parse_outputs(data) + arguments_path_str = extract_flag_value(argv, "--arguments") + config_path_str = extract_flag_value(argv, "--config") + arguments_data = ( + _load_arguments_data(Path(arguments_path_str), console) if arguments_path_str else None + ) + if config_path_str: + load_run_config(Path(config_path_str)) + visible_inputs, visible_params, visible_outputs = _filter_visible_specs( + input_specs=input_specs, + param_specs=param_specs, + output_specs=output_specs, + show_mode=show_mode, + arguments_data=arguments_data, + ) + + dynamic_run = build_dynamic_run( + input_specs=visible_inputs, + param_specs=visible_params, + output_specs=visible_outputs, + argument_inputs=arguments_data.get("inputs", {}) if arguments_data else None, + argument_params=arguments_data.get("parameters", {}) if arguments_data else None, + run_handler=partial(run_pipeline_from_kwargs, console=console), + ) + app.command(dynamic_run, name="run") + app(argv) + + +def _filter_visible_specs( + *, + input_specs: list[InputSpec], + param_specs: list[ParamSpec], + output_specs: list[OutputSpec], + show_mode: ShowParamsMode, + arguments_data: dict[str, Any] | None, +) -> tuple[list[InputSpec], list[ParamSpec], list[OutputSpec]]: + if show_mode is ShowParamsMode.ALL: + return input_specs, param_specs, output_specs + + state_inputs = {spec.name: None for spec in input_specs} + state_params = {spec.name: spec.default for spec in param_specs} + + if arguments_data is not None: + state_inputs.update(arguments_data.get("inputs", {})) + 
def _load_arguments_data(path: Path, _console: Console | None = None) -> dict[str, Any]:
    """Load and validate a JSON arguments file.

    Args:
        path: Location of the arguments file.
        _console: Console used for error panels; a stderr console is created
            when omitted.

    Returns:
        The parsed object, guaranteed to hold dict-valued ``"inputs"`` and
        ``"parameters"`` keys (each defaults to an empty dict when absent).

    Raises:
        SystemExit: With status 1, after printing an error panel, when the
            file cannot be read, is not valid JSON, is not a JSON object, or
            has a non-object ``inputs`` / ``parameters`` entry.
    """
    _con = _console or Console(stderr=True)

    # Report file problems the same way as the shape checks below, rather
    # than letting a raw traceback reach the user.
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except OSError as exc:
        _con.print(CycloptsPanel(f"Could not read arguments file: {path} ({exc.strerror or exc})"))
        sys.exit(1)
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        _con.print(CycloptsPanel(f"Invalid arguments file: not valid JSON ({exc})."))
        sys.exit(1)

    if not isinstance(data, dict):
        _con.print(CycloptsPanel("Invalid arguments file: expected a JSON object."))
        sys.exit(1)

    data.setdefault("inputs", {})
    data.setdefault("parameters", {})
    if not isinstance(data["inputs"], dict) or not isinstance(data["parameters"], dict):
        _con.print(CycloptsPanel("Invalid arguments file: 'inputs' and 'parameters' must be objects."))
        sys.exit(1)
    return data
app.command(show_pipeline, name="show") + app(argv) + + +def show_pipeline(pipeline: Path) -> None: + """Print a pipeline summary to the terminal.""" + data = json.loads(pipeline.read_text(encoding="utf-8")) + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) + console.print(render_pipeline_text(parsed_pipeline), soft_wrap=True) diff --git a/src/adagio/cli/qapi.py b/src/adagio/cli/qapi.py new file mode 100644 index 0000000..a42657c --- /dev/null +++ b/src/adagio/cli/qapi.py @@ -0,0 +1,172 @@ +import json +import os +from pathlib import Path +from typing import Annotated + +from cyclopts import App, Parameter +from rich.console import Console + +from ..qapi import DEFAULT_SCHEMA_VERSION, generate_qapi_payload, submit_qapi_payload + +console = Console() + + +def run_qapi(argv: list[str]) -> None: + app = App( + name="adagio qapi", + help="Generate and submit QAPI payloads from the active QIIME environment.", + ) + app.command(build_qapi, name="build") + app(argv) + + +def _print_submission_summary(response_body: object) -> None: + if isinstance(response_body, dict): + message = response_body.get("message") + if isinstance(message, str) and message.strip(): + console.print(message) + + operations = response_body.get("operations") + if isinstance(operations, list): + created = [ + operation["plugin_name"] + for operation in operations + if isinstance(operation, dict) and operation.get("action") == "create" + ] + overwritten = [ + operation["plugin_name"] + for operation in operations + if isinstance(operation, dict) and operation.get("action") == "overwrite" + ] + if created: + console.print(f"[green]Create:[/green] {', '.join(created)}") + if overwritten: + console.print(f"[yellow]Overwrite:[/yellow] {', '.join(overwritten)}") + return + + if isinstance(response_body, str): + if response_body.strip(): + console.print(response_body) + return + + if response_body is not None: + 
console.print(json.dumps(response_body, indent=2)) + + +def build_qapi( + *, + action_url: Annotated[ + str | None, + Parameter( + name=("--action-url",), + help=( + "Action Potential API base URL (e.g. http://localhost:81/api/v1). " + "Defaults to ACTION_URL env var." + ), + ), + ] = None, + schema_version: Annotated[ + str, + Parameter( + name=("--schema-version",), + help="Schema version string stored alongside generated plugin data.", + ), + ] = DEFAULT_SCHEMA_VERSION, + plugin: Annotated[ + tuple[str, ...], + Parameter( + name=("--plugin",), + help=( + "Plugin name to include. Repeat the option for multiple plugins. " + "Comma-separated values are also accepted." + ), + ), + ] = (), + all_plugins: Annotated[ + bool, + Parameter( + name=("--all",), + help=( + "Submit all installed plugins. This is also the default when " + "no --plugin values are provided." + ), + ), + ] = False, + output: Annotated[ + Path | None, + Parameter( + name=("--output",), + help="Optional path to write the generated request JSON.", + ), + ] = None, + submission_token: Annotated[ + str | None, + Parameter( + name=("--submission-token",), + help=( + "Bearer token for protected QAPI submission routes. Defaults to " + "QAPI_SUBMISSION_TOKEN env var; prefer the env var to avoid shell history leaks." + ), + ), + ] = None, + timeout: Annotated[ + int, + Parameter( + name=("--timeout",), + help="HTTP timeout (seconds) for submitting to Action Potential.", + ), + ] = 60, + dry_run: Annotated[ + bool, + Parameter( + name=("--dry-run",), + help=( + "Preview the backend changes without writing them. If no Action URL is " + "configured, this falls back to generating the payload locally only." 
+ ), + ), + ] = False, + force_overwrite: Annotated[ + bool, + Parameter( + name=("--force-overwrite",), + help="Overwrite existing plugins for the same QIIME version.", + ), + ] = False, +) -> None: + """Generate QAPI from the active QIIME environment and submit it to Action Potential.""" + if all_plugins and plugin: + raise SystemExit("Use either --all or --plugin, not both.") + + requested_plugins = None if all_plugins or not plugin else plugin + try: + request_body = generate_qapi_payload( + schema_version=schema_version, + plugins=requested_plugins, + ) + except ValueError as exc: + raise SystemExit(str(exc)) from exc + + if output is not None: + output.write_text(json.dumps(request_body, indent=2), encoding="utf-8") + console.print(f"[green]Wrote QAPI payload:[/green] {output}") + + resolved_action_url = action_url or os.getenv("ACTION_URL") + if dry_run and not resolved_action_url: + console.print( + "[yellow]Dry run enabled without an Action URL; generated the payload locally only.[/yellow]" + ) + return + + url, status, response_body = submit_qapi_payload( + request_body, + action_url=action_url, + submission_token=submission_token, + timeout=timeout, + dry_run=dry_run, + force_overwrite=force_overwrite, + ) + + verb = "Previewed QAPI submit against" if dry_run else "Submitted QAPI to" + console.print(f"[green]{verb}[/green] {url} [green](HTTP {status})[/green]") + _print_submission_summary(response_body) diff --git a/src/adagio/cli/runner.py b/src/adagio/cli/runner.py new file mode 100644 index 0000000..6783355 --- /dev/null +++ b/src/adagio/cli/runner.py @@ -0,0 +1,285 @@ +import json +import os +import sys +from pathlib import Path +from typing import Any + +from rich import box +from rich.console import Console +from rich.panel import Panel +from rich.text import Text + +from .config import load_run_config +from ..executors.base import TaskEnvironmentOverride +from ..executors.cache_support import ( + describe_cache_config, + resolve_cache_config, +) + 
+ +def _error_exit(console: Console, message: str) -> None: + panel = Panel( + Text.from_markup(message), + title="Error", + border_style="red", + box=box.ROUNDED, + expand=True, + title_align="left", + ) + console.print(panel) + sys.exit(1) + + +DEFAULT_OUTPUT_DIRNAME = "adagio-outputs" + + +def run_pipeline_from_kwargs( + pipeline: Path, + arguments_file: Path | None, + config_file: Path | None, + kwargs: dict[str, Any], + input_bindings: list[tuple[str, str]], + param_bindings: list[tuple[str, str]], + output_bindings: list[tuple[str, str]], + output_dir_ident: str, + required_inputs: list[str], + required_params: list[str], + *, + console: Console, +) -> None: + """Run a pipeline from resolved CLI keyword arguments.""" + from ..model.arguments import AdagioArgumentsFile + from ..model.pipeline import AdagioPipeline + + cache_dir = kwargs.pop("cache_dir", None) + reuse = bool(kwargs.pop("reuse", True)) + + data = json.loads(pipeline.read_text(encoding="utf-8")) + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + parsed_pipeline = AdagioPipeline.model_validate(pipeline_data) + arguments = parsed_pipeline.signature.to_default_arguments() + run_config = load_run_config(config_file) + output_names = [output.name for output in parsed_pipeline.signature.outputs] + + input_names = {name for _, name in input_bindings} + param_names = {name for _, name in param_bindings} + output_name_set = set(output_names) + + if arguments_file is not None: + file_data = json.loads(arguments_file.read_text(encoding="utf-8")) + arguments_data = AdagioArgumentsFile.model_validate(file_data) + + unknown_inputs = sorted(set(arguments_data.inputs) - input_names) + if unknown_inputs: + _error_exit( + console, + "Unknown inputs in arguments file: " + ", ".join(unknown_inputs), + ) + + unknown_params = sorted(set(arguments_data.parameters) - param_names) + if unknown_params: + _error_exit( + console, + "Unknown parameters in arguments file: " + ", 
".join(unknown_params), + ) + + unknown_outputs: list[str] = [] + if isinstance(arguments_data.outputs, dict): + unknown_outputs = sorted(set(arguments_data.outputs) - output_name_set) + if unknown_outputs: + _error_exit( + console, + "Unknown outputs in arguments file: " + ", ".join(unknown_outputs), + ) + + arguments.inputs.update(arguments_data.inputs) + arguments.parameters.update(arguments_data.parameters) + if arguments_data.outputs is not None: + arguments.outputs = arguments_data.outputs + + for ident, original in input_bindings: + value = kwargs.get(ident) + if value is not None: + arguments.inputs[original] = str(value) + + for ident, original in param_bindings: + value = kwargs.get(ident) + if value is not None: + arguments.parameters[original] = value + + cli_output_dir = kwargs.get(output_dir_ident) + cli_output_overrides = { + original: str(value) + for ident, original in output_bindings + if (value := kwargs.get(ident)) is not None + } + arguments.outputs = _apply_output_overrides( + outputs=arguments.outputs, + output_names=output_names, + output_dir=str(cli_output_dir) if cli_output_dir is not None else None, + output_overrides=cli_output_overrides, + ) + + missing_inputs = [ + name for name in required_inputs if _is_missing(arguments.inputs.get(name)) + ] + missing_params = [ + name for name in required_params if _is_missing(arguments.parameters.get(name)) + ] + if missing_inputs or missing_params: + missing_opts = [f"--input-{n.replace('_', '-')}" for n in missing_inputs] + [ + f"--param-{n.replace('_', '-')}" for n in missing_params + ] + formatted = ", ".join(f"[cyan]{opt}[/cyan]" for opt in missing_opts) + _error_exit(console, f"Missing required arguments: {formatted}") + + arguments.outputs = _resolve_output_destinations( + outputs=arguments.outputs, + output_names=output_names, + cwd=Path.cwd().resolve(), + ) + + suppress_header = _is_truthy(os.getenv("ADAGIO_SUPPRESS_RUN_HEADER")) + if not suppress_header: + 
console.print(f"[bold]Pipeline:[/bold] {pipeline}") + + cache_config = resolve_cache_config( + cwd=Path.cwd().resolve(), + cache_dir=cache_dir, + reuse=reuse, + ) + + if not suppress_header: + console.print(f"[bold]Cache:[/bold] {describe_cache_config(cache_config)}") + + from ..executors import select_default_executor + + executor = select_default_executor( + default_override=_config_default_override(run_config), + plugin_overrides=_config_named_overrides( + run_config.plugins if run_config is not None else {} + ), + task_overrides=_config_named_overrides( + run_config.tasks if run_config is not None else {} + ), + ) + + if not suppress_header: + console.print(f"[bold]Executing pipeline[/bold] ({executor.mode_label})") + + executor.execute( + pipeline=parsed_pipeline, + arguments=arguments, + console=console, + cache_config=cache_config, + ) + + +def _is_missing(value: Any) -> bool: + """Treat placeholders and null values as missing.""" + return value is None or value == "" + + +def _is_missing_output(value: Any) -> bool: + if not isinstance(value, str): + return True + return value == "" or value == "" + + +def _resolve_output_destinations( + *, + outputs: str | dict[str, str], + output_names: list[str], + cwd: Path, +) -> str | dict[str, str]: + default_output_dir = (cwd / DEFAULT_OUTPUT_DIRNAME).resolve() + if isinstance(outputs, str): + if _is_missing_output(outputs): + return str(default_output_dir) + return outputs + + if not isinstance(outputs, dict): + raise TypeError("Unsupported outputs configuration.") + + resolved = dict(outputs) + for output_name in output_names: + value = resolved.get(output_name) + if _is_missing_output(value): + resolved[output_name] = str((default_output_dir / output_name).resolve()) + return resolved + + +def _apply_output_overrides( + *, + outputs: str | dict[str, str], + output_names: list[str], + output_dir: str | None, + output_overrides: dict[str, str], +) -> str | dict[str, str]: + if output_dir is not None: + if not 
output_overrides: + return output_dir + + resolved = { + output_name: os.path.join(output_dir, output_name) + for output_name in output_names + } + resolved.update(output_overrides) + return resolved + + if not output_overrides: + return outputs + + if isinstance(outputs, dict): + resolved = dict(outputs) + elif isinstance(outputs, str): + if _is_missing_output(outputs): + resolved = {} + else: + resolved = { + output_name: os.path.join(outputs, output_name) + for output_name in output_names + } + else: + raise TypeError("Unsupported outputs configuration.") + + resolved.update(output_overrides) + return resolved + + +def _is_truthy(value: str | None) -> bool: + if value is None: + return False + return value.strip().lower() in {"1", "true", "yes", "on"} + + +def _config_default_override(run_config: Any) -> TaskEnvironmentOverride | None: + if run_config is None: + return None + + defaults = run_config.defaults + if defaults.kind is None and defaults.image is None and defaults.platform is None: + return None + + return TaskEnvironmentOverride( + kind=defaults.kind, + reference=defaults.image, + platform=defaults.platform, + ) + + +def _config_named_overrides( + raw_overrides: dict[str, Any], +) -> dict[str, TaskEnvironmentOverride] | None: + resolved = { + name: TaskEnvironmentOverride( + kind=override.kind, + reference=override.image, + platform=override.platform, + ) + for name, override in raw_overrides.items() + if override.kind is not None + or override.image is not None + or override.platform is not None + } + return resolved or None diff --git a/src/adagio/cli/runtime.py b/src/adagio/cli/runtime.py new file mode 100644 index 0000000..8328035 --- /dev/null +++ b/src/adagio/cli/runtime.py @@ -0,0 +1,388 @@ +import argparse +import json +import os +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +from rich.console import Console + +from ..executors.base import TaskEnvironmentOverride +from ..executors.cache_support 
import ( + CACHE_DIR_HELP, + REUSE_HELP, + resolve_cache_config, +) +from ..model.arguments import AdagioArguments +from ..model.pipeline import AdagioPipeline +from ..monitor.composite import CompositeMonitor +from ..monitor.connected import ConnectedMonitor +from ..monitor.log import LogMonitor +from .config import load_run_config + + +def run_runtime(argv: list[str], *, console: Console) -> None: + """Runtime entrypoint used by the runtime-adapter job container.""" + parser = argparse.ArgumentParser( + prog="adagio runtime", + description=( + "Execute a pipeline from spec/config/arguments files. " + "The config file may define default, per-plugin, and per-task environment overrides." + ), + ) + parser.add_argument("--spec", required=True, help="Path to pipeline spec JSON.") + parser.add_argument( + "--config", + required=True, + help="Path to runtime config TOML.", + ) + parser.add_argument( + "--arguments", required=False, help="Path to run arguments JSON." + ) + parser.add_argument("--job-id", required=False, help="Runtime job ID.") + parser.add_argument( + "--output-dir", required=False, help="Directory for output artifacts." + ) + parser.add_argument( + "--runtime-url", required=False, help="Runtime adapter API base URL." 
+ ) + parser.add_argument( + "--cache-dir", + required=True, + help=CACHE_DIR_HELP, + ) + parser.add_argument( + "--reuse", + action=argparse.BooleanOptionalAction, + default=True, + help=REUSE_HELP, + ) + parser.add_argument( + "--connected", + action="store_true", + help="Emit execution status updates to the runtime-adapter.", + ) + + opts = parser.parse_args(argv) + + spec_data = _load_json(Path(opts.spec)) + run_config = load_run_config(Path(opts.config)) + runtime_arguments: Any = {} + if opts.arguments: + runtime_arguments = _load_json(Path(opts.arguments)) + if runtime_arguments is None: + runtime_arguments = {} + + pipeline = _parse_pipeline(spec_data) + output_dir = _resolve_output_dir(opts.output_dir, opts.job_id) + arguments = _build_arguments( + pipeline=pipeline, + runtime_arguments=runtime_arguments, + output_dir=output_dir, + ) + _validate_required_arguments(pipeline, arguments) + cache_config = resolve_cache_config( + cwd=Path.cwd().resolve(), + cache_dir=opts.cache_dir, + reuse=opts.reuse, + ) + + connected = bool( + opts.connected + and opts.job_id + and (opts.runtime_url or os.getenv("RUNTIME_URL")) + ) + runtime_url = opts.runtime_url or os.getenv("RUNTIME_URL") + + log_monitor = LogMonitor(console=console) + monitor = log_monitor + if connected and runtime_url: + monitor = CompositeMonitor( + log_monitor, + ConnectedMonitor(runtime_url=runtime_url, job_id=opts.job_id or ""), + ) + + if connected and runtime_url and opts.job_id: + _post_job_event( + runtime_url=runtime_url, + job_id=opts.job_id, + payload={"event": "job_status", "status": "running"}, + ) + + from ..executors import select_default_executor + + executor = select_default_executor( + default_override=_default_override(run_config), + plugin_overrides=_named_overrides( + run_config.plugins if run_config is not None else {} + ), + task_overrides=_named_overrides( + run_config.tasks if run_config is not None else {} + ), + ) + + try: + executor.execute( + pipeline=pipeline, + 
arguments=arguments, + console=console, + monitor=monitor, + cache_config=cache_config, + ) + except Exception as exc: # noqa: BLE001 + if connected and runtime_url and opts.job_id: + _post_job_event( + runtime_url=runtime_url, + job_id=opts.job_id, + payload={"event": "job_status", "status": "failed", "error": str(exc)}, + ) + raise + else: + if connected and runtime_url and opts.job_id: + _post_job_event( + runtime_url=runtime_url, + job_id=opts.job_id, + payload={"event": "job_status", "status": "succeeded"}, + ) + + +def _load_json(path: Path) -> Any: + return json.loads(path.read_text(encoding="utf-8")) + + +def _parse_pipeline(data: Any) -> AdagioPipeline: + pipeline_data = data.get("spec", data) if isinstance(data, dict) else data + return AdagioPipeline.model_validate(pipeline_data) + + +def _resolve_output_dir(raw_output_dir: str | None, job_id: str | None) -> str: + if raw_output_dir: + output_dir = raw_output_dir + elif job_id: + output_dir = f"/storage/runtime_jobs/{job_id}/outputs" + else: + output_dir = "/storage/runtime_outputs" + os.makedirs(output_dir, exist_ok=True) + return output_dir + + +def _default_override(run_config: Any) -> TaskEnvironmentOverride | None: + if run_config is None: + return None + defaults = run_config.defaults + if defaults.kind is None and defaults.image is None and defaults.platform is None: + return None + return TaskEnvironmentOverride( + kind=defaults.kind, + reference=defaults.image, + platform=defaults.platform, + ) + + +def _named_overrides( + raw_overrides: dict[str, Any], +) -> dict[str, TaskEnvironmentOverride] | None: + resolved = { + name: TaskEnvironmentOverride( + kind=override.kind, + reference=override.image, + platform=override.platform, + ) + for name, override in raw_overrides.items() + if override.kind is not None + or override.image is not None + or override.platform is not None + } + return resolved or None + + +def _build_arguments( + *, + pipeline: AdagioPipeline, + runtime_arguments: Any, + 
output_dir: str, +) -> AdagioArguments: + arguments = pipeline.signature.to_default_arguments() + storage_root = "/storage" + + if isinstance(runtime_arguments, dict): + if isinstance(runtime_arguments.get("inputs"), dict): + _apply_named_arguments( + arguments=arguments, + runtime_arguments=runtime_arguments, + storage_root=storage_root, + ) + else: + _apply_legacy_arguments( + pipeline=pipeline, + arguments=arguments, + runtime_arguments=runtime_arguments, + storage_root=storage_root, + ) + + resolved_outputs = _resolve_outputs( + runtime_arguments.get("outputs"), storage_root=storage_root + ) + if resolved_outputs is not None: + arguments.outputs = resolved_outputs + + if _outputs_need_default(arguments.outputs): + arguments.outputs = output_dir + + return arguments + + +def _apply_named_arguments( + *, arguments: AdagioArguments, runtime_arguments: dict[str, Any], storage_root: str +) -> None: + raw_inputs = runtime_arguments.get("inputs", {}) + if isinstance(raw_inputs, dict): + for name, value in raw_inputs.items(): + arguments.inputs[name] = _resolve_input_path( + value, storage_root=storage_root + ) + + raw_parameters = runtime_arguments.get("parameters", {}) + if isinstance(raw_parameters, dict): + arguments.parameters.update(raw_parameters) + + +def _apply_legacy_arguments( + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + runtime_arguments: dict[str, Any], + storage_root: str, +) -> None: + preprocessing = runtime_arguments.get("preprocessing", {}) + root_artifacts = ( + preprocessing.get("root_artifacts", []) + if isinstance(preprocessing, dict) + else [] + ) + token_lookup: dict[str, Any] = {} + if isinstance(root_artifacts, list): + for artifact in root_artifacts: + if not isinstance(artifact, dict): + continue + artifact_id = artifact.get("id") + token = artifact.get("token") + if artifact_id is None: + continue + token_lookup[str(artifact_id)] = token + + for input_def in pipeline.signature.inputs: + token = 
token_lookup.get(str(input_def.id)) + if token is None: + continue + arguments.inputs[input_def.name] = _resolve_input_path( + token, storage_root=storage_root + ) + + named_inputs = runtime_arguments.get("inputs", {}) + if isinstance(named_inputs, dict): + for name, value in named_inputs.items(): + arguments.inputs[name] = _resolve_input_path( + value, storage_root=storage_root + ) + + task_arguments = runtime_arguments.get("arguments", {}) + if isinstance(task_arguments, dict): + for step in task_arguments.values(): + if not isinstance(step, dict): + continue + params = step.get("parameters", {}) + if isinstance(params, dict): + arguments.parameters.update(params) + + top_level_params = runtime_arguments.get("parameters", {}) + if isinstance(top_level_params, dict): + arguments.parameters.update(top_level_params) + + +def _resolve_input_path(value: Any, *, storage_root: str) -> str: + if isinstance(value, dict): + path = value.get("path") + if path is None: + return str(value) + return _normalize_path(path, storage_root=storage_root) + if isinstance(value, str): + return _normalize_path(value, storage_root=storage_root) + return str(value) + + +def _resolve_outputs(value: Any, *, storage_root: str) -> str | dict[str, str] | None: + if value is None: + return None + if isinstance(value, str): + return _normalize_path(value, storage_root=storage_root) + if isinstance(value, dict): + resolved: dict[str, str] = {} + for name, output in value.items(): + if isinstance(output, dict): + resolved[name] = _resolve_input_path(output, storage_root=storage_root) + elif isinstance(output, str): + resolved[name] = _normalize_path(output, storage_root=storage_root) + else: + resolved[name] = str(output) + return resolved + return None + + +def _normalize_path(path: str, *, storage_root: str) -> str: + if not path: + return path + if path.startswith("/") or "://" in path: + return path + return os.path.join(storage_root, path) + + +def _outputs_need_default(outputs: str | 
dict[str, str]) -> bool: + if isinstance(outputs, str): + return outputs == "" or outputs == "" + return any(value in {"", ""} for value in outputs.values()) + + +def _is_missing(value: Any) -> bool: + return value is None or value == "" or value == "" + + +def _validate_required_arguments( + pipeline: AdagioPipeline, arguments: AdagioArguments +) -> None: + missing_inputs = [ + input_def.name + for input_def in pipeline.signature.inputs + if input_def.required and _is_missing(arguments.inputs.get(input_def.name)) + ] + missing_params = [ + param.name + for param in pipeline.signature.parameters + if param.required + and param.default is None + and _is_missing(arguments.parameters.get(param.name)) + ] + + if missing_inputs or missing_params: + missing = [f"input:{name}" for name in missing_inputs] + [ + f"param:{name}" for name in missing_params + ] + raise SystemExit("Missing required runtime arguments: " + ", ".join(missing)) + + +def _post_job_event(*, runtime_url: str, job_id: str, payload: dict[str, Any]) -> None: + base = runtime_url.rstrip("/") + url = f"{base}/jobs/{job_id}/events" + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request( + url, + data=data, + method="POST", + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=5): + pass + except (urllib.error.URLError, TimeoutError): + return None diff --git a/src/adagio/cli/task_exec.py b/src/adagio/cli/task_exec.py new file mode 100644 index 0000000..6a69fd5 --- /dev/null +++ b/src/adagio/cli/task_exec.py @@ -0,0 +1,289 @@ +"""Internal exec-task subcommand: runs a single QIIME action inside a plugin container.""" + +import argparse +from collections.abc import Mapping +from contextlib import nullcontext +import os +import sys +import warnings +import zipfile +from pathlib import Path +from typing import Any + +from adagio.executors.task_contract import ( + build_result_manifest, + read_json_file, + write_json_file, +) + + +def 
run_task_exec(argv: list[str]) -> None: + """Entrypoint for the internal ``adagio exec-task`` subcommand.""" + parser = argparse.ArgumentParser( + prog="adagio exec-task", + description="Execute a single QIIME plugin action (internal use only).", + ) + parser.add_argument("--task", required=True, help="Path to the task spec JSON file.") + opts = parser.parse_args(argv) + + task_spec = read_json_file(Path(opts.task)) + _run_task(task_spec) + + +def _run_task(spec: dict[str, Any]) -> None: + from qiime2 import Artifact, Cache, Metadata + from qiime2.sdk import PluginManager + + plugin_name: str = spec["plugin"] + action_name: str = spec["action"] + archive_inputs: dict[str, str] = spec.get("archive_inputs", {}) + archive_collection_inputs: dict[str, list[str]] = spec.get("archive_collection_inputs", {}) + metadata_inputs: dict[str, str] = spec.get("metadata_inputs", {}) + params: dict[str, Any] = spec.get("params", {}) + metadata_column_kwargs: dict[str, dict[str, str]] = spec.get("metadata_column_kwargs", {}) + outputs: dict[str, str] = spec["outputs"] + result_manifest: str | None = spec.get("result_manifest") + cache_path: str | None = spec.get("cache_path") + recycle_pool: str | None = spec.get("recycle_pool") + + plugin_manager = PluginManager() + + plugin = _resolve_key(plugin_manager.plugins, plugin_name) + if plugin is None: + available = ", ".join(sorted(plugin_manager.plugins.keys())[:20]) + raise KeyError( + f"QIIME plugin {plugin_name!r} not found. " + f"Available plugins (first 20): [{available}]" + ) + + action = _resolve_key(plugin.actions, action_name) + if action is None: + available = ", ".join(sorted(plugin.actions.keys())[:30]) + raise KeyError( + f"QIIME action {plugin_name!r}.{action_name!r} not found. 
" + f"Available actions (first 30): [{available}]" + ) + + cache = Cache(cache_path) if cache_path else None + cache_context = cache if cache is not None else nullcontext() + reused = False + + with cache_context: + kwargs: dict[str, Any] = {} + + for name, path in archive_inputs.items(): + loaded = Artifact.load(path) + kwargs[name] = _cache_loaded_input(cache=cache, value=loaded) + + for name, paths in archive_collection_inputs.items(): + kwargs[name] = [ + _cache_loaded_input(cache=cache, value=Artifact.load(path)) + for path in paths + ] + + loaded_metadata: dict[str, Metadata] = {} + for name, path in metadata_inputs.items(): + if zipfile.is_zipfile(path): + loaded_metadata[name] = Artifact.load(path).view(Metadata) + else: + loaded_metadata[name] = Metadata.load(path) + + for param_name, col_spec in metadata_column_kwargs.items(): + source_name: str = col_spec["source"] + column_name: str = col_spec["column"] + metadata = loaded_metadata.pop(source_name) + kwargs[param_name] = metadata.get_column(column_name) + + for name, metadata in loaded_metadata.items(): + kwargs[name] = metadata + + for name, value in params.items(): + kwargs[name] = _coerce_param(action=action, name=name, value=value) + + _materialize_default_parameters(action=action, kwargs=kwargs) + + if recycle_pool is not None and cache is None: + raise ValueError("A recycle pool requires a configured cache path.") + + recycle_context = ( + cache.create_pool(key=recycle_pool, reuse=True) + if recycle_pool is not None and cache is not None + else nullcontext() + ) + with recycle_context: + cached_results = _load_cached_results(cache=cache, action=action, kwargs=kwargs) + if cached_results is not None: + reused = True + results = cached_results + else: + with action_output_context(): + results = action(**kwargs) + + saved_outputs: dict[str, str] = {} + for name, dest_path in outputs.items(): + artifact = getattr(results, name) + saved_outputs[name] = artifact.save(dest_path) + + if result_manifest: + 
write_json_file( + Path(result_manifest), + build_result_manifest(outputs=saved_outputs, reused=reused), + ) + + +def _cache_loaded_input(*, cache: Any, value: Any) -> Any: + if cache is None: + return value + return cache.process_pool.save(value) + + +def _materialize_default_parameters(*, action: Any, kwargs: dict[str, Any]) -> None: + signature = getattr(action, "signature", None) + parameters = getattr(signature, "parameters", None) + if not isinstance(parameters, Mapping): + return + + for name, spec in parameters.items(): + has_default = getattr(spec, "has_default", None) + if name in kwargs or not callable(has_default) or not has_default(): + continue + kwargs[name] = spec.default + + +def _load_cached_results(*, cache: Any, action: Any, kwargs: dict[str, Any]) -> Any: + if cache is None: + return None + + named_pool = getattr(cache, "named_pool", None) + if named_pool is None: + return None + + named_pool.create_index() + invocation = _build_invocation(action=action, kwargs=kwargs) + if invocation not in named_pool.index: + return None + + from qiime2.core.type.util import is_collection_type + from qiime2.sdk import ResultCollection, Results + + try: + cached_outputs = named_pool.index[invocation] + loaded_outputs: dict[str, Any] = {} + for name, output_spec in action.signature.outputs.items(): + if is_collection_type(output_spec.qiime_type): + cached_collection = cached_outputs[name] + collection_order = list(cached_collection.keys()) + if not _validate_collection_order(collection_order): + return None + + collection_order.sort(key=lambda x: x.idx) + loaded_collection = ResultCollection() + for elem_info in collection_order: + loaded_collection[elem_info.item_name] = named_pool.load( + cached_collection[elem_info] + ) + loaded_outputs[name] = loaded_collection + else: + loaded_outputs[name] = named_pool.load(cached_outputs[name]) + except KeyError: + return None + + return Results(loaded_outputs.keys(), loaded_outputs.values()) + + +def 
_build_invocation(*, action: Any, kwargs: dict[str, Any]) -> Any: + from rachis.core.type.signature import HashableInvocation + + plugin = action.plugin_id.replace("_", "-") + plugin_action = f"{plugin}:{action.id}" + collated_inputs = action.signature.collate_inputs(**kwargs) + callable_args = action.signature.coerce_user_input(**collated_inputs) + arguments = [] + for name, value in callable_args.items(): + arguments.append({name: value}) + return HashableInvocation(plugin_action, arguments) + + +def _validate_collection_order(collection_order: list[Any]) -> bool: + if not collection_order: + return True + if not all( + elem.total == collection_order[0].total for elem in collection_order + ) or len(collection_order) != collection_order[0].total: + warnings.warn( + "Incomplete collection found when recycling, collection will be remade" + ) + return False + return True + + +def _resolve_key(mapping: Any, requested: str) -> Any: + if requested in mapping: + return mapping[requested] + canonical = _canonical(requested) + for key in mapping: + if _canonical(key) == canonical: + return mapping[key] + return None + + +def _canonical(value: str) -> str: + return value.strip().replace("-", "_").replace(" ", "_").lower() + + +def _coerce_param(*, action: Any, name: str, value: Any) -> Any: + if value is None: + return None + from collections.abc import Mapping + + signature = getattr(action, "signature", None) + parameters = getattr(signature, "parameters", None) + if not isinstance(parameters, Mapping) or name not in parameters: + return value + qiime_type = getattr(parameters[name], "qiime_type", None) + if qiime_type is None: + return value + from qiime2.sdk.util import parse_primitive + + return parse_primitive(qiime_type, value) + + +class action_output_context: + """Suppress plugin stdout/stderr noise unless explicitly enabled.""" + + def __enter__(self): + mode = os.getenv("ADAGIO_ACTION_STDIO", "").strip().lower() + self._suppress = mode not in {"inherit", "show", 
"verbose", "1", "true", "yes"} + if not self._suppress: + return self + + self._saved_fds: list[tuple[int, int]] = [] + self._sink = open(os.devnull, "w", encoding="utf-8") + self._warnings = warnings.catch_warnings() + self._warnings.__enter__() + warnings.filterwarnings( + "ignore", + message="pkg_resources is deprecated as an API.*", + category=UserWarning, + ) + for fd in (1, 2): + saved = os.dup(fd) + self._saved_fds.append((fd, saved)) + os.dup2(self._sink.fileno(), fd) + return self + + def __exit__(self, exc_type, exc, tb): + if not getattr(self, "_suppress", False): + return False + for fd, saved in reversed(self._saved_fds): + try: + os.dup2(saved, fd) + finally: + os.close(saved) + self._warnings.__exit__(exc_type, exc, tb) + self._sink.close() + return False + + +if __name__ == "__main__": + run_task_exec(sys.argv[1:]) diff --git a/src/adagio/describe.py b/src/adagio/describe.py index e69de29..484e920 100644 --- a/src/adagio/describe.py +++ b/src/adagio/describe.py @@ -0,0 +1,366 @@ +import json +from dataclasses import dataclass + +from rich import box +from rich.console import Group, NewLine +from rich.panel import Panel +from rich.text import Text + +from .cli.dynamic import _compact_type_text +from .executors.common import plan_execution_order +from .model.pipeline import AdagioPipeline +from .model.task import ( + LiteralVal, + MetadataVal, + PluginActionTask, + PromotedVal, + RootInputTask, +) + + +@dataclass(frozen=True) +class _DisplayRef: + label: str + type_label: str | None = None + description: str | None = None + + +def render_pipeline_text(pipeline: AdagioPipeline) -> Text | Group: + available_ids = { + input_def.id: _DisplayRef( + label=_pipeline_input_label(input_def.name), + type_label=_format_spec_type(input_def.type), + description=_clean_description(input_def.description), + ) + for input_def in pipeline.signature.inputs + } + parameter_refs = { + parameter.id: _DisplayRef( + label=_pipeline_parameter_label(parameter.name), + 
type_label=_format_spec_type(parameter.type), + description=_clean_description(parameter.description), + ) + for parameter in pipeline.signature.parameters + } + pipeline_output_refs = { + output.id: _DisplayRef( + label=f'pipeline output "{output.name}"', + type_label=_format_spec_type(output.type), + description=_clean_description(output.description), + ) + for output in pipeline.signature.outputs + } + execution_plan = plan_execution_order( + tasks=list(pipeline.iter_tasks()), + scope=available_ids, + ) + + panels = [] + for task in execution_plan: + if isinstance(task, RootInputTask): + _record_root_input_outputs(task=task, available_ids=available_ids) + continue + + if not isinstance(task, PluginActionTask): + continue + + body = Text(no_wrap=False, overflow="fold") + _append_section_header(body, "Inputs") + _append_input_lines(body, task=task, available_ids=available_ids) + _append_section_header(body, "Parameters") + _append_parameter_lines( + body, + task=task, + available_ids=available_ids, + parameter_refs=parameter_refs, + ) + _append_section_header(body, "Outputs") + _append_output_lines( + body, + task=task, + pipeline_output_refs=pipeline_output_refs, + ) + panels.append( + Panel( + body, + title=f"{task.plugin}.{task.action}", + title_align="left", + border_style="cyan", + box=box.ROUNDED, + expand=True, + ) + ) + + for output_name, output in task.outputs.items(): + pipeline_output_ref = pipeline_output_refs.get(output.id) + available_ids[output.id] = _DisplayRef( + label=f"{task.plugin}.{task.action}.{output_name}", + type_label=( + pipeline_output_ref.type_label + if pipeline_output_ref is not None + else None + ), + description=( + pipeline_output_ref.description + if pipeline_output_ref is not None + else None + ), + ) + + if not panels: + return Text("No plugin actions found.", style="dim") + + renderables = [] + for index, panel in enumerate(panels): + if index: + renderables.append(NewLine()) + renderables.append(panel) + return 
Group(*renderables) + + +def _append_section_header(rendered: Text, title: str) -> None: + rendered.append(f" {title}:\n", style="bold cyan") + + +def _append_input_lines( + rendered: Text, + *, + task: PluginActionTask, + available_ids: dict[str, _DisplayRef], +) -> None: + if not task.inputs: + _append_none_line(rendered) + return + + for input_name, source in task.inputs.items(): + if source.kind == "archive-collection": + labels = [ + available_ids.get(item.id, _unknown_reference(item.id)).label + for item in source.items + ] + _append_entry_line( + rendered, + name=input_name, + type_label="list", + value_text=f"[{', '.join(labels)}]", + description=None, + ) + continue + reference = available_ids.get(source.id, _unknown_reference(source.id)) + _append_entry_line( + rendered, + name=input_name, + type_label=reference.type_label, + value_text=reference.label, + description=reference.description, + ) + + +def _append_parameter_lines( + rendered: Text, + *, + task: PluginActionTask, + available_ids: dict[str, _DisplayRef], + parameter_refs: dict[str, _DisplayRef], +) -> None: + if not task.parameters: + _append_none_line(rendered) + return + + for parameter_name, value in task.parameters.items(): + rendered_value, display = _render_parameter_value( + task=task, + parameter_name=parameter_name, + value=value, + available_ids=available_ids, + parameter_refs=parameter_refs, + ) + _append_entry_line( + rendered, + name=parameter_name, + type_label=display.type_label if display is not None else None, + value_text=rendered_value, + description=display.description if display is not None else None, + ) + + +def _append_output_lines( + rendered: Text, + *, + task: PluginActionTask, + pipeline_output_refs: dict[str, _DisplayRef], +) -> None: + if not task.outputs: + _append_none_line(rendered) + return + + for output_name, output in task.outputs.items(): + pipeline_output_ref = pipeline_output_refs.get(output.id) + value_text = _output_annotation(output_name=output_name, 
output_id=output.id) + _append_entry_line( + rendered, + name=output_name, + type_label=( + pipeline_output_ref.type_label + if pipeline_output_ref is not None + else None + ), + value_text=value_text, + description=( + pipeline_output_ref.description + if pipeline_output_ref is not None + else None + ), + ) + + +def _append_none_line(rendered: Text) -> None: + rendered.append(" (none)\n", style="dim") + + +def _append_entry_line( + rendered: Text, + *, + name: str, + type_label: str | None, + value_text: str | None, + description: str | None, +) -> None: + rendered.append(" - ") + rendered.append(name, style="cyan") + if value_text is not None: + rendered.append(":", style="cyan") + if type_label: + rendered.append(" ") + rendered.append(type_label, style="bold yellow") + if value_text: + rendered.append(" ") + rendered.append(value_text) + rendered.append("\n") + if description: + rendered.append(" ") + rendered.append(description, style="dim") + rendered.append("\n") + + +def _render_parameter_value( + *, + task: PluginActionTask, + parameter_name: str, + value: object, + available_ids: dict[str, _DisplayRef], + parameter_refs: dict[str, _DisplayRef], +) -> tuple[str, _DisplayRef | None]: + if isinstance(value, PromotedVal): + display = parameter_refs.get(value.id) + if display is not None: + return display.label, display + return _pipeline_parameter_label(value.id), None + + if isinstance(value, LiteralVal): + return _render_literal(value.value), _literal_display(value.value) + + if isinstance(value, MetadataVal): + source = task.inputs.get(parameter_name) + source_ref = ( + available_ids.get(source.id, _unknown_reference(source.id)) + if source is not None + else _DisplayRef(label=f'input "{parameter_name}"') + ) + column_label, display = _render_metadata_column( + column=value.column, + parameter_refs=parameter_refs, + ) + rendered_value = ( + f"metadata column from {source_ref.label} using {column_label}" + ) + if display is not None: + return 
rendered_value, display + return rendered_value, _DisplayRef( + label=rendered_value, + description=source_ref.description, + ) + + return str(value), None + + +def _render_metadata_column( + *, + column: object, + parameter_refs: dict[str, _DisplayRef], +) -> tuple[str, _DisplayRef | None]: + if isinstance(column, PromotedVal): + display = parameter_refs.get(column.id) + if display is not None: + return display.label, display + return _pipeline_parameter_label(column.id), None + + value = getattr(column, "value", None) + return _render_literal(value), _literal_display(value) + + +def _render_literal(value: object) -> str: + return json.dumps(value, sort_keys=True) + + +def _literal_display(value: object) -> _DisplayRef: + type_label = _format_literal_type(value) + return _DisplayRef(label=_render_literal(value), type_label=type_label) + + +def _format_literal_type(value: object) -> str | None: + if isinstance(value, bool): + return "(Boolean)" + if isinstance(value, int): + return "(Int)" + if isinstance(value, float): + return "(Float)" + if isinstance(value, str): + return "(Str)" + return None + + +def _record_root_input_outputs( + *, + task: RootInputTask, + available_ids: dict[str, _DisplayRef], +) -> None: + for name, output in task.outputs.items(): + source = task.inputs.get(name) + if source is None: + available_ids[output.id] = _DisplayRef(label=_pipeline_input_label(name)) + continue + available_ids[output.id] = available_ids.get( + source.id, + _unknown_reference(source.id), + ) + + +def _output_annotation(*, output_name: str, output_id: str) -> str | None: + _ = output_name + _ = output_id + return None + + +def _format_spec_type(type_text: str | None) -> str | None: + cleaned = (type_text or "").strip() + if not cleaned: + return None + return _compact_type_text(cleaned) + + +def _clean_description(description: str | None) -> str | None: + cleaned = (description or "").strip() + return cleaned or None + + +def _pipeline_input_label(name: str) -> str: 
+ return f'pipeline input "{name}"' + + +def _pipeline_parameter_label(name: str) -> str: + return f'pipeline parameter "{name}"' + + +def _unknown_reference(identifier: str) -> _DisplayRef: + return _DisplayRef(label=f'unknown reference "{identifier}"') diff --git a/src/adagio/execute.py b/src/adagio/execute.py index d6ab826..0e1c1a3 100644 --- a/src/adagio/execute.py +++ b/src/adagio/execute.py @@ -3,7 +3,6 @@ import typing as t -from adagio.execution.context import AdagioContext from adagio.model.arguments import AdagioArguments from adagio.model.pipeline import AdagioPipeline from adagio.monitor.log import LogMonitor @@ -53,6 +52,7 @@ def _setup_context(advanced): # TODO: actually configure a non-temp cache + from adagio.execution.context import AdagioContext from qiime2.sdk import PluginManager PluginManager() diff --git a/src/adagio/execution/context.py b/src/adagio/execution/context.py index e82ad52..b1e49d9 100644 --- a/src/adagio/execution/context.py +++ b/src/adagio/execution/context.py @@ -1,14 +1,41 @@ -from parsl import python_app, join_app +from adagio.execution.proxy import IndexedProxyResults, dfk_thread_future, lift_parsl -from qiime2.sdk.proxy import ProxyResults, Proxy -from qiime2.sdk import Pipeline, Results -from qiime2.sdk.context import ParallelContext +_QIIME2_IMPORT_ERROR: ModuleNotFoundError | None = None -from adagio.execution.proxy import IndexedProxyResults, dfk_thread_future, lift_parsl +try: + from qiime2.sdk import Pipeline, Results + from qiime2.sdk.context import ParallelContext + from qiime2.sdk.proxy import Proxy, ProxyResults +except ModuleNotFoundError as exc: + _QIIME2_IMPORT_ERROR = exc + + class Proxy: # type: ignore[no-redef] + pass + + class ProxyResults: # type: ignore[no-redef] + pass + + class Pipeline: # type: ignore[no-redef] + pass + + class Results: # type: ignore[no-redef] + pass + + class ParallelContext: # type: ignore[no-redef] + def __init__(self, *_args, **_kwargs): + _require_qiime2() + + +def 
_require_qiime2() -> None: + if _QIIME2_IMPORT_ERROR is not None: + raise ModuleNotFoundError( + "qiime2 is required for local execution context support." + ) from _QIIME2_IMPORT_ERROR class AdagioContext(ParallelContext): def __init__(self, action_obj=None, parent=None): + _require_qiime2() super().__init__(action_obj, parent) diff --git a/src/adagio/execution/proxy.py b/src/adagio/execution/proxy.py index 58d7052..5236f8c 100644 --- a/src/adagio/execution/proxy.py +++ b/src/adagio/execution/proxy.py @@ -1,9 +1,53 @@ -from qiime2.sdk.proxy import ( - Proxy, ProxyResult, ProxyVisualization, ProxyArtifact, ProxyResults, - ProxyResultCollection) -from qiime2.core.type.util import is_visualization_type, is_collection_type from parsl import python_app, join_app +_QIIME2_IMPORT_ERROR: ModuleNotFoundError | None = None + +try: + from qiime2.core.type.util import is_collection_type, is_visualization_type + from qiime2.sdk.proxy import ( + Proxy, + ProxyArtifact, + ProxyResult, + ProxyResultCollection, + ProxyResults, + ProxyVisualization, + ) +except ModuleNotFoundError as exc: + _QIIME2_IMPORT_ERROR = exc + + class Proxy: # type: ignore[no-redef] + def __init__(self, future=None, selector=NotImplemented): + self._future_ = future + self._selector_ = selector + + class ProxyResult(Proxy): # type: ignore[no-redef] + pass + + class ProxyVisualization(ProxyResult): # type: ignore[no-redef] + pass + + class ProxyArtifact(ProxyResult): # type: ignore[no-redef] + pass + + class ProxyResults(Proxy): # type: ignore[no-redef] + _signature_ = {} + + class ProxyResultCollection(ProxyResult): # type: ignore[no-redef] + pass + + def is_visualization_type(*_args, **_kwargs): # type: ignore[no-redef] + _require_qiime2() + + def is_collection_type(*_args, **_kwargs): # type: ignore[no-redef] + _require_qiime2() + + +def _require_qiime2() -> None: + if _QIIME2_IMPORT_ERROR is not None: + raise ModuleNotFoundError( + "qiime2 is required for local execution/proxy support." 
def select_default_executor(
    *,
    default_override=None,
    plugin_overrides=None,
    task_overrides=None,
):
    """Build the default task-environment executor.

    The three override arguments are forwarded unchanged to
    ``ConfigurableTaskEnvironmentResolver``, which wraps a
    ``DefaultTaskEnvironmentResolver``. The returned
    ``TaskEnvironmentExecutor`` is given one launcher per environment kind:
    ``"apptainer"`` and ``"docker"``.
    """
    # The imports are function-local, so they run when the executor is
    # requested rather than when the package is imported.
    from .defaults import (
        ConfigurableTaskEnvironmentResolver,
        DefaultTaskEnvironmentResolver,
    )
    from .apptainer import ApptainerTaskEnvironmentLauncher
    from .docker import DockerTaskEnvironmentLauncher
    from .task_environments import TaskEnvironmentExecutor

    resolver = ConfigurableTaskEnvironmentResolver(
        base=DefaultTaskEnvironmentResolver(),
        default_override=default_override,
        plugin_overrides=plugin_overrides,
        task_overrides=task_overrides,
    )
    launchers = {
        "apptainer": ApptainerTaskEnvironmentLauncher(),
        "docker": DockerTaskEnvironmentLauncher(),
    }
    return TaskEnvironmentExecutor(
        environment_resolver=resolver,
        launchers=launchers,
    )
container_python_root, + containerize_host_value, + containerize_path, + host_path_from_container, + is_uri, + print_filtered_container_stderr, + python_warning_env_assignments, + with_apptainer_binds, +) +from .task_contract import ( + build_task_spec, + parse_result_manifest, + read_json_file, + result_manifest_path, + task_spec_path, + write_json_file, +) + + +class ApptainerTaskEnvironmentLauncher(TaskEnvironmentLauncher): + kind = "apptainer" + + def launch( + self, + *, + environment: TaskEnvironmentSpec, + request: TaskExecutionRequest, + console: Console | None = None, + ) -> TaskExecutionResult: + image_path = _resolve_sif_image(environment.reference) + runtime_executable = _resolve_runtime_executable() + + task = request.task + archive_inputs = { + name: containerize_host_value(value) + for name, value in request.archive_inputs.items() + } + archive_collection_inputs = { + name: [containerize_host_value(value) for value in values] + for name, values in request.archive_collection_inputs.items() + } + metadata_inputs = { + name: containerize_host_value(value) + for name, value in request.metadata_inputs.items() + } + outputs = { + name: containerize_path(Path(path)) + for name, path in request.outputs.items() + } + + manifest_path = result_manifest_path( + task_id=task.id, work_path=request.work_path + ) + spec_path = task_spec_path(task_id=task.id, work_path=request.work_path) + task_spec = build_task_spec( + plugin=task.plugin, + action=task.action, + archive_inputs=archive_inputs, + archive_collection_inputs=archive_collection_inputs, + metadata_inputs=metadata_inputs, + params=dict(request.params), + metadata_column_kwargs=dict(request.metadata_column_kwargs), + outputs=outputs, + result_manifest=containerize_path(manifest_path), + cache_path=( + containerize_path(Path(request.cache_path)) + if request.cache_path is not None + else None + ), + recycle_pool=request.recycle_pool, + ) + write_json_file(spec_path, task_spec) + + python_root = 
container_python_root(work_path=request.work_path) + command = [ + runtime_executable, + "exec", + "--cleanenv", + "--no-home", + "--pwd", + containerize_path(request.cwd), + ] + + host_paths = [request.cwd, request.work_path, python_root] + for value in ( + list(request.archive_inputs.values()) + + [item for values in request.archive_collection_inputs.values() for item in values] + + list(request.metadata_inputs.values()) + ): + if is_uri(value): + continue + path = Path(value) + if path.is_absolute(): + host_paths.append(path) + if request.cache_path is not None: + host_paths.append(mount_path_for_cache(Path(request.cache_path))) + + command = with_apptainer_binds(command=command, host_paths=host_paths) + command.extend( + [ + str(image_path), + "env", + f"PYTHONPATH={containerize_path(python_root)}", + "PYTHONNOUSERSITE=1", + *python_warning_env_assignments(), + "python", + "-m", + "adagio.cli.task_exec", + "--task", + containerize_path(spec_path), + ] + ) + + if console is not None: + label = f"{Path(runtime_executable).name} {image_path}" + if not getattr(console, "_adagio_inline_monitor_active", False): + console.print(f"[dim]Task environment:[/dim] {label}") + + try: + result = subprocess.run( + command, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except FileNotFoundError as exc: + raise SystemExit( + "Apptainer/Singularity is required for task environment execution " + "but was not found in PATH. Ensure the job environment includes the " + "Apptainer binary location." 
+ ) from exc + + if console is not None: + print_filtered_container_stderr( + console=console, stderr_text=result.stderr or "" + ) + + if result.returncode != 0: + stdout_text = (result.stdout or "").strip() + stderr_text = (result.stderr or "").strip() + if stderr_text: + detail = f" Runtime reported: {stderr_text}" + elif stdout_text: + detail = f" Container stdout: {stdout_text}" + else: + detail = "" + raise RuntimeError( + f"Task {task.id!r} ({task.plugin}.{task.action}) failed " + f"while launching environment {str(image_path)!r} " + f"with exit code {result.returncode}.{detail}" + ) + + if not manifest_path.exists(): + raise RuntimeError( + f"Task {task.id!r} completed but did not write an output manifest." + ) + + output_manifest = read_json_file(manifest_path) + reported_outputs, reused = parse_result_manifest(output_manifest) + resolved_outputs = {} + for output_name in request.outputs: + actual_path = reported_outputs.get(output_name) + if not isinstance(actual_path, str): + raise RuntimeError( + f"Task {task.id!r} did not report output {output_name!r}." + ) + resolved_outputs[output_name] = str(host_path_from_container(actual_path)) + + return TaskExecutionResult(outputs=resolved_outputs, reused=reused) + + +def _resolve_runtime_executable() -> str: + for candidate in ("apptainer", "singularity"): + resolved = shutil.which(candidate) + if resolved: + return resolved + raise SystemExit( + "Apptainer/Singularity is required for task environment execution " + "but was not found in PATH. Ensure the job environment includes the " + "Apptainer binary location." + ) + + +def _resolve_sif_image(reference: str) -> Path: + if is_uri(reference): + raise RuntimeError( + "Apptainer task environments currently support only local .sif image paths." + ) + + image_path = Path(reference).expanduser().resolve() + if image_path.suffix.lower() != ".sif": + raise RuntimeError( + f"Apptainer task environments require a local .sif image path, got {reference!r}." 
+ ) + if not image_path.exists(): + raise RuntimeError(f"Apptainer image not found: {image_path}") + if not image_path.is_file(): + raise RuntimeError(f"Apptainer image is not a file: {image_path}") + return image_path diff --git a/src/adagio/executors/base.py b/src/adagio/executors/base.py new file mode 100644 index 0000000..eb650c6 --- /dev/null +++ b/src/adagio/executors/base.py @@ -0,0 +1,78 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Mapping, Protocol + +from rich.console import Console + +from adagio.model.arguments import AdagioArguments +from adagio.model.pipeline import AdagioPipeline +from adagio.model.task import PluginActionTask +from adagio.monitor.api import Monitor + +from .cache_support import ExecutionCacheConfig + + +class PipelineExecutor(Protocol): + mode_label: str + + def execute( + self, + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + console: Console | None = None, + monitor: Monitor | None = None, + cache_config: ExecutionCacheConfig | None = None, + ) -> None: ... 
+ + +@dataclass(frozen=True) +class TaskEnvironmentSpec: + kind: str + reference: str + description: str | None = None + options: Mapping[str, Any] | None = None + + +@dataclass(frozen=True) +class TaskEnvironmentOverride: + kind: str | None = None + reference: str | None = None + platform: str | None = None + + +@dataclass(frozen=True) +class TaskExecutionRequest: + task: PluginActionTask + cwd: Path + work_path: Path + archive_inputs: Mapping[str, str] + archive_collection_inputs: Mapping[str, list[str]] + metadata_inputs: Mapping[str, str] + params: Mapping[str, Any] + metadata_column_kwargs: Mapping[str, Mapping[str, str]] + outputs: Mapping[str, str] + cache_path: str | None = None + recycle_pool: str | None = None + + +@dataclass(frozen=True) +class TaskExecutionResult: + outputs: Mapping[str, str] + reused: bool = False + + +class TaskEnvironmentResolver(Protocol): + def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec: ... + + +class TaskEnvironmentLauncher(Protocol): + kind: str + + def launch( + self, + *, + environment: TaskEnvironmentSpec, + request: TaskExecutionRequest, + console: Console | None = None, + ) -> TaskExecutionResult: ... diff --git a/src/adagio/executors/cache_support.py b/src/adagio/executors/cache_support.py new file mode 100644 index 0000000..02d967c --- /dev/null +++ b/src/adagio/executors/cache_support.py @@ -0,0 +1,55 @@ +from dataclasses import dataclass +from pathlib import Path + +DEFAULT_RECYCLE_POOL = "adagio-recycle" + +CACHE_DIR_HELP = "Path to the shared QIIME cache directory. Required." +REUSE_HELP = ( + "Reuse matching prior task results from the selected cache. Enabled by default." 
+) + + +@dataclass(frozen=True) +class ExecutionCacheConfig: + cache_dir: Path + recycle_pool: str | None = None + + +def resolve_cache_config( + *, + cwd: Path, + cache_dir: str | Path | None, + reuse: bool, +) -> ExecutionCacheConfig: + resolved_cache_dir = resolve_cache_dir_path(cwd=cwd, raw_value=cache_dir) + resolved_cache_dir.parent.mkdir(parents=True, exist_ok=True) + resolved_recycle_pool = DEFAULT_RECYCLE_POOL if reuse else None + + return ExecutionCacheConfig( + cache_dir=resolved_cache_dir, + recycle_pool=resolved_recycle_pool, + ) + + +def mount_path_for_cache(cache_dir: Path) -> Path: + return cache_dir if cache_dir.exists() else cache_dir.parent + + +def describe_cache_config(config: ExecutionCacheConfig) -> str: + if config.recycle_pool is None: + return f"{config.cache_dir} (reuse disabled)" + return f"{config.cache_dir} (reuse enabled)" + + +def resolve_cache_dir_path(*, cwd: Path, raw_value: str | Path | None) -> Path: + if raw_value is None: + raise SystemExit("Missing required --cache-dir.") + + candidate = Path(raw_value) + candidate = candidate.expanduser() + if not candidate.is_absolute(): + candidate = (cwd / candidate).resolve() + else: + candidate = candidate.resolve() + + return candidate diff --git a/src/adagio/executors/common.py b/src/adagio/executors/common.py new file mode 100644 index 0000000..5f5b1eb --- /dev/null +++ b/src/adagio/executors/common.py @@ -0,0 +1,55 @@ +import typing as t + +from adagio.model.task import input_source_ids + + +def plan_execution_order(*, tasks: list[t.Any], scope: dict[str, t.Any]) -> list[t.Any]: + """Return a dependency-respecting serial execution plan.""" + available_ids = set(scope.keys()) + remaining = list(tasks) + planned: list[t.Any] = [] + + while remaining: + progressed = False + for task in list(remaining): + missing = [ + source_id + for src in task.inputs.values() + for source_id in input_source_ids(src) + if source_id not in available_ids + ] + if missing: + continue + + 
def task_label(task: t.Any) -> str:
    """Return a short human-readable label for ``task``.

    Plugin actions render as ``"<id> (<plugin>.<action>)"``, built-in tasks as
    ``"<id> (<name>)"``, and every other kind as the bare id. Missing
    attributes fall back to ``""`` (or ``"built-in"`` for a built-in's name).
    """
    kind = getattr(task, "kind", "unknown")
    identifier = getattr(task, "id", "")
    if kind == "plugin-action":
        suffix = "{}.{}".format(
            getattr(task, "plugin", ""),
            getattr(task, "action", ""),
        )
    elif kind == "built-in":
        suffix = getattr(task, "name", "built-in")
    else:
        return identifier
    return f"{identifier} ({suffix})"
def host_path_from_container(value: str) -> Path:
    """Convert a mounted container path back to the original host path.

    Only ``HOST_MOUNT_POINT`` itself and paths below it (``HOST_MOUNT_POINT``
    followed by ``/``) are translated; any other value is returned as a
    ``Path`` unchanged. Translated paths are resolved before being returned.
    """
    if value == HOST_MOUNT_POINT:
        # The bare mount point stands for the host filesystem root, not for an
        # empty path (which would resolve to the current working directory).
        return Path("/")
    # Require the separator so sibling names that merely share the prefix
    # (for example "<mount>name/...") are not mistaken for mounted paths.
    if not value.startswith(f"{HOST_MOUNT_POINT}/"):
        return Path(value)
    suffix = value[len(HOST_MOUNT_POINT) :]
    return Path(suffix).resolve()
def print_filtered_container_stderr(*, console: "Console", stderr_text: str) -> None:
    """Print relevant stderr lines while dropping known noisy platform warnings.

    Blank lines and Docker platform-mismatch warnings are skipped. Every other
    line is printed verbatim: markup and emoji-code processing are disabled
    because the text is raw process output, where bracketed fragments such as
    ``[/tmp/x]`` or ``[info]`` would otherwise be parsed as console markup.
    """
    if not stderr_text:
        return
    for line in stderr_text.splitlines():
        if not line.strip() or is_docker_platform_warning(line):
            continue
        console.print(line, markup=False, emoji=False)


def is_docker_platform_warning(line: str) -> bool:
    """Return True when ``line`` is Docker's image/host platform mismatch notice."""
    return (
        "requested image's platform" in line
        and "does not match the detected host platform" in line
    )
class DefaultTaskEnvironmentResolver(TaskEnvironmentResolver):
    """Derive a Docker image reference from a task's plugin name.

    References have the form ``<registry>/<image_prefix><plugin>:<tag>``, with
    the plugin name lower-cased and underscores replaced by hyphens. The
    resolver works per task, which leaves room for other resolvers to
    substitute different environments for individual tasks.
    """

    def __init__(
        self,
        *,
        registry: str = DEFAULT_REGISTRY,
        image_prefix: str = DEFAULT_IMAGE_PREFIX,
        tag: str = DEFAULT_TAG,
    ) -> None:
        # Drop trailing slashes so joining with "/" cannot produce "//".
        self._registry = registry.rstrip("/")
        self._image_prefix = image_prefix
        self._tag = tag

    def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec:
        """Return the default Docker environment for ``task``'s plugin."""
        plugin_slug = task.plugin.lower().replace("_", "-")
        image_name = f"{self._image_prefix}{plugin_slug}"
        return TaskEnvironmentSpec(
            kind="docker",
            reference=f"{self._registry}/{image_name}:{self._tag}",
            description=f"default plugin image for {task.plugin}",
        )


class ConfigurableTaskEnvironmentResolver(TaskEnvironmentResolver):
    """Apply configured overrides on top of another resolver's result.

    Overrides are layered from least to most specific: the default override,
    then the plugin override, then the task override. Each layer replaces only
    the fields it sets (``kind``, ``reference``, ``platform``).
    """

    def __init__(
        self,
        *,
        base: TaskEnvironmentResolver,
        default_override: TaskEnvironmentOverride | None = None,
        plugin_overrides: dict[str, TaskEnvironmentOverride] | None = None,
        task_overrides: dict[str, TaskEnvironmentOverride] | None = None,
    ) -> None:
        self._base = base
        self._default_override = default_override
        self._plugin_overrides = plugin_overrides if plugin_overrides else {}
        self._task_overrides = task_overrides if task_overrides else {}

    def resolve(self, *, task: PluginActionTask) -> TaskEnvironmentSpec:
        """Resolve ``task`` via the base resolver, then apply overrides."""
        fallback = self._base.resolve(task=task)
        resolved = {"kind": fallback.kind, "reference": fallback.reference}
        options = dict(fallback.options or {})
        touched = False

        # Ordered lowest to highest precedence; later layers win.
        layers = (
            self._default_override,
            self._find_plugin_override(task=task),
            self._find_task_override(task=task),
        )
        for layer in layers:
            if layer is None:
                continue
            for field_name in ("kind", "reference"):
                value = getattr(layer, field_name)
                if value is not None:
                    resolved[field_name] = value
                    touched = True
            if layer.platform is not None:
                options["platform"] = layer.platform
                touched = True

        # Keep the base description unless some override actually applied.
        if touched:
            description = f"configured environment for {task.name or task.id}"
        else:
            description = fallback.description

        return TaskEnvironmentSpec(
            kind=resolved["kind"],
            reference=resolved["reference"],
            description=description,
            options=options or None,
        )

    def _find_task_override(
        self, *, task: PluginActionTask
    ) -> TaskEnvironmentOverride | None:
        """Look up a task override by name, then id, then ``plugin.action``."""
        lookup_keys = [task.id, f"{task.plugin}.{task.action}"]
        if task.name:
            lookup_keys.insert(0, task.name)
        hits = (self._task_overrides.get(key) for key in lookup_keys)
        return next((hit for hit in hits if hit), None)

    def _find_plugin_override(
        self, *, task: PluginActionTask
    ) -> TaskEnvironmentOverride | None:
        """Look up a plugin override by exact name, then lower-cased name."""
        lookup_keys = (task.plugin, task.plugin.lower())
        hits = (self._plugin_overrides.get(key) for key in lookup_keys)
        return next((hit for hit in hits if hit), None)
TaskExecutionRequest, + TaskExecutionResult, +) +from .cache_support import mount_path_for_cache +from .container_support import ( + container_python_root, + containerize_host_value, + containerize_path, + docker_tty_flags, + host_path_from_container, + is_uri, + print_filtered_container_stderr, + python_warning_env_flags, + with_mounts, +) +from .task_contract import ( + parse_result_manifest, + build_task_spec, + read_json_file, + result_manifest_path, + task_spec_path, + write_json_file, +) + + +class DockerTaskEnvironmentLauncher(TaskEnvironmentLauncher): + kind = "docker" + + def launch( + self, + *, + environment: TaskEnvironmentSpec, + request: TaskExecutionRequest, + console: Console | None = None, + ) -> TaskExecutionResult: + task = request.task + archive_inputs = { + name: containerize_host_value(value) + for name, value in request.archive_inputs.items() + } + archive_collection_inputs = { + name: [containerize_host_value(value) for value in values] + for name, values in request.archive_collection_inputs.items() + } + metadata_inputs = { + name: containerize_host_value(value) + for name, value in request.metadata_inputs.items() + } + outputs = { + name: containerize_path(Path(path)) + for name, path in request.outputs.items() + } + + manifest_path = result_manifest_path(task_id=task.id, work_path=request.work_path) + spec_path = task_spec_path(task_id=task.id, work_path=request.work_path) + task_spec = build_task_spec( + plugin=task.plugin, + action=task.action, + archive_inputs=archive_inputs, + archive_collection_inputs=archive_collection_inputs, + metadata_inputs=metadata_inputs, + params=dict(request.params), + metadata_column_kwargs=dict(request.metadata_column_kwargs), + outputs=outputs, + result_manifest=containerize_path(manifest_path), + cache_path=( + containerize_path(Path(request.cache_path)) + if request.cache_path is not None + else None + ), + recycle_pool=request.recycle_pool, + ) + write_json_file(spec_path, task_spec) + + python_root = 
container_python_root(work_path=request.work_path) + platform = None + if environment.options is not None: + raw_platform = environment.options.get("platform") + if isinstance(raw_platform, str) and raw_platform: + platform = raw_platform + + command = [ + "docker", + "run", + "--rm", + *docker_tty_flags(), + "-e", + f"PYTHONPATH={containerize_path(python_root)}", + "-e", + "PYTHONNOUSERSITE=1", + *python_warning_env_flags(), + "-w", + containerize_path(request.cwd), + ] + if platform: + command.extend(["--platform", platform]) + command.extend([ + environment.reference, + "python", + "-m", + "adagio.cli.task_exec", + "--task", + containerize_path(spec_path), + ]) + + host_paths = [request.cwd, request.work_path, python_root] + for value in ( + list(request.archive_inputs.values()) + + [item for values in request.archive_collection_inputs.values() for item in values] + + list(request.metadata_inputs.values()) + ): + if is_uri(value): + continue + path = Path(value) + if path.is_absolute(): + host_paths.append(path) + if request.cache_path is not None: + host_paths.append(mount_path_for_cache(Path(request.cache_path))) + + command = with_mounts(command=command, host_paths=host_paths) + + if console is not None: + label = f"docker {environment.reference}" + if platform: + label = f"docker --platform {platform} {environment.reference}" + if not getattr(console, "_adagio_inline_monitor_active", False): + console.print(f"[dim]Task environment:[/dim] {label}") + + try: + result = subprocess.run( + command, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except FileNotFoundError as exc: + raise SystemExit( + "Docker is required for task environment execution but was not found in PATH." 
+ ) from exc + + if console is not None: + print_filtered_container_stderr(console=console, stderr_text=result.stderr or "") + + if result.returncode != 0: + stdout_text = (result.stdout or "").strip() + stderr_text = (result.stderr or "").strip() + if stderr_text: + detail = f" Docker reported: {stderr_text}" + elif stdout_text: + detail = f" Container stdout: {stdout_text}" + else: + detail = "" + raise RuntimeError( + f"Task {task.id!r} ({task.plugin}.{task.action}) failed " + f"while launching environment {environment.reference!r} " + f"with exit code {result.returncode}.{detail}" + ) + + if not manifest_path.exists(): + raise RuntimeError( + f"Task {task.id!r} completed but did not write an output manifest." + ) + + output_manifest = read_json_file(manifest_path) + reported_outputs, reused = parse_result_manifest(output_manifest) + outputs = {} + for output_name in request.outputs: + actual_path = reported_outputs.get(output_name) + if not isinstance(actual_path, str): + raise RuntimeError( + f"Task {task.id!r} did not report output {output_name!r}." 
+ ) + outputs[output_name] = str(host_path_from_container(actual_path)) + + return TaskExecutionResult(outputs=outputs, reused=reused) diff --git a/src/adagio/executors/path_utils.py b/src/adagio/executors/path_utils.py new file mode 100644 index 0000000..245b18e --- /dev/null +++ b/src/adagio/executors/path_utils.py @@ -0,0 +1,47 @@ +import os +from pathlib import Path +from typing import Iterable + +from .container_support import is_uri + + +def resolve_host_path(*, source: str, cwd: Path) -> str: + if is_uri(source): + return source + path = Path(source) + if path.is_absolute(): + return str(path.resolve()) + return str((cwd / path).resolve()) + + +def resolve_output_destination( + *, + output_name: str, + output_names: Iterable[str], + outputs: str | dict[str, str], + source_path: Path, +) -> str: + suffix = source_path.suffix + + if isinstance(outputs, str): + return append_output_suffix(os.path.join(outputs, output_name), suffix) + + if isinstance(outputs, dict): + raw_dest = outputs.get(output_name) + if raw_dest is None: + expected_outputs = ", ".join(sorted(output_names)) + provided_outputs = ", ".join(sorted(outputs.keys())) or "" + raise KeyError( + "Missing destination for output " + f"{output_name!r}. Expected output names: [{expected_outputs}]. " + f"Provided output names: [{provided_outputs}]." 
+ ) + return append_output_suffix(raw_dest, suffix) + + raise TypeError("Unsupported outputs configuration.") + + +def append_output_suffix(destination: str, suffix: str) -> str: + if suffix and not destination.endswith(suffix): + return destination + suffix + return destination diff --git a/src/adagio/executors/serial_runner.py b/src/adagio/executors/serial_runner.py new file mode 100644 index 0000000..ddbd3ed --- /dev/null +++ b/src/adagio/executors/serial_runner.py @@ -0,0 +1,135 @@ +import tempfile +import typing as t +from dataclasses import dataclass, field +from pathlib import Path + +from rich.console import Console + +from adagio.model.arguments import AdagioArguments +from adagio.model.pipeline import AdagioPipeline +from adagio.monitor.api import Monitor +from adagio.monitor.log import LogMonitor +from adagio.monitor.tty import RichMonitor + +from .cache_support import ExecutionCacheConfig +from .common import plan_execution_order, task_label +from .path_utils import resolve_host_path + +CONTAINER_SUBTASK_COUNT = 1 + + +@dataclass +class SerialExecutionState: + cwd: Path + work_path: Path + params: dict[str, t.Any] + scope: dict[str, str] + cache_config: ExecutionCacheConfig | None + saved_output_ids: set[str] = field(default_factory=set) + save_output_started: bool = False + + +def run_serial_pipeline( + *, + pipeline: AdagioPipeline, + arguments: AdagioArguments, + resolve_task: t.Callable[[t.Any, SerialExecutionState, Console | None], bool], + finish_outputs: t.Callable[ + [t.Any, AdagioArguments, SerialExecutionState, Monitor | None, bool], None + ], + console: Console | None = None, + monitor: Monitor | None = None, + total_subtasks: int = CONTAINER_SUBTASK_COUNT, + cache_config: ExecutionCacheConfig | None = None, +) -> None: + sig = pipeline.signature + tasks = list(pipeline.iter_tasks()) + active_monitor = resolve_monitor(console=console, monitor=monitor) + + pipeline.validate_graph() + sig.validate_arguments(arguments) + + 
active_monitor.start_pipeline(total_tasks=len(tasks)) + + with tempfile.TemporaryDirectory(prefix="adagio-work-") as work_dir: + state = SerialExecutionState( + cwd=Path.cwd().resolve(), + work_path=Path(work_dir), + params=sig.get_params(arguments), + scope={}, + cache_config=cache_config, + ) + completed_task_ids: set[str] = set() + + active_monitor.start_load_input() + for input_def in sig.inputs: + source = arguments.inputs[input_def.name] + state.scope[input_def.id] = resolve_host_path(source=source, cwd=state.cwd) + active_monitor.finish_load_input() + + execution_plan = plan_execution_order(tasks=tasks, scope=state.scope) + for task in execution_plan: + active_monitor.queue_task( + task_id=task.id, + label=task_label(task), + total_subtasks=total_subtasks, + ) + + try: + for task in execution_plan: + active_monitor.start_task(task_id=task.id) + try: + reused = resolve_task(task, state, console) + finish_outputs( + sig=sig, + arguments=arguments, + state=state, + monitor=active_monitor, + require_all=False, + ) + active_monitor.advance_task(task_id=task.id, advance=1) + active_monitor.finish_task( + task_id=task.id, + status="cached" if reused else "completed", + ) + completed_task_ids.add(task.id) + except Exception as exc: # noqa: BLE001 + active_monitor.finish_task( + task_id=task.id, status="failed", error=str(exc) + ) + for skipped_task in tasks: + if ( + skipped_task.id == task.id + or skipped_task.id in completed_task_ids + ): + continue + active_monitor.finish_task( + task_id=skipped_task.id, + status="skipped", + error=f"Skipped because task {task.id!r} failed.", + ) + if state.save_output_started: + active_monitor.finish_save_output() + raise + + try: + finish_outputs( + sig=sig, + arguments=arguments, + state=state, + monitor=active_monitor, + require_all=True, + ) + finally: + if state.save_output_started: + active_monitor.finish_save_output() + finally: + active_monitor.finish_pipeline() + + +def resolve_monitor(*, console: Console | None, 
monitor: Monitor | None) -> Monitor: + if monitor is not None: + return monitor + if console is not None: + return RichMonitor(console=console) + return LogMonitor() diff --git a/src/adagio/executors/task_contract.py b/src/adagio/executors/task_contract.py new file mode 100644 index 0000000..3998922 --- /dev/null +++ b/src/adagio/executors/task_contract.py @@ -0,0 +1,88 @@ +import json +from collections.abc import Mapping +from pathlib import Path +from typing import Any, Iterable + + +def task_file_stem(task_id: str) -> str: + return task_id.replace("/", "_").replace(" ", "_") + + +def build_task_outputs( + *, + task_id: str, + output_names: Iterable[str], + work_path: Path, +) -> dict[str, str]: + stem = task_file_stem(task_id) + return { + name: str((work_path / f"{stem}_{name}").resolve()) + for name in output_names + } + + +def task_spec_path(*, task_id: str, work_path: Path) -> Path: + return (work_path / f"{task_file_stem(task_id)}_spec.json").resolve() + + +def result_manifest_path(*, task_id: str, work_path: Path) -> Path: + return (work_path / f"{task_file_stem(task_id)}_results.json").resolve() + + +def build_task_spec( + *, + plugin: str, + action: str, + archive_inputs: dict[str, str], + archive_collection_inputs: dict[str, list[str]], + metadata_inputs: dict[str, str], + params: dict[str, Any], + metadata_column_kwargs: dict[str, dict[str, str]], + outputs: dict[str, str], + result_manifest: str | None, + cache_path: str | None, + recycle_pool: str | None, +) -> dict[str, Any]: + return { + "plugin": plugin, + "action": action, + "archive_inputs": archive_inputs, + "archive_collection_inputs": archive_collection_inputs, + "metadata_inputs": metadata_inputs, + "params": params, + "metadata_column_kwargs": metadata_column_kwargs, + "outputs": outputs, + "result_manifest": result_manifest, + "cache_path": cache_path, + "recycle_pool": recycle_pool, + } + + +def build_result_manifest( + *, + outputs: Mapping[str, str], + reused: bool, +) -> dict[str, 
Any]: + return { + "outputs": dict(outputs), + "reused": reused, + } + + +def parse_result_manifest(payload: dict[str, Any]) -> tuple[dict[str, str], bool]: + if "outputs" in payload: + outputs = payload.get("outputs", {}) + reused = bool(payload.get("reused", False)) + if not isinstance(outputs, dict): + raise TypeError("Invalid task result manifest: 'outputs' must be an object.") + return dict(outputs), reused + + return dict(payload), False + + +def read_json_file(path: Path) -> dict[str, Any]: + return json.loads(path.read_text(encoding="utf-8")) + + +def write_json_file(path: Path, payload: dict[str, Any]) -> None: + path.write_text(json.dumps(payload, ensure_ascii=True), encoding="utf-8") diff --git a/src/adagio/executors/task_environments.py b/src/adagio/executors/task_environments.py new file mode 100644 index 0000000..2654b4e --- /dev/null +++ b/src/adagio/executors/task_environments.py @@ -0,0 +1,225 @@ +import os +import shutil +from pathlib import Path + +from rich.console import Console + +from adagio.model.arguments import AdagioArguments +from adagio.model.task import PluginActionTask, RootInputTask +from adagio.monitor.api import Monitor + +from .base import ( + PipelineExecutor, + TaskEnvironmentLauncher, + TaskEnvironmentResolver, + TaskExecutionRequest, +) +from .cache_support import ExecutionCacheConfig +from .path_utils import resolve_output_destination +from .serial_runner import SerialExecutionState, run_serial_pipeline +from .task_contract import build_task_outputs + + +class TaskEnvironmentExecutor(PipelineExecutor): + mode_label = "per-task environment mode" + + def __init__( + self, + *, + environment_resolver: TaskEnvironmentResolver, + launchers: dict[str, TaskEnvironmentLauncher], + ) -> None: + self._environment_resolver = environment_resolver + self._launchers = dict(launchers) + + def execute( + self, + *, + pipeline, + arguments: AdagioArguments, + console: Console | None = None, + monitor: Monitor | None = None, + cache_config: 
ExecutionCacheConfig | None = None, + ) -> None: + run_serial_pipeline( + pipeline=pipeline, + arguments=arguments, + resolve_task=self._resolve_task, + finish_outputs=_save_outputs, + console=console, + monitor=monitor, + cache_config=cache_config, + ) + + def _resolve_task( + self, + task, + state: SerialExecutionState, + console: Console | None, + ) -> bool: + if isinstance(task, RootInputTask): + for name, src in task.inputs.items(): + dst = task.outputs[name] + state.scope[dst.id] = state.scope[src.id] + return False + + if isinstance(task, PluginActionTask): + return self._execute_plugin_action( + task=task, + state=state, + console=console, + ) + + raise TypeError(f"Unsupported task type: {type(task)}") + + def _execute_plugin_action( + self, + *, + task: PluginActionTask, + state: SerialExecutionState, + console: Console | None, + ) -> bool: + environment = self._environment_resolver.resolve(task=task) + launcher = self._launchers.get(environment.kind) + if launcher is None: + raise RuntimeError( + f"No task environment launcher registered for kind {environment.kind!r}." 
+ ) + + archive_inputs: dict[str, str] = {} + archive_collection_inputs: dict[str, list[str]] = {} + metadata_inputs: dict[str, str] = {} + for name, src in task.inputs.items(): + if src.kind == "archive": + archive_inputs[name] = state.scope[src.id] + elif src.kind == "archive-collection": + archive_collection_inputs[name] = [ + state.scope[item.id] for item in src.items + ] + elif src.kind == "metadata": + metadata_inputs[name] = state.scope[src.id] + else: + raise TypeError(f"Unsupported input kind: {src.kind!r}") + + resolved_params: dict[str, object] = {} + metadata_column_kwargs: dict[str, dict[str, str]] = {} + for name, param in task.parameters.items(): + if param.kind == "literal": + resolved_params[name] = param.value + elif param.kind == "promoted": + resolved_params[name] = state.params[param.id] + elif param.kind == "metadata": + column = param.column + if column.kind == "literal": + column_name = str(column.value) + elif column.kind == "promoted": + column_name = str(state.params[column.id]) + else: + raise TypeError( + f"Unsupported metadata column kind: {column.kind!r}" + ) + metadata_column_kwargs[name] = {"source": name, "column": column_name} + else: + raise TypeError(f"Unsupported parameter kind: {param.kind!r}") + + outputs = build_task_outputs( + task_id=task.id, + output_names=task.outputs.keys(), + work_path=state.work_path, + ) + request = TaskExecutionRequest( + task=task, + cwd=state.cwd, + work_path=state.work_path, + archive_inputs=archive_inputs, + archive_collection_inputs=archive_collection_inputs, + metadata_inputs=metadata_inputs, + params=resolved_params, + metadata_column_kwargs=metadata_column_kwargs, + outputs=outputs, + cache_path=( + str(state.cache_config.cache_dir) + if state.cache_config is not None + else None + ), + recycle_pool=( + state.cache_config.recycle_pool + if state.cache_config is not None + else None + ), + ) + result = launcher.launch( + environment=environment, + request=request, + console=console, + ) + + 
def _save_outputs(
    *,
    sig,
    arguments: AdagioArguments,
    state: SerialExecutionState,
    monitor: Monitor | None,
    require_all: bool = True,
) -> None:
    """Copy every available, not-yet-saved pipeline output to its destination.

    Outputs already recorded in ``state.saved_output_ids`` are skipped, so the
    function can be called repeatedly as tasks finish. Each successful copy is
    recorded in ``state.saved_output_ids``; each attempt is reported through
    ``monitor.finish_output`` when a monitor is given.

    Args:
        sig: Pipeline signature; only its ``outputs`` are read.
        arguments: Run arguments; ``outputs`` is a directory or a name-to-path map.
        state: Execution state holding the scope and save bookkeeping.
        monitor: Optional monitor to notify about output saving.
        require_all: When true, an output missing from the scope is an error
            rather than being skipped.

    Raises:
        KeyError: ``require_all`` is true and an output has no value in scope.
    """
    if isinstance(arguments.outputs, str):
        os.makedirs(arguments.outputs, exist_ok=True)

    for spec in sig.outputs:
        output_id = spec.id
        if output_id in state.saved_output_ids:
            continue
        if output_id not in state.scope:
            if not require_all:
                continue
            raise KeyError(
                f"Missing output value for {spec.name!r} ({spec.id})."
            )

        source_path = Path(state.scope[output_id])
        destination = resolve_output_destination(
            output_name=spec.name,
            output_names=[item.name for item in sig.outputs],
            outputs=arguments.outputs,
            source_path=source_path,
        )

        target_dir = os.path.dirname(destination)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Announce the save phase once, on the first output actually copied.
        if monitor is not None and not state.save_output_started:
            monitor.start_save_output()
            state.save_output_started = True

        report = {
            "output_id": spec.id,
            "output_name": spec.name,
            "destination": destination,
        }
        try:
            shutil.copy2(source_path, destination)
        except Exception as exc:  # noqa: BLE001
            # Report the failure, then let the original error propagate.
            if monitor is not None:
                monitor.finish_output(**report, status="failed", error=str(exc))
            raise

        if monitor is not None:
            monitor.finish_output(**report, status="succeeded")
        state.saved_output_ids.add(output_id)
class AdagioArgumentsFile(BaseModel):
    """Represent arguments loaded from a JSON file.

    Every field has a default, so an empty JSON object validates. Unlike
    ``AdagioArguments``, ``outputs`` may be omitted here.
    """

    # Format version of the arguments file; defaults to 1 when absent.
    version: int = 1
    # Input values keyed by name; a fresh empty dict per instance by default.
    inputs: dict[str, str] = Field(default_factory=dict)
    # Parameter values keyed by name; a fresh empty dict per instance by default.
    parameters: dict[str, AllowableValue] = Field(default_factory=dict)
    # A single string, a mapping keyed by name, or None when not provided.
    outputs: str | dict[str, str] | None = None
def _is_metadata_ast(ast: TypeAST) -> bool:
    """Return True if ``ast`` is, or contains, a builtin ``Metadata*`` type.

    An expression node matches when it is builtin and its name starts with
    ``"Metadata"``. A union or intersection matches when any member matches.
    Any other node does not match.
    """
    if isinstance(ast, TypeASTExpression):
        if not ast.builtin:
            return False
        return bool(ast.name.startswith("Metadata"))

    if isinstance(ast, (TypeASTUnion, TypeASTIntersection)):
        for member in ast.members:
            if _is_metadata_ast(member):
                return True
        return False

    return False
class Monitor:
    """Base class for pipeline-execution monitors.

    Every hook is a no-op that returns ``None``; subclasses override the
    hooks they care about.
    """

    def start_pipeline(self, *, total_tasks: int = 0) -> None:
        """Called when a pipeline run begins, with the number of tasks."""

    def start_load_input(self) -> None:
        """Called before pipeline inputs are loaded."""

    def finish_load_input(self) -> None:
        """Called after pipeline inputs are loaded."""

    def queue_task(
        self, *, task_id: str, label: str, total_subtasks: int = 1
    ) -> None:
        """Called to register a task before it starts executing."""

    def start_task(self, *, task_id: str) -> None:
        """Called when a task starts executing."""

    def advance_task(
        self, *, task_id: str, advance: int = 1, message: str | None = None
    ) -> None:
        """Called to report progress on a task."""

    def finish_task(
        self, *, task_id: str, status: str = "completed", error: str | None = None
    ) -> None:
        """Called when a task ends, with its final status."""

    def start_save_output(self) -> None:
        """Called before outputs are saved."""

    def finish_output(
        self,
        *,
        output_id: str,
        output_name: str,
        destination: str,
        status: str = "succeeded",
        error: str | None = None,
    ) -> None:
        """Called once per output artifact with the result of saving it."""

    def finish_save_output(self) -> None:
        """Called after outputs are saved."""

    def finish_pipeline(self) -> None:
        """Called when a pipeline run ends."""
class ConnectedMonitor(Monitor):
    """Send monitor lifecycle events to the runtime-adapter.

    Each hook POSTs one JSON object to ``<runtime_url>/jobs/<job_id>/events``.
    Delivery is best-effort: transport failures are swallowed so that
    telemetry problems never interrupt pipeline execution.
    """

    def __init__(self, *, runtime_url: str, job_id: str, timeout: float = 5.0):
        """Store the events endpoint and the per-request timeout.

        Args:
            runtime_url: Base URL of the adapter; trailing slashes are ignored.
            job_id: Job identifier inserted into the events URL.
            timeout: Timeout in seconds passed to each HTTP request.
        """
        base = runtime_url.rstrip("/")
        self._url = f"{base}/jobs/{job_id}/events"
        self._timeout = timeout

    def start_pipeline(self, *, total_tasks: int = 0) -> None:
        """Emit ``pipeline_start`` with the task count."""
        self._post(event="pipeline_start", total_tasks=total_tasks)

    def start_load_input(self) -> None:
        """Emit ``load_input_start``."""
        self._post(event="load_input_start")

    def finish_load_input(self) -> None:
        """Emit ``load_input_finish``."""
        self._post(event="load_input_finish")

    def queue_task(
        self, *, task_id: str, label: str, total_subtasks: int = 1
    ) -> None:
        """Emit ``task_queued`` with the task's label and subtask count."""
        self._post(
            event="task_queued",
            task_id=task_id,
            label=label,
            total_subtasks=total_subtasks,
        )

    def start_task(self, *, task_id: str) -> None:
        """Emit ``task_started``."""
        self._post(event="task_started", task_id=task_id)

    def advance_task(
        self, *, task_id: str, advance: int = 1, message: str | None = None
    ) -> None:
        """Emit ``task_progress``; ``message`` is included only when non-empty."""
        payload: dict[str, Any] = {
            "event": "task_progress",
            "task_id": task_id,
            "advance": advance,
        }
        if message:
            payload["message"] = message
        self._post(**payload)

    def finish_task(
        self, *, task_id: str, status: str = "completed", error: str | None = None
    ) -> None:
        """Emit ``task_finished``; ``error`` is included only when non-empty."""
        payload: dict[str, Any] = {
            "event": "task_finished",
            "task_id": task_id,
            "status": status,
        }
        if error:
            payload["error"] = error
        self._post(**payload)

    def start_save_output(self) -> None:
        """Emit ``save_output_start``."""
        self._post(event="save_output_start")

    def finish_output(
        self,
        *,
        output_id: str,
        output_name: str,
        destination: str,
        status: str = "succeeded",
        error: str | None = None,
    ) -> None:
        """Emit ``output_saved``; ``error`` is included only when non-empty."""
        payload: dict[str, Any] = {
            "event": "output_saved",
            "output_id": output_id,
            "output_name": output_name,
            "destination": destination,
            "status": status,
        }
        if error:
            payload["error"] = error
        self._post(**payload)

    def finish_save_output(self) -> None:
        """Emit ``save_output_finish``."""
        self._post(event="save_output_finish")

    def finish_pipeline(self) -> None:
        """Emit ``pipeline_finish``."""
        self._post(event="pipeline_finish")

    def _post(self, **payload: Any) -> None:
        """POST ``payload`` as JSON to the events URL, ignoring transport errors."""
        # Imported here because this module does not otherwise import it.
        import http.client

        data = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(
            self._url,
            data=data,
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib.request.urlopen(req, timeout=self._timeout):
                pass
        except (OSError, http.client.HTTPException):
            # Best-effort telemetry: execution should continue even if the
            # adapter is unavailable. OSError covers URLError/HTTPError and
            # TimeoutError, plus connection resets raised while the response
            # is being read, which urllib does not wrap in URLError.
            # HTTPException covers malformed responses such as BadStatusLine.
            return None
destination={destination!r}" + if error: + details += f" error={error!r}" + self._console.log(f"saved output {details}") + + def finish_save_output(self) -> None: + """Log output-save completion.""" + self._console.log("finished saving outputs") + + def finish_pipeline(self) -> None: + """Log pipeline completion.""" + self._console.log("pipeline finished") diff --git a/src/adagio/monitor/tty.py b/src/adagio/monitor/tty.py index e69de29..85a7794 100644 --- a/src/adagio/monitor/tty.py +++ b/src/adagio/monitor/tty.py @@ -0,0 +1,293 @@ +import re +import threading +import time +from dataclasses import dataclass + +from rich.console import Console +from rich.control import Control +from rich.segment import ControlType + +from .api import Monitor + +BADGE_WIDTH = 8 +LABEL_WIDTH = 28 +BAR_WIDTH = 28 +COUNTER_WIDTH = 5 +ELAPSED_WIDTH = 4 +ELAPSED_REFRESH_POLL_SECONDS = 0.2 +ELAPSED_COLUMN = ( + BADGE_WIDTH + 1 + LABEL_WIDTH + 1 + BAR_WIDTH + 2 + COUNTER_WIDTH + 2 +) + + +@dataclass +class _TaskState: + task_id: str + label: str + total_subtasks: int + completed_subtasks: int = 0 + status: str = "pending" + error: str | None = None + started_at: float | None = None + finished_at: float | None = None + last_rendered_elapsed_seconds: int | None = None + + +class RichMonitor(Monitor): + """Render compact pipeline progress rows.""" + + def __init__(self, *, console: Console | None = None): + """Initialize the Rich monitor.""" + self._console = console or Console() + self._inline_updates = ( + self._console.is_terminal and not self._console.is_dumb_terminal + ) + self._task_lookup: dict[str, _TaskState] = {} + self._task_order: list[str] = [] + self._status_counts: dict[str, int] = { + "completed": 0, + "cached": 0, + "failed": 0, + "skipped": 0, + } + self._lock = threading.RLock() + self._stop_refresh = threading.Event() + self._refresh_thread: threading.Thread | None = None + self._pipeline_started = False + self._total_tasks = 0 + + def start_pipeline(self, *, total_tasks: 
int = 0) -> None: + """Start rendering pipeline progress.""" + with self._lock: + if self._pipeline_started: + return + self._pipeline_started = True + self._total_tasks = total_tasks + self._stop_refresh.clear() + setattr(self._console, "_adagio_inline_monitor_active", self._inline_updates) + if self._inline_updates: + self._console.control(Control.show_cursor(False)) + self._console.print("[bold]Task Progress[/bold]") + if self._inline_updates: + self._refresh_thread = threading.Thread( + target=self._refresh_loop, + name="adagio-rich-monitor", + daemon=True, + ) + self._refresh_thread.start() + + def queue_task( + self, *, task_id: str, label: str, total_subtasks: int = 1 + ) -> None: + """Queue a task row in the progress view.""" + with self._lock: + total = max(total_subtasks, 1) + state = _TaskState( + task_id=task_id, + label=label, + total_subtasks=total, + ) + self._task_lookup[task_id] = state + self._task_order.append(task_id) + self._print_row(self._render_row(state)) + + def start_task(self, *, task_id: str) -> None: + """Mark a task as running.""" + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + task.status = "running" + task.started_at = time.monotonic() + self._refresh_row(task) + + def advance_task( + self, *, task_id: str, advance: int = 1, message: str | None = None + ) -> None: + """Advance a task's subtask progress.""" + del message + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + task.completed_subtasks = min( + task.total_subtasks, task.completed_subtasks + max(advance, 0) + ) + self._refresh_row(task) + + def finish_task( + self, *, task_id: str, status: str = "completed", error: str | None = None + ) -> None: + """Mark a task as finished.""" + with self._lock: + task = self._task_lookup.get(task_id) + if task is None: + return + + task.status = status + task.error = error + task.finished_at = time.monotonic() + if status in {"completed", "cached", "skipped"}: + 
task.completed_subtasks = task.total_subtasks + if status in self._status_counts: + self._status_counts[status] += 1 + self._refresh_row(task) + + def finish_pipeline(self) -> None: + """Stop rendering and print a summary.""" + if not self._pipeline_started: + return + self._stop_refresh.set() + if self._refresh_thread is not None: + self._refresh_thread.join(timeout=1.0) + self._refresh_thread = None + with self._lock: + if self._inline_updates: + self._console.control(Control.show_cursor(True)) + setattr(self._console, "_adagio_inline_monitor_active", False) + pending = self._total_tasks - sum(self._status_counts.values()) + self._console.print( + "Summary: " + f"{self._status_counts['completed']} completed, " + f"{self._status_counts['cached']} cached, " + f"{self._status_counts['failed']} failed, " + f"{self._status_counts['skipped']} skipped, " + f"{max(pending, 0)} pending" + ) + self._pipeline_started = False + + def _refresh_row(self, task: _TaskState) -> None: + """Refresh a rendered task row.""" + task.last_rendered_elapsed_seconds = _elapsed_seconds(task) + if self._inline_updates: + self._rewrite_task_row(task) + return + self._print_row(self._render_row(task)) + + def _refresh_loop(self) -> None: + """Refresh running task timers once per displayed second.""" + while not self._stop_refresh.wait(ELAPSED_REFRESH_POLL_SECONDS): + with self._lock: + self._refresh_running_timers() + + def _refresh_running_timers(self) -> None: + """Refresh only the elapsed field for running tasks that advanced.""" + for task in self._task_lookup.values(): + if task.status != "running": + continue + elapsed_seconds = _elapsed_seconds(task) + if elapsed_seconds == task.last_rendered_elapsed_seconds: + continue + self._rewrite_elapsed(task) + task.last_rendered_elapsed_seconds = elapsed_seconds + + def _render_row(self, task: _TaskState) -> str: + """Build a compact row for a task.""" + badge_text, color = _status_style(task.status) + badge_plain = badge_text.ljust(BADGE_WIDTH) 
+ badge = f"[bold {color}]{badge_plain}[/]" + label = _compact_label(task.label, LABEL_WIDTH).ljust(LABEL_WIDTH) + bar = _bar_text(task.completed_subtasks, task.total_subtasks, color, BAR_WIDTH) + counter = f"{task.completed_subtasks}/{task.total_subtasks}" + elapsed = _elapsed(task) + error = "" + if task.status == "failed" and task.error: + error = f" [red]{task.error}[/]" + return ( + f"{badge} {label} {bar} " + f"{counter.rjust(COUNTER_WIDTH)} {elapsed.rjust(ELAPSED_WIDTH)}{error}" + ) + + def _print_row(self, row: str) -> None: + """Print a single task row.""" + self._console.print( + row, + markup=True, + highlight=False, + no_wrap=True, + overflow="crop", + ) + + def _rewrite_task_row(self, task: _TaskState) -> None: + """Rewrite a task row in place without repainting the whole table.""" + distance = self._distance_from_bottom(task) + self._console.control( + Control.move_to_column(0, y=-distance), + Control((ControlType.ERASE_IN_LINE, 2)), + ) + self._console.print( + self._render_row(task), + markup=True, + highlight=False, + no_wrap=True, + overflow="crop", + end="", + ) + self._restore_cursor(distance) + + def _rewrite_elapsed(self, task: _TaskState) -> None: + """Rewrite only the elapsed field for a running task.""" + distance = self._distance_from_bottom(task) + elapsed = _elapsed(task) + padded = elapsed.rjust(max(ELAPSED_WIDTH, len(elapsed))) + self._console.control(Control.move_to_column(ELAPSED_COLUMN, y=-distance)) + self._console.out(padded, end="") + self._restore_cursor(distance) + + def _restore_cursor(self, distance: int) -> None: + """Return the cursor to the stable line below the task list.""" + self._console.control(Control.move_to_column(0, y=distance)) + + def _distance_from_bottom(self, task: _TaskState) -> int: + """Return the cursor distance from the footer line to a task row.""" + row_index = self._task_order.index(task.task_id) + return len(self._task_order) - row_index + + +def _status_style(status: str) -> tuple[str, str]: + 
"""Map task state to badge text and color.""" + lookup = { + "pending": ("PENDING", "yellow"), + "running": ("RUNNING", "cyan"), + "completed": ("DONE", "green"), + "cached": ("CACHED", "blue"), + "failed": ("FAILED", "red"), + "skipped": ("SKIPPED", "magenta"), + } + return lookup.get(status, ("PENDING", "yellow")) + + +def _compact_label(label: str, width: int = 28) -> str: + """Trim task labels to a compact display name.""" + match = re.search(r"\(([^)]+)\)\s*$", label) + compact = match.group(1) if match else label + if len(compact) <= width: + return compact + return compact[: width - 1] + "…" + + +def _bar_text(completed: int, total: int, color: str, width: int = 28) -> str: + """Build a colored progress bar string.""" + if total <= 0: + total = 1 + ratio = min(max(completed / total, 0.0), 1.0) + filled = int(round(ratio * width)) + empty = width - filled + return f"[{color}]{'━' * filled}[/][dim]{'─' * empty}[/]" + + +def _elapsed(task: _TaskState) -> str: + """Format elapsed task time as M:SS.""" + seconds = _elapsed_seconds(task) + minutes, sec = divmod(seconds, 60) + return f"{minutes}:{sec:02d}" + + +def _elapsed_seconds(task: _TaskState) -> int: + """Return elapsed task time in whole seconds.""" + start = task.started_at + if start is None: + return 0 + if task.finished_at is not None: + return max(0, int(task.finished_at - start)) + return max(0, int(time.monotonic() - start)) diff --git a/src/adagio/qapi/__init__.py b/src/adagio/qapi/__init__.py new file mode 100644 index 0000000..dd69a83 --- /dev/null +++ b/src/adagio/qapi/__init__.py @@ -0,0 +1,8 @@ +from .build import DEFAULT_SCHEMA_VERSION, generate_qapi_payload +from .client import submit_qapi_payload + +__all__ = [ + "DEFAULT_SCHEMA_VERSION", + "generate_qapi_payload", + "submit_qapi_payload", +] diff --git a/src/adagio/qapi/build.py b/src/adagio/qapi/build.py new file mode 100644 index 0000000..c74b578 --- /dev/null +++ b/src/adagio/qapi/build.py @@ -0,0 +1,158 @@ +import collections +from 
collections.abc import Sequence +from typing import Any, cast + +DEFAULT_SCHEMA_VERSION = "0.1.0" + + +def normalize_plugin_selection(plugin_names: Sequence[str] | None) -> list[str] | None: + """Normalize repeated or comma-separated plugin names.""" + if plugin_names is None: + return None + + normalized: list[str] = [] + for plugin_name in plugin_names: + for token in plugin_name.split(","): + stripped = token.strip() + if stripped: + normalized.append(stripped) + + return normalized + + +def generate_qapi_payload( + *, + schema_version: str = DEFAULT_SCHEMA_VERSION, + plugins: Sequence[str] | None = None, +) -> dict[str, Any]: + """Generate a QAPI payload for all plugins or a selected subset.""" + import qiime2 + import qiime2.core.transform as transform + import qiime2.sdk + from qiime2.core.type.grammar import IntersectionExp, PredicateExp, UnionExp + from qiime2.core.type.meta import TypeExp, TypeVarExp + + plugin_manager = qiime2.sdk.PluginManager() + + def flatten_type_maps(qiime_type: Any) -> Any: + if isinstance(qiime_type, TypeVarExp): + final = [] + for outer in list(qiime_type): + if isinstance(outer, PredicateExp): + final.append(outer) + continue + for inner in list(outer): + final.append(flatten_type_maps(inner)) + final_union = UnionExp(final) + final_union.normalize() + return final_union + + if isinstance(qiime_type, TypeExp): + final_fields = [flatten_type_maps(field) for field in qiime_type.fields] + + final_predicate = None + if isinstance(qiime_type.predicate, UnionExp): + predicate = qiime_type.predicate.unpack_union() + final_predicate = UnionExp([flatten_type_maps(elem) for elem in predicate]) + final_predicate.normalize() + elif isinstance(qiime_type.predicate, IntersectionExp): + predicate = qiime_type.predicate.unpack_intersection() + final_predicate = IntersectionExp( + [flatten_type_maps(elem) for elem in predicate] + ) + final_predicate.normalize() + elif isinstance(qiime_type.predicate, PredicateExp): + final_predicate = 
flatten_type_maps(qiime_type.predicate) + + return qiime_type.duplicate(final_fields, final_predicate) + + return qiime_type + + def ast_to_basename(ast: dict[str, Any]) -> str: + if not ast.get("fields"): + return cast(str, ast["name"]) + + fields = [ast_to_basename(field) for field in cast(list[dict[str, Any]], ast["fields"])] + return f"{ast['name']}[{', '.join(fields)}]" + + def add_metadata_flag(ast: dict[str, Any]) -> dict[str, Any]: + try: + key = ast_to_basename(ast) + artifact_class = plugin_manager.artifact_classes[key] + from_type = transform.ModelType.from_view_type(artifact_class.format) + to_type = transform.ModelType.from_view_type(qiime2.Metadata) + ast["has_metadata"] = from_type.has_transformation(to_type) + except Exception: + return ast + return ast + + def optional_desc(value: Any) -> str | None: + no_value = qiime2.core.type.signature.__NoValueMeta # type: ignore[attr-defined] + return value if type(value) is not no_value else None + + def build_inspect_dict(action: Any) -> dict[str, Any]: + return { + "id": action.id, + "inputs": [ + { + "name": name, + "type": repr(spec.qiime_type), + "ast": flatten_type_maps(spec.qiime_type).to_ast(), + "required": not spec.has_default(), + "description": optional_desc(spec.description), + } + for name, spec in action.signature.inputs.items() + ], + "parameters": [ + { + "name": name, + "type": repr(spec.qiime_type), + "ast": flatten_type_maps(spec.qiime_type).to_ast(), + "required": not spec.has_default(), + "default": spec.default if spec.has_default() else None, + "description": optional_desc(spec.description), + } + for name, spec in action.signature.parameters.items() + ], + "outputs": [ + { + "name": name, + "type": repr(spec.qiime_type), + "ast": add_metadata_flag(flatten_type_maps(spec.qiime_type).to_ast()), + "description": optional_desc(spec.description), + } + for name, spec in action.signature.outputs.items() + ], + "name": action.name, + "description": action.description, + "source": 
action.source.replace("\n```python\n", "").replace("```\n", ""), + } + + def build_data_dict(data: Any) -> dict[str, Any]: + result: dict[str, Any] = collections.defaultdict(dict) + for key, value in data.items(): + result[key] = build_inspect_dict(value) + return result + + qapi: dict[str, Any] = {} + requested_plugins = normalize_plugin_selection(plugins) + selected_plugins = sorted(plugin_manager.plugins) + if requested_plugins is not None: + available_plugins = set(plugin_manager.plugins) + missing_plugins = sorted(set(requested_plugins) - available_plugins) + if missing_plugins: + missing = ", ".join(missing_plugins) + raise ValueError(f"Unknown plugin name(s): {missing}") + selected_plugins = sorted(set(requested_plugins)) + + for plugin_name in selected_plugins: + plugin = plugin_manager.plugins[plugin_name] + methods_dict = build_data_dict(plugin.actions) + methods_dict.update(build_data_dict(plugin.pipelines)) + qapi[plugin_name] = {"methods": methods_dict} + + return { + "qiime_version": qiime2.__version__, + "schema_version": schema_version, + "data": qapi, + } diff --git a/src/adagio/qapi/client.py b/src/adagio/qapi/client.py new file mode 100644 index 0000000..e1a9907 --- /dev/null +++ b/src/adagio/qapi/client.py @@ -0,0 +1,56 @@ +import json +import os +from typing import Any +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen + + +def submit_qapi_payload( + payload: dict[str, Any], + *, + action_url: str | None = None, + submission_token: str | None = None, + timeout: int = 60, + dry_run: bool = False, + force_overwrite: bool = False, +) -> tuple[str, int, Any]: + resolved_action_url = action_url or os.getenv("ACTION_URL") + if not resolved_action_url: + raise SystemExit( + "No Action URL configured. Set --action-url or ACTION_URL environment variable." 
+ ) + + url = resolved_action_url.rstrip("/") + "/qapi/" + resolved_submission_token = submission_token or os.getenv("QAPI_SUBMISSION_TOKEN") + request_body = { + **payload, + "dry_run": dry_run, + "force_overwrite": force_overwrite, + } + headers = {"Content-Type": "application/json"} + if resolved_submission_token: + headers["Authorization"] = f"Bearer {resolved_submission_token}" + req = Request( + url=url, + data=json.dumps(request_body).encode("utf-8"), + headers=headers, + method="POST", + ) + + try: + with urlopen(req, timeout=timeout) as resp: # nosec: B310 - user-supplied API URL is intended + status = resp.status + response_body = resp.read().decode("utf-8", errors="replace") + except HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + raise SystemExit(f"QAPI submit failed ({exc.code}): {body}") from exc + except URLError as exc: + raise SystemExit(f"QAPI submit failed: {exc.reason}") from exc + + if not response_body.strip(): + return url, status, "" + + try: + return url, status, json.loads(response_body) + except json.JSONDecodeError: + return url, status, response_body diff --git a/tests/test_apptainer_launcher.py b/tests/test_apptainer_launcher.py new file mode 100644 index 0000000..5032d2a --- /dev/null +++ b/tests/test_apptainer_launcher.py @@ -0,0 +1,214 @@ +import subprocess +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from adagio.executors.apptainer import ApptainerTaskEnvironmentLauncher +from adagio.executors.base import TaskEnvironmentSpec, TaskExecutionRequest +from adagio.executors.container_support import ( + container_python_root, + containerize_path, + mount_roots, +) +from adagio.executors.task_contract import ( + build_result_manifest, + result_manifest_path, + task_spec_path, + write_json_file, +) +from adagio.model.task import PluginActionTask + + +def _task() -> PluginActionTask: + return PluginActionTask.model_validate( + { + "id": "task-1", + "kind": 
"plugin-action", + "plugin": "dada2", + "action": "denoise_single", + "inputs": {}, + "parameters": {}, + "outputs": {"table": {"kind": "archive", "id": "out-1"}}, + } + ) + + +class ApptainerLauncherTests(unittest.TestCase): + def test_launch_builds_apptainer_exec_command(self) -> None: + launcher = ApptainerTaskEnvironmentLauncher() + task = _task() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + image_path = root / "q2-dada2.sif" + image_path.write_text("stub", encoding="utf-8") + output_path = work_path / "table.qza" + input_path = cwd / "input.qza" + input_path.write_text("input", encoding="utf-8") + + request = TaskExecutionRequest( + task=task, + cwd=cwd, + work_path=work_path, + archive_inputs={"seqs": str(input_path)}, + archive_collection_inputs={}, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"table": str(output_path)}, + ) + + manifest_path = result_manifest_path(task_id=task.id, work_path=work_path) + expected_spec = containerize_path( + task_spec_path(task_id=task.id, work_path=work_path) + ) + + def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 + write_json_file( + manifest_path, + build_result_manifest( + outputs={"table": containerize_path(output_path)}, + reused=False, + ), + ) + return subprocess.CompletedProcess(cmd, 0, "", "") + + with ( + patch( + "adagio.executors.apptainer.shutil.which", + side_effect=["/usr/bin/apptainer", None], + ), + patch( + "adagio.executors.apptainer.subprocess.run", + side_effect=fake_run, + ) as run_mock, + ): + result = launcher.launch( + environment=TaskEnvironmentSpec( + kind="apptainer", + reference=str(image_path), + ), + request=request, + ) + + command = run_mock.call_args.args[0] + python_root = container_python_root(work_path=work_path) + bind_targets = { + f"{root_path}:{containerize_path(root_path)}:rw" + for root_path in mount_roots( + [cwd, 
work_path, input_path, python_root] + ) + } + + self.assertEqual(command[0], "/usr/bin/apptainer") + self.assertEqual(command[1], "exec") + self.assertIn("--cleanenv", command) + self.assertIn("--no-home", command) + self.assertIn("--pwd", command) + self.assertIn(containerize_path(cwd), command) + self.assertIn(str(image_path), command) + self.assertIn("env", command) + self.assertIn(f"PYTHONPATH={containerize_path(python_root)}", command) + self.assertIn("PYTHONNOUSERSITE=1", command) + self.assertIn("python", command) + self.assertIn("-m", command) + self.assertIn("adagio.cli.task_exec", command) + self.assertIn("--task", command) + self.assertIn(expected_spec, command) + self.assertTrue(bind_targets.issubset(set(command))) + self.assertEqual(result.outputs, {"table": str(output_path)}) + self.assertFalse(result.reused) + + def test_launch_falls_back_to_singularity(self) -> None: + launcher = ApptainerTaskEnvironmentLauncher() + task = _task() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + image_path = root / "q2-dada2.sif" + image_path.write_text("stub", encoding="utf-8") + output_path = work_path / "table.qza" + manifest_path = result_manifest_path(task_id=task.id, work_path=work_path) + + request = TaskExecutionRequest( + task=task, + cwd=cwd, + work_path=work_path, + archive_inputs={}, + archive_collection_inputs={}, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"table": str(output_path)}, + ) + + def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 + write_json_file( + manifest_path, + build_result_manifest( + outputs={"table": containerize_path(output_path)}, + reused=False, + ), + ) + return subprocess.CompletedProcess(cmd, 0, "", "") + + with ( + patch( + "adagio.executors.apptainer.shutil.which", + side_effect=[None, "/usr/bin/singularity"], + ), + patch( + 
"adagio.executors.apptainer.subprocess.run", + side_effect=fake_run, + ) as run_mock, + ): + launcher.launch( + environment=TaskEnvironmentSpec( + kind="apptainer", + reference=str(image_path), + ), + request=request, + ) + + command = run_mock.call_args.args[0] + self.assertEqual(command[0], "/usr/bin/singularity") + + def test_launch_rejects_non_local_image_reference(self) -> None: + launcher = ApptainerTaskEnvironmentLauncher() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + + request = TaskExecutionRequest( + task=_task(), + cwd=cwd, + work_path=work_path, + archive_inputs={}, + archive_collection_inputs={}, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"table": str(work_path / "table.qza")}, + ) + + with self.assertRaisesRegex(RuntimeError, "local \\.sif image paths"): + launcher.launch( + environment=TaskEnvironmentSpec( + kind="apptainer", + reference="docker://ghcr.io/cymis/qiime2-plugin-dada2:2026.1", + ), + request=request, + ) diff --git a/tests/test_container_support.py b/tests/test_container_support.py new file mode 100644 index 0000000..3dccd2e --- /dev/null +++ b/tests/test_container_support.py @@ -0,0 +1,58 @@ +import tempfile +import unittest +from pathlib import Path + +from adagio.executors.container_support import ( + STAGED_CONTAINER_PYTHON_ROOT, + container_python_root, +) + + +class ContainerPythonRootTests(unittest.TestCase): + def test_prefers_repo_src_tree_when_available(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + src_root = root / "src" + package_dir = src_root / "adagio" + module_file = package_dir / "executors" / "container_support.py" + work_path = root / "work" + + (package_dir / "executors").mkdir(parents=True) + work_path.mkdir() + (package_dir / "__init__.py").write_text("", encoding="utf-8") + module_file.write_text("", 
encoding="utf-8") + + result = container_python_root(work_path=work_path, module_file=module_file) + + self.assertEqual(result, src_root) + self.assertFalse((work_path / STAGED_CONTAINER_PYTHON_ROOT).exists()) + + def test_stages_only_adagio_package_from_site_packages(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + site_packages = root / "site-packages" + package_dir = site_packages / "adagio" + module_file = package_dir / "executors" / "container_support.py" + work_path = root / "work" + + (package_dir / "executors").mkdir(parents=True) + (package_dir / "cli").mkdir() + (site_packages / "psutil").mkdir(parents=True) + work_path.mkdir() + + (package_dir / "__init__.py").write_text("", encoding="utf-8") + (package_dir / "cli" / "task_exec.py").write_text( + "VALUE = 1\n", encoding="utf-8" + ) + module_file.write_text("", encoding="utf-8") + (site_packages / "psutil" / "__init__.py").write_text( + "VALUE = 2\n", encoding="utf-8" + ) + + result = container_python_root(work_path=work_path, module_file=module_file) + + staged_root = work_path / STAGED_CONTAINER_PYTHON_ROOT + self.assertEqual(result, staged_root) + self.assertTrue((staged_root / "adagio" / "__init__.py").exists()) + self.assertTrue((staged_root / "adagio" / "cli" / "task_exec.py").exists()) + self.assertFalse((staged_root / "psutil").exists()) diff --git a/tests/test_docker_launcher.py b/tests/test_docker_launcher.py new file mode 100644 index 0000000..e41d4b6 --- /dev/null +++ b/tests/test_docker_launcher.py @@ -0,0 +1,129 @@ +import subprocess +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from adagio.executors.base import TaskEnvironmentSpec, TaskExecutionRequest +from adagio.executors.container_support import ( + container_python_root, + containerize_path, + mount_roots, +) +from adagio.executors.docker import DockerTaskEnvironmentLauncher +from adagio.executors.task_contract import ( + 
build_result_manifest, + read_json_file, + result_manifest_path, + task_spec_path, + write_json_file, +) +from adagio.model.task import PluginActionTask + + +def _task() -> PluginActionTask: + return PluginActionTask.model_validate( + { + "id": "task-1", + "kind": "plugin-action", + "plugin": "demux", + "action": "summarize", + "inputs": {}, + "parameters": {}, + "outputs": {"visualization": {"kind": "archive", "id": "out-1"}}, + } + ) + + +class DockerLauncherTests(unittest.TestCase): + def test_launch_builds_docker_run_command(self) -> None: + launcher = DockerTaskEnvironmentLauncher() + task = _task() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir).resolve() + cwd = root / "cwd" + work_path = root / "work" + cwd.mkdir() + work_path.mkdir() + output_path = work_path / "summary.qzv" + input_path = cwd / "input.qza" + collection_input_path = cwd / "collection-input.qza" + input_path.write_text("input", encoding="utf-8") + collection_input_path.write_text("collection", encoding="utf-8") + + request = TaskExecutionRequest( + task=task, + cwd=cwd, + work_path=work_path, + archive_inputs={"data": str(input_path)}, + archive_collection_inputs={ + "tables": [str(collection_input_path)] + }, + metadata_inputs={}, + params={}, + metadata_column_kwargs={}, + outputs={"visualization": str(output_path)}, + ) + + manifest_path = result_manifest_path(task_id=task.id, work_path=work_path) + expected_spec = containerize_path( + task_spec_path(task_id=task.id, work_path=work_path) + ) + + def fake_run(cmd, check, stdout, stderr, text): # noqa: ANN001 + write_json_file( + manifest_path, + build_result_manifest( + outputs={"visualization": containerize_path(output_path)}, + reused=False, + ), + ) + return subprocess.CompletedProcess(cmd, 0, "", "") + + with patch( + "adagio.executors.docker.subprocess.run", + side_effect=fake_run, + ) as run_mock: + result = launcher.launch( + environment=TaskEnvironmentSpec( + kind="docker", + 
reference="ghcr.io/cymis/qiime2-plugin-demux:2026.1", + ), + request=request, + ) + + task_spec = read_json_file(task_spec_path(task_id=task.id, work_path=work_path)) + + command = run_mock.call_args.args[0] + python_root = container_python_root(work_path=work_path) + bind_targets = { + f"{root_path}:{containerize_path(root_path)}:rw" + for root_path in mount_roots( + [cwd, work_path, input_path, collection_input_path, python_root] + ) + } + + self.assertEqual(command[0], "docker") + self.assertEqual(command[1], "run") + self.assertEqual(command[2], "--rm") + self.assertIn("-w", command) + self.assertIn(containerize_path(cwd), command) + self.assertIn( + f"PYTHONPATH={containerize_path(python_root)}", + command, + ) + self.assertIn("PYTHONNOUSERSITE=1", command) + self.assertIn("python", command) + self.assertIn("-m", command) + self.assertIn("adagio.cli.task_exec", command) + self.assertIn("--task", command) + self.assertIn(expected_spec, command) + self.assertIn("ghcr.io/cymis/qiime2-plugin-demux:2026.1", command) + self.assertTrue(bind_targets.issubset(set(command))) + self.assertEqual( + task_spec["archive_collection_inputs"], + {"tables": [containerize_path(collection_input_path)]}, + ) + self.assertEqual(result.outputs, {"visualization": str(output_path)}) + self.assertFalse(result.reused) diff --git a/tests/test_output_options.py b/tests/test_output_options.py new file mode 100644 index 0000000..229ab8e --- /dev/null +++ b/tests/test_output_options.py @@ -0,0 +1,202 @@ +import typing +import unittest + +from adagio.app.parsers.pipeline import Input, Output, Parameter, parse_outputs +from adagio.cli.args import ShowParamsMode +from adagio.cli.dynamic import build_dynamic_run +from adagio.cli.main import _filter_visible_specs +from adagio.cli.runner import _apply_output_overrides + + +class OutputOptionTests(unittest.TestCase): + def test_parse_outputs_preserves_descriptions(self) -> None: + data = { + "signature": { + "inputs": [], + "parameters": [], + 
"outputs": [ + { + "id": "00000000-0000-0000-0000-000000000001", + "name": "table", + "type": "FeatureTable[Frequency]", + "description": "Denoised feature table.", + } + ], + } + } + + outputs = parse_outputs(data) + + self.assertEqual(outputs[0].name, "table") + self.assertEqual(outputs[0].description, "Denoised feature table.") + + def test_dynamic_run_adds_output_dir_and_per_output_options(self) -> None: + dynamic_run = build_dynamic_run( + input_specs=[], + param_specs=[], + output_specs=[ + Output( + id="00000000-0000-0000-0000-000000000001", + name="table", + type="FeatureTable[Frequency]", + description="Denoised feature table.", + ) + ], + run_handler=lambda *args, **kwargs: None, + ) + + self.assertIn("output_dir", dynamic_run.__signature__.parameters) + self.assertIn("output_table", dynamic_run.__signature__.parameters) + + output_dir_annotation = dynamic_run.__signature__.parameters["output_dir"].annotation + output_annotation = dynamic_run.__signature__.parameters["output_table"].annotation + output_dir_help = typing.get_args(output_dir_annotation)[1].help + output_help = typing.get_args(output_annotation)[1].help + + self.assertEqual(output_dir_help, "Directory for all pipeline outputs.") + self.assertIn("Denoised feature table.", output_help) + self.assertIn("Overrides --output-dir", output_help) + + def test_output_dir_is_a_command_option_and_required_pipeline_options_are_first( + self, + ) -> None: + dynamic_run = build_dynamic_run( + input_specs=[ + Input( + id="00000000-0000-0000-0000-000000000001", + name="tree", + required=False, + type="Phylogeny[Rooted]", + description="Optional tree.", + ), + Input( + id="00000000-0000-0000-0000-000000000002", + name="seqs", + required=True, + type="SampleData[Sequences]", + description="Required sequences.", + ), + ], + param_specs=[ + Parameter( + id="00000000-0000-0000-0000-000000000003", + name="threads", + required=False, + default=1, + type="Int", + description="Optional thread count.", + ), + 
Parameter( + id="00000000-0000-0000-0000-000000000004", + name="metric", + required=True, + default=None, + type="Str", + description="Required metric.", + ), + ], + output_specs=[ + Output( + id="00000000-0000-0000-0000-000000000005", + name="table", + type="FeatureTable[Frequency]", + description="Output table.", + ) + ], + run_handler=lambda *args, **kwargs: None, + ) + + output_dir_annotation = dynamic_run.__signature__.parameters["output_dir"].annotation + output_dir_group = typing.get_args(output_dir_annotation)[1].group + + self.assertEqual(output_dir_group[0]._name, "Command Options") + self.assertEqual( + list(dynamic_run.__signature__.parameters)[:7], + [ + "pipeline", + "cache_dir", + "arguments_file", + "show_params", + "config_file", + "reuse", + "output_dir", + ], + ) + self.assertEqual( + list(dynamic_run.__signature__.parameters)[7:], + [ + "input_seqs", + "param_metric", + "input_tree", + "param_threads", + "output_table", + ], + ) + + def test_outputs_are_only_visible_in_all_mode(self) -> None: + output_specs = [ + Output( + id="00000000-0000-0000-0000-000000000005", + name="table", + type="FeatureTable[Frequency]", + description="Output table.", + ) + ] + + _, _, required_outputs = _filter_visible_specs( + input_specs=[], + param_specs=[], + output_specs=output_specs, + show_mode=ShowParamsMode.REQUIRED, + arguments_data=None, + ) + _, _, missing_outputs = _filter_visible_specs( + input_specs=[], + param_specs=[], + output_specs=output_specs, + show_mode=ShowParamsMode.MISSING, + arguments_data=None, + ) + _, _, all_outputs = _filter_visible_specs( + input_specs=[], + param_specs=[], + output_specs=output_specs, + show_mode=ShowParamsMode.ALL, + arguments_data=None, + ) + + self.assertEqual(required_outputs, []) + self.assertEqual(missing_outputs, []) + self.assertEqual(all_outputs, output_specs) + + def test_output_dir_override_applies_to_all_outputs(self) -> None: + resolved = _apply_output_overrides( + outputs={"table": 
"/tmp/from-file/table.qza", "stats": "/tmp/from-file/stats.qza"}, + output_names=["table", "stats"], + output_dir="/tmp/all-outputs", + output_overrides={"stats": "/tmp/custom/stats.qza"}, + ) + + self.assertEqual( + resolved, + { + "table": "/tmp/all-outputs/table", + "stats": "/tmp/custom/stats.qza", + }, + ) + + def test_per_output_override_merges_with_shared_directory_outputs(self) -> None: + resolved = _apply_output_overrides( + outputs="/tmp/from-arguments-dir", + output_names=["table", "stats"], + output_dir=None, + output_overrides={"stats": "/tmp/custom/stats.qza"}, + ) + + self.assertEqual( + resolved, + { + "table": "/tmp/from-arguments-dir/table", + "stats": "/tmp/custom/stats.qza", + }, + ) diff --git a/tests/test_pipeline_descriptions.py b/tests/test_pipeline_descriptions.py new file mode 100644 index 0000000..ceaec18 --- /dev/null +++ b/tests/test_pipeline_descriptions.py @@ -0,0 +1,182 @@ +import typing +import unittest + +from adagio.app.parsers.pipeline import Input, Parameter, parse_inputs, parse_parameters +from adagio.cli.dynamic import ( + _compact_type_text, + _display_type_label, + _pipeline_type_label, + _wrap_type_label, + build_dynamic_run, +) +from adagio.model.pipeline import AdagioPipeline + + +class PipelineDescriptionTests(unittest.TestCase): + def test_pipeline_model_accepts_signature_descriptions(self) -> None: + ast = { + "type": "expression", + "builtin": True, + "name": "Str", + "predicate": None, + "fields": [], + } + pipeline = AdagioPipeline.model_validate( + { + "type": "pipeline", + "signature": { + "inputs": [ + { + "id": "input-1", + "name": "table", + "type": "FeatureTable[Frequency]", + "ast": ast, + "required": True, + "description": "Input table.", + } + ], + "parameters": [ + { + "id": "param-1", + "name": "trunc_len", + "type": "Int", + "ast": ast, + "required": False, + "default": 120, + "description": "Trim reads to this length.", + } + ], + "outputs": [ + { + "id": "output-1", + "name": "table", + "type": 
"FeatureTable[Frequency]", + "ast": ast, + "description": "Denoised table.", + } + ], + }, + "graph": [], + } + ) + + self.assertEqual(pipeline.signature.inputs[0].description, "Input table.") + self.assertEqual( + pipeline.signature.parameters[0].description, + "Trim reads to this length.", + ) + self.assertEqual(pipeline.signature.outputs[0].description, "Denoised table.") + + def test_pipeline_parsers_preserve_descriptions(self) -> None: + data = { + "signature": { + "inputs": [ + { + "id": "00000000-0000-0000-0000-000000000001", + "name": "table", + "required": True, + "type": "FeatureTable[Frequency]", + "description": "Input table.", + } + ], + "parameters": [ + { + "id": "00000000-0000-0000-0000-000000000002", + "name": "trunc_len", + "required": False, + "default": 120, + "type": "Int", + "description": "Trim reads to this length.", + } + ], + "outputs": [], + } + } + + self.assertEqual(parse_inputs(data)[0].description, "Input table.") + self.assertEqual( + parse_parameters(data)[0].description, "Trim reads to this length." 
+ ) + + def test_dynamic_run_help_includes_descriptions(self) -> None: + dynamic_run = build_dynamic_run( + input_specs=[ + Input( + id="00000000-0000-0000-0000-000000000001", + name="table", + required=True, + type="FeatureTable[Frequency]", + description="Input table.", + ) + ], + param_specs=[ + Parameter( + id="00000000-0000-0000-0000-000000000002", + name="trunc_len", + required=False, + default=120, + type="Int", + description="Trim reads to this length.", + ) + ], + output_specs=[], + run_handler=lambda *args, **kwargs: None, + ) + + input_annotation = dynamic_run.__signature__.parameters["input_table"].annotation + param_annotation = dynamic_run.__signature__.parameters["param_trunc_len"].annotation + input_help = typing.get_args(input_annotation)[1].help + param_help = typing.get_args(param_annotation)[1].help + + self.assertIsInstance(input_help, str) + self.assertIsInstance(param_help, str) + self.assertIn("Input table.", input_help) + self.assertIn("Trim reads to this length.", param_help) + self.assertNotIn("Pipeline input:", input_help) + self.assertNotIn("Pipeline parameter:", param_help) + + def test_choices_are_rendered_compactly(self) -> None: + compact = _compact_type_text( + "Str % Choices('ace', 'berger_parker_d', 'brillouin_d')" + ) + self.assertEqual(compact, "[ace|berger_parker_d|brillouin_d]") + + compact_unquoted = _compact_type_text( + "Str % Choices(ace, berger_parker_d, brillouin_d)" + ) + self.assertEqual(compact_unquoted, "[ace|berger_parker_d|brillouin_d]") + + def test_long_choice_labels_wrap_on_pipes(self) -> None: + wrapped = _wrap_type_label( + "[ace|berger_parker_d|brillouin_d|chao1|dominance]", 22 + ) + self.assertIn("\n", wrapped) + self.assertTrue(wrapped.startswith("[")) + self.assertTrue(wrapped.endswith("]")) + self.assertIn("\n |", wrapped) + + def test_pipeline_type_labels_use_general_cli_types(self) -> None: + self.assertEqual(_pipeline_type_label(int), "INTEGER") + self.assertEqual(_pipeline_type_label(float), 
"NUMBER") + self.assertEqual(_pipeline_type_label(bool), "BOOLEAN") + self.assertEqual(_pipeline_type_label(str | None), "TEXT") + + def test_display_type_label_prefers_choices_and_path(self) -> None: + self.assertEqual( + _display_type_label( + spec_type="FeatureTable[Frequency]", type_hint=str, is_input=True + ), + "PATH", + ) + self.assertEqual( + _display_type_label( + spec_type="Str % Choices(ace, berger_parker_d, brillouin_d)", + type_hint=str, + is_input=False, + ), + "[ace|berger_parker_d|brillouin_d]", + ) + self.assertEqual( + _display_type_label(spec_type="Int", type_hint=int, is_input=False), + "INTEGER", + ) diff --git a/tests/test_pipeline_show.py b/tests/test_pipeline_show.py new file mode 100644 index 0000000..4324f9c --- /dev/null +++ b/tests/test_pipeline_show.py @@ -0,0 +1,241 @@ +import io +import json +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path + +from rich.console import Console + +from adagio.describe import render_pipeline_text +from adagio.model.pipeline import AdagioPipeline + + +AST = { + "type": "expression", + "builtin": True, + "name": "Str", + "predicate": None, + "fields": [], +} + + +def _sample_pipeline_dict() -> dict: + return { + "type": "pipeline", + "signature": { + "inputs": [ + { + "id": "input-seqs", + "name": "seqs", + "type": "SampleData[SequencesWithQuality]", + "ast": AST, + "required": True, + "description": "Demultiplexed sequence data.", + }, + { + "id": "input-barcodes", + "name": "barcodes", + "type": "MetadataColumn[Categorical]", + "ast": AST, + "required": True, + "description": "Barcode metadata column.", + }, + ], + "parameters": [ + { + "id": "param-barcodes", + "name": "barcodes", + "type": "MetadataColumn[Categorical]", + "ast": AST, + "required": True, + "description": "Column used to find barcode values.", + }, + { + "id": "param-trim-left", + "name": "trim_left", + "type": "Int", + "ast": AST, + "required": True, + "description": "Trim this many bases from 
the start of each read.", + }, + ], + "outputs": [ + { + "id": "output-table", + "name": "table", + "type": "FeatureTable[Frequency]", + "ast": AST, + "description": "Denoised feature table.", + }, + { + "id": "output-demux", + "name": "per_sample_sequences", + "type": "SampleData[SequencesWithQuality]", + "ast": AST, + "description": "Per-sample demultiplexed sequences.", + } + ], + }, + "graph": [ + { + "id": "task-dada2", + "kind": "plugin-action", + "plugin": "dada2", + "action": "denoise_single", + "inputs": { + "demultiplexed_seqs": {"kind": "archive", "id": "output-demux"} + }, + "parameters": { + "trim_left": {"kind": "promoted", "id": "param-trim-left"} + }, + "outputs": { + "table": {"kind": "archive", "id": "output-table"} + }, + }, + { + "id": "task-demux", + "kind": "plugin-action", + "plugin": "demux", + "action": "emp_single", + "inputs": { + "seqs": {"kind": "archive", "id": "input-seqs"}, + "barcodes": {"kind": "metadata", "id": "input-barcodes"}, + }, + "parameters": { + "barcodes": { + "kind": "metadata", + "column": {"kind": "promoted", "id": "param-barcodes"}, + } + }, + "outputs": { + "per_sample_sequences": { + "kind": "archive", + "id": "output-demux", + } + }, + }, + ], + } + + +def _collection_pipeline_dict() -> dict: + return { + "type": "pipeline", + "signature": { + "inputs": [ + { + "id": "input-table-a", + "name": "table_a", + "type": "FeatureTable[Frequency]", + "ast": AST, + "required": True, + "description": "First table.", + }, + { + "id": "input-table-b", + "name": "table_b", + "type": "FeatureTable[Frequency]", + "ast": AST, + "required": True, + "description": "Second table.", + }, + ], + "parameters": [], + "outputs": [], + }, + "graph": [ + { + "id": "task-merge", + "kind": "plugin-action", + "plugin": "feature_table", + "action": "merge", + "inputs": { + "tables": { + "kind": "archive-collection", + "style": "list", + "items": [ + {"key": "0", "id": "input-table-a"}, + {"key": "1", "id": "input-table-b"}, + ], + } + }, + 
"parameters": {}, + "outputs": {}, + } + ], + } + + +def _render_plain(renderable: object) -> str: + console = Console(record=True, width=160, file=io.StringIO()) + console.print(renderable, soft_wrap=True) + return console.export_text() + + +class PipelineShowTests(unittest.TestCase): + def test_render_pipeline_text_uses_dependency_order_and_resolves_bindings( + self, + ) -> None: + pipeline = AdagioPipeline.model_validate(_sample_pipeline_dict()) + + rendered = _render_plain(render_pipeline_text(pipeline)) + + self.assertLess(rendered.index("demux.emp_single"), rendered.index("dada2.denoise_single")) + self.assertNotIn('Plugin: demux', rendered) + self.assertNotIn('Action: emp_single', rendered) + self.assertIn("╭─ demux.emp_single ", rendered) + self.assertIn('seqs: (SampleData[SequencesWithQuality]) pipeline input "seqs"', rendered) + self.assertIn('Demultiplexed sequence data.', rendered) + self.assertIn('barcodes: (MetadataColumn[Categorical]) pipeline input "barcodes"', rendered) + self.assertIn('Barcode metadata column.', rendered) + self.assertIn( + 'barcodes: (MetadataColumn[Categorical]) metadata column from pipeline input "barcodes" using pipeline parameter "barcodes"', + rendered, + ) + self.assertIn('Column used to find barcode values.', rendered) + self.assertIn( + 'demultiplexed_seqs: (SampleData[SequencesWithQuality]) demux.emp_single.per_sample_sequences', + rendered, + ) + self.assertIn('Per-sample demultiplexed sequences.', rendered) + self.assertIn('trim_left: (Int) pipeline parameter "trim_left"', rendered) + self.assertIn('Trim this many bases from the start of each read.', rendered) + self.assertIn('table (FeatureTable[Frequency])', rendered) + self.assertIn('Denoised feature table.', rendered) + + def test_pipeline_show_cli_prints_summary(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + pipeline_path = Path(tmpdir) / "pipeline.json" + payload = {"spec": _sample_pipeline_dict()} + pipeline_path.write_text(json.dumps(payload), 
encoding="utf-8") + + result = subprocess.run( + [sys.executable, "-m", "adagio.cli.main", "pipeline", "show", str(pipeline_path)], + capture_output=True, + check=False, + text=True, + ) + + self.assertEqual(result.returncode, 0, msg=result.stderr) + self.assertIn("demux.emp_single", result.stdout) + self.assertIn("dada2.denoise_single", result.stdout) + self.assertIn("Inputs:", result.stdout) + self.assertIn('barcodes: (MetadataColumn[Categorical]) pipeline input "barcodes"', result.stdout) + self.assertIn('table (FeatureTable[Frequency])', result.stdout) + + def test_render_pipeline_text_displays_collection_inputs(self) -> None: + pipeline = AdagioPipeline.model_validate(_collection_pipeline_dict()) + + rendered = _render_plain(render_pipeline_text(pipeline)) + + self.assertIn("feature_table.merge", rendered) + self.assertIn( + 'tables: list [pipeline input "table_a", pipeline input "table_b"]', + rendered, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_qapi_submit.py b/tests/test_qapi_submit.py new file mode 100644 index 0000000..1135716 --- /dev/null +++ b/tests/test_qapi_submit.py @@ -0,0 +1,88 @@ +import json +import os +import unittest +from unittest.mock import patch + +from adagio.cli import qapi as qapi_cli +from adagio.qapi.client import submit_qapi_payload + + +class _FakeResponse: + def __init__(self, body: object = None, status: int = 200) -> None: + self.status = status + self._body = "" if body is None else json.dumps(body) + + def __enter__(self) -> "_FakeResponse": + return self + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + def read(self) -> bytes: + return self._body.encode("utf-8") + + +class QapiSubmitTests(unittest.TestCase): + def test_submit_qapi_payload_adds_bearer_token_header(self) -> None: + seen_headers: dict[str, str | None] = {} + + def fake_urlopen(request, timeout=60): + seen_headers["authorization"] = request.get_header("Authorization") + seen_headers["content_type"] = 
request.get_header("Content-type") + return _FakeResponse({"message": "ok"}) + + with patch("adagio.qapi.client.urlopen", side_effect=fake_urlopen): + url, status, response = submit_qapi_payload( + {"qiime_version": "2024.10.0", "schema_version": "0.1.0", "data": {"dada2": {"methods": {}}}}, + action_url="https://adagiodata.com/api/v1", + submission_token="token-123", + ) + + self.assertEqual(url, "https://adagiodata.com/api/v1/qapi/") + self.assertEqual(status, 200) + self.assertEqual(response, {"message": "ok"}) + self.assertEqual(seen_headers["authorization"], "Bearer token-123") + self.assertEqual(seen_headers["content_type"], "application/json") + + def test_submit_qapi_payload_reads_submission_token_from_env(self) -> None: + seen_authorization: dict[str, str | None] = {} + + def fake_urlopen(request, timeout=60): + seen_authorization["value"] = request.get_header("Authorization") + return _FakeResponse({"message": "ok"}) + + with patch.dict(os.environ, {"QAPI_SUBMISSION_TOKEN": "env-token"}, clear=False): + with patch("adagio.qapi.client.urlopen", side_effect=fake_urlopen): + submit_qapi_payload( + { + "qiime_version": "2024.10.0", + "schema_version": "0.1.0", + "data": {"feature-table": {"methods": {}}}, + }, + action_url="https://adagiodata.com/api/v1", + ) + + self.assertEqual(seen_authorization["value"], "Bearer env-token") + + def test_build_qapi_passes_submission_token_to_client(self) -> None: + with patch( + "adagio.cli.qapi.generate_qapi_payload", + return_value={ + "qiime_version": "2024.10.0", + "schema_version": "0.1.0", + "data": {"dada2": {"methods": {}}}, + }, + ), patch("adagio.cli.qapi.submit_qapi_payload") as submit_mock: + submit_mock.return_value = ( + "https://adagiodata.com/api/v1/qapi/", + 200, + {"message": "ok"}, + ) + + qapi_cli.build_qapi( + action_url="https://adagiodata.com/api/v1", + submission_token="token-456", + ) + + submit_mock.assert_called_once() + self.assertEqual(submit_mock.call_args.kwargs["submission_token"], 
"token-456") diff --git a/tests/test_serial_runner.py b/tests/test_serial_runner.py new file mode 100644 index 0000000..8ccad51 --- /dev/null +++ b/tests/test_serial_runner.py @@ -0,0 +1,177 @@ +import tempfile +import unittest +from dataclasses import dataclass, field +from pathlib import Path + +from adagio.executors.serial_runner import run_serial_pipeline +from adagio.executors.task_environments import _save_outputs +from adagio.model.arguments import AdagioArguments +from adagio.monitor.api import Monitor + + +@dataclass(frozen=True) +class FakeEndpoint: + id: str + + +@dataclass(frozen=True) +class FakeOutputDef: + id: str + name: str + + +@dataclass +class FakeTask: + id: str + outputs: dict[str, FakeEndpoint] + kind: str = "plugin-action" + plugin: str = "dummy" + action: str = "action" + inputs: dict[str, FakeEndpoint] = field(default_factory=dict) + + +class FakeSignature: + def __init__(self, outputs: list[FakeOutputDef]) -> None: + self.inputs: list[object] = [] + self.parameters: list[object] = [] + self.outputs = outputs + + def validate_arguments(self, arguments: AdagioArguments) -> None: + del arguments + + def get_params(self, arguments: AdagioArguments) -> dict[str, object]: + del arguments + return {} + + +class FakePipeline: + def __init__(self, *, tasks: list[FakeTask], outputs: list[FakeOutputDef]) -> None: + self.signature = FakeSignature(outputs) + self._tasks = tasks + + def validate_graph(self) -> None: + return None + + def iter_tasks(self): + return iter(self._tasks) + + +class RecordingMonitor(Monitor): + def __init__(self) -> None: + self.save_start_count = 0 + self.save_finish_count = 0 + self.saved_outputs: list[tuple[str, str, str, str]] = [] + + def start_save_output(self) -> None: + self.save_start_count += 1 + + def finish_output( + self, + *, + output_id: str, + output_name: str, + destination: str, + status: str = "succeeded", + error: str | None = None, + ) -> None: + del error + self.saved_outputs.append((output_id, 
output_name, destination, status)) + + def finish_save_output(self) -> None: + self.save_finish_count += 1 + + +class SerialRunnerOutputTests(unittest.TestCase): + def test_preserves_completed_output_when_later_task_fails(self) -> None: + output_def = FakeOutputDef(id="out-1", name="result") + pipeline = FakePipeline( + tasks=[ + FakeTask(id="task-1", outputs={"result": FakeEndpoint("out-1")}), + FakeTask(id="task-2", outputs={"other": FakeEndpoint("out-2")}), + ], + outputs=[output_def], + ) + monitor = RecordingMonitor() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + output_dir = root / "outputs" + arguments = AdagioArguments( + inputs={}, parameters={}, outputs=str(output_dir) + ) + + def resolve_task(task, state, console): # noqa: ANN001 + del console + if task.id == "task-1": + produced = state.work_path / "task-1_result.qza" + produced.write_text("done", encoding="utf-8") + state.scope["out-1"] = str(produced) + return False + raise RuntimeError("task 2 failed") + + with self.assertRaisesRegex(RuntimeError, "task 2 failed"): + run_serial_pipeline( + pipeline=pipeline, + arguments=arguments, + resolve_task=resolve_task, + finish_outputs=_save_outputs, + monitor=monitor, + ) + + saved_path = output_dir / "result.qza" + self.assertTrue(saved_path.exists()) + self.assertEqual(saved_path.read_text(encoding="utf-8"), "done") + self.assertEqual(monitor.save_start_count, 1) + self.assertEqual(monitor.save_finish_count, 1) + + def test_saves_each_output_only_once_across_multiple_tasks(self) -> None: + output_def = FakeOutputDef(id="out-1", name="result") + pipeline = FakePipeline( + tasks=[ + FakeTask(id="task-1", outputs={"result": FakeEndpoint("out-1")}), + FakeTask(id="task-2", outputs={"other": FakeEndpoint("out-2")}), + ], + outputs=[output_def], + ) + monitor = RecordingMonitor() + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + output_dir = root / "outputs" + arguments = AdagioArguments( + inputs={}, 
parameters={}, outputs=str(output_dir) + ) + + def resolve_task(task, state, console): # noqa: ANN001 + del console + if task.id == "task-1": + produced = state.work_path / "task-1_result.qza" + produced.write_text("done", encoding="utf-8") + state.scope["out-1"] = str(produced) + return False + produced = state.work_path / "task-2_other.qza" + produced.write_text("other", encoding="utf-8") + state.scope["out-2"] = str(produced) + return False + + run_serial_pipeline( + pipeline=pipeline, + arguments=arguments, + resolve_task=resolve_task, + finish_outputs=_save_outputs, + monitor=monitor, + ) + + self.assertEqual( + monitor.saved_outputs, + [ + ( + output_def.id, + output_def.name, + str(output_dir / "result.qza"), + "succeeded", + ) + ], + ) + self.assertEqual(monitor.save_start_count, 1) + self.assertEqual(monitor.save_finish_count, 1) diff --git a/tests/test_task_environment_config.py b/tests/test_task_environment_config.py new file mode 100644 index 0000000..a7d8bd8 --- /dev/null +++ b/tests/test_task_environment_config.py @@ -0,0 +1,99 @@ +import tempfile +import unittest +from pathlib import Path + +from adagio.cli.config import load_run_config +from adagio.executors.base import TaskEnvironmentOverride +from adagio.executors.defaults import ( + ConfigurableTaskEnvironmentResolver, + DefaultTaskEnvironmentResolver, +) +from adagio.model.task import PluginActionTask + + +def _task(*, name: str | None = None) -> PluginActionTask: + return PluginActionTask.model_validate( + { + "id": "task-1", + "kind": "plugin-action", + "name": name, + "plugin": "dada2", + "action": "denoise_single", + "inputs": {}, + "parameters": {}, + "outputs": {}, + } + ) + + +class RunConfigTests(unittest.TestCase): + def test_load_run_config_accepts_apptainer_kind(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + config_path = Path(tmpdir) / "runtime.toml" + config_path.write_text( + "\n".join( + [ + "version = 1", + "", + "[defaults]", + 'kind = "apptainer"', + 'image = 
"/images/default.sif"', + "", + "[plugins]", + 'dada2 = { kind = "apptainer", image = "/images/dada2.sif" }', + "", + "[tasks]", + '"dada2.denoise_single" = { image = "/images/task.sif" }', + ] + ), + encoding="utf-8", + ) + + config = load_run_config(config_path) + + assert config is not None + self.assertEqual(config.defaults.kind, "apptainer") + self.assertEqual(config.defaults.image, "/images/default.sif") + self.assertEqual(config.plugins["dada2"].kind, "apptainer") + self.assertEqual(config.tasks["dada2.denoise_single"].image, "/images/task.sif") + + +class ConfigurableResolverTests(unittest.TestCase): + def test_plugin_override_inherits_default_apptainer_kind(self) -> None: + resolver = ConfigurableTaskEnvironmentResolver( + base=DefaultTaskEnvironmentResolver(), + default_override=TaskEnvironmentOverride( + kind="apptainer", + reference="/images/default.sif", + ), + plugin_overrides={ + "dada2": TaskEnvironmentOverride(reference="/images/dada2.sif"), + }, + ) + + environment = resolver.resolve(task=_task()) + + self.assertEqual(environment.kind, "apptainer") + self.assertEqual(environment.reference, "/images/dada2.sif") + + def test_task_override_can_switch_back_to_docker(self) -> None: + resolver = ConfigurableTaskEnvironmentResolver( + base=DefaultTaskEnvironmentResolver(), + default_override=TaskEnvironmentOverride( + kind="apptainer", + reference="/images/default.sif", + ), + task_overrides={ + "named-step": TaskEnvironmentOverride( + kind="docker", + reference="registry.internal/dada2:1.0", + platform="linux/amd64", + ) + }, + ) + + environment = resolver.resolve(task=_task(name="named-step")) + + self.assertEqual(environment.kind, "docker") + self.assertEqual(environment.reference, "registry.internal/dada2:1.0") + self.assertEqual(environment.options, {"platform": "linux/amd64"}) diff --git a/tests/test_tty_monitor.py b/tests/test_tty_monitor.py new file mode 100644 index 0000000..896e0f3 --- /dev/null +++ b/tests/test_tty_monitor.py @@ -0,0 +1,75 @@ 
+import io +import unittest +from unittest.mock import patch + +from rich.console import Console + +from adagio.monitor.tty import RichMonitor, _TaskState, _elapsed + + +class RichMonitorTests(unittest.TestCase): + def test_progress_auto_refresh_is_disabled(self) -> None: + monitor = RichMonitor(console=Console(file=io.StringIO())) + + self.assertFalse(monitor._inline_updates) + + def test_running_task_elapsed_uses_current_second(self) -> None: + task = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="running", + started_at=10.0, + ) + + with patch("adagio.monitor.tty.time.monotonic", return_value=18.9): + self.assertEqual(_elapsed(task), "0:08") + + def test_finished_task_elapsed_uses_duration(self) -> None: + task = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="completed", + started_at=10.0, + finished_at=75.0, + ) + + self.assertEqual(_elapsed(task), "1:05") + + def test_refresh_running_rows_skips_same_elapsed_bucket(self) -> None: + monitor = RichMonitor(console=Console(file=io.StringIO())) + monitor._task_lookup["task-1"] = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="running", + started_at=10.0, + last_rendered_elapsed_seconds=9, + ) + monitor._task_order.append("task-1") + + with patch.object(monitor, "_rewrite_elapsed") as rewrite_elapsed: + with patch("adagio.monitor.tty.time.monotonic", return_value=19.9): + monitor._refresh_running_timers() + + rewrite_elapsed.assert_not_called() + + def test_refresh_running_rows_updates_on_new_elapsed_second(self) -> None: + monitor = RichMonitor(console=Console(file=io.StringIO())) + task = _TaskState( + task_id="task-1", + label="demo", + total_subtasks=1, + status="running", + started_at=10.0, + last_rendered_elapsed_seconds=9, + ) + monitor._task_lookup["task-1"] = task + monitor._task_order.append("task-1") + + with patch.object(monitor, "_rewrite_elapsed") as rewrite_elapsed: + with 
patch("adagio.monitor.tty.time.monotonic", return_value=20.0): + monitor._refresh_running_timers() + + rewrite_elapsed.assert_called_once_with(task)