diff --git a/server/migrations/versions/076_add_feature_tables_record_srn_fks.py b/server/migrations/versions/076_add_feature_tables_record_srn_fks.py deleted file mode 100644 index b61ee14..0000000 --- a/server/migrations/versions/076_add_feature_tables_record_srn_fks.py +++ /dev/null @@ -1,95 +0,0 @@ -"""076_add_feature_tables_record_srn_fks - -For each row currently registered in the ``public.feature_tables`` catalog, -add a foreign-key constraint on ``features..record_srn`` referencing -``records.srn`` with ``ON DELETE CASCADE``. Bundles GitHub #75. - -Idempotent: skips any hook whose FK is already present (detected by naming -convention). No-op on greenfield deployments where the catalog is empty. - -Revision ID: 076_feature_fks -Revises: 076_records_schema_srn -Create Date: 2026-04-19 - -""" - -import re -from typing import Sequence, Union - -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "076_feature_fks" -down_revision: Union[str, Sequence[str], None] = "076_records_schema_srn" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -FK_NAME_TEMPLATE = "fk_features_{hook}_record_srn" - -# Defense-in-depth: hook names read from ``feature_tables`` are interpolated -# into raw DDL below. Application code constrains hooks to this shape at write -# time, but the migration should not trust that invariant — a stray ``"`` in a -# stored name would break out of quoting. Mirrors the ``_safe_ident`` check in -# ``osa.infrastructure.persistence.metadata_store``. 
-_PG_IDENT_RE = re.compile(r"^[a-z][a-z0-9_]{0,62}$") - - -def _safe_ident(name: str) -> str: - if not _PG_IDENT_RE.match(name): - raise ValueError(f"Refusing to interpolate unsafe PG identifier {name!r} into DDL") - return name - - -def upgrade() -> None: - conn = op.get_bind() - rows = conn.execute(_select_hooks()).fetchall() - - for row in rows: - hook = _safe_ident(row[0]) - fk_name = _safe_ident(FK_NAME_TEMPLATE.format(hook=hook)) - exists = conn.execute(_check_constraint(fk_name)).scalar() - if exists: - continue - - conn.execute(_add_fk_sql(hook, fk_name)) - - -def downgrade() -> None: - conn = op.get_bind() - rows = conn.execute(_select_hooks()).fetchall() - for row in rows: - hook = _safe_ident(row[0]) - fk_name = _safe_ident(FK_NAME_TEMPLATE.format(hook=hook)) - exists = conn.execute(_check_constraint(fk_name)).scalar() - if not exists: - continue - conn.execute(_drop_fk_sql(hook, fk_name)) - - -def _select_hooks(): - from sqlalchemy import text - - return text("SELECT hook_name FROM feature_tables") - - -def _check_constraint(fk_name: str): - from sqlalchemy import text - - return text("SELECT 1 FROM pg_constraint WHERE conname = :fk_name").bindparams(fk_name=fk_name) - - -def _add_fk_sql(hook: str, fk_name: str): - from sqlalchemy import text - - return text( - f'ALTER TABLE features."{hook}" ' - f'ADD CONSTRAINT "{fk_name}" ' - f"FOREIGN KEY (record_srn) REFERENCES records(srn) ON DELETE CASCADE" - ) - - -def _drop_fk_sql(hook: str, fk_name: str): - from sqlalchemy import text - - return text(f'ALTER TABLE features."{hook}" DROP CONSTRAINT "{fk_name}"') diff --git a/server/migrations/versions/076_add_metadata_schema_and_catalog.py b/server/migrations/versions/076_add_metadata_schema_and_catalog.py deleted file mode 100644 index e425c95..0000000 --- a/server/migrations/versions/076_add_metadata_schema_and_catalog.py +++ /dev/null @@ -1,47 +0,0 @@ -"""076_add_metadata_schema_and_catalog - -Create the ``metadata`` PostgreSQL schema and the 
``public.metadata_tables`` -catalog table. Dynamic per-schema metadata tables will live inside the -``metadata`` schema; the catalog indexes them by short schema id + major. - -Revision ID: 076_metadata_catalog -Revises: add_deliver_after -Create Date: 2026-04-19 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects.postgresql import JSONB - -# revision identifiers, used by Alembic. -revision: str = "076_metadata_catalog" -down_revision: Union[str, Sequence[str], None] = "add_deliver_after" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - op.execute('CREATE SCHEMA IF NOT EXISTS "metadata"') - - op.create_table( - "metadata_tables", - sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), - sa.Column("schema_id", sa.Text(), nullable=False), - sa.Column("schema_slug", sa.Text(), nullable=False), - sa.Column("schema_major", sa.Integer(), nullable=False), - sa.Column("schema_versions", JSONB(), nullable=False), - sa.Column("pg_table", sa.Text(), nullable=False), - sa.Column("metadata_schema", JSONB(), nullable=False), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), - sa.UniqueConstraint("schema_id", "schema_major", name="uq_metadata_tables_id_major"), - sa.UniqueConstraint("pg_table", name="uq_metadata_tables_pg_table"), - ) - - -def downgrade() -> None: - op.drop_table("metadata_tables") - op.execute('DROP SCHEMA IF EXISTS "metadata" CASCADE') diff --git a/server/migrations/versions/076_add_records_schema_srn.py b/server/migrations/versions/076_add_records_schema_srn.py deleted file mode 100644 index 585cb0b..0000000 --- a/server/migrations/versions/076_add_records_schema_srn.py +++ /dev/null @@ -1,40 +0,0 @@ -"""076_add_records_schema_id - -Add ``records.schema_id`` + ``records.schema_version`` so a Record's 
typed -linkage is first-class (FR-008). - -Greenfield only: no backfill from the linked convention. If this runs -against a populated ``records`` table it fails at ``SET NOT NULL`` with a -clear constraint error, which is the correct signal that the data predates -this schema. - -Revision ID: 076_records_schema_srn -Revises: 076_schemas_to_id -Create Date: 2026-04-19 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "076_records_schema_srn" -down_revision: Union[str, Sequence[str], None] = "076_schemas_to_id" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - op.add_column("records", sa.Column("schema_id", sa.Text(), nullable=True)) - op.add_column("records", sa.Column("schema_version", sa.Text(), nullable=True)) - op.alter_column("records", "schema_id", nullable=False) - op.alter_column("records", "schema_version", nullable=False) - op.create_index("idx_records_schema_id", "records", ["schema_id"]) - - -def downgrade() -> None: - op.drop_index("idx_records_schema_id", table_name="records") - op.drop_column("records", "schema_version") - op.drop_column("records", "schema_id") diff --git a/server/migrations/versions/076_schemas_to_id.py b/server/migrations/versions/076_schemas_to_id.py deleted file mode 100644 index c05f5e2..0000000 --- a/server/migrations/versions/076_schemas_to_id.py +++ /dev/null @@ -1,66 +0,0 @@ -"""076_schemas_to_id - -Replace URN-keyed ``schemas`` and ``conventions`` columns with short-form -``(id, version)`` pairs. After this migration, internal code works entirely -in ``SchemaId``; full URNs are reserved for federation edges. - -Changes: -- ``schemas.srn`` → ``schemas.id`` + ``schemas.version``. Composite PK. -- ``conventions.schema_srn`` → ``conventions.schema_id`` + ``conventions.schema_version``. 
- -Greenfield only: no backfill from the old URN columns. If this runs against -a populated DB it fails at ``SET NOT NULL`` with a clear constraint error, -which is the correct signal that the data predates this schema. - -Revision ID: 076_schemas_to_id -Revises: 076_metadata_catalog -Create Date: 2026-04-20 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "076_schemas_to_id" -down_revision: Union[str, Sequence[str], None] = "076_metadata_catalog" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # schemas: drop old SRN PK, add id + version, recompose PK. - op.add_column("schemas", sa.Column("id", sa.String(), nullable=True)) - op.add_column("schemas", sa.Column("version", sa.String(), nullable=True)) - op.alter_column("schemas", "id", nullable=False) - op.alter_column("schemas", "version", nullable=False) - op.drop_constraint("schemas_pkey", "schemas", type_="primary") - op.drop_column("schemas", "srn") - op.create_primary_key("schemas_pkey", "schemas", ["id", "version"]) - op.create_index("idx_schemas_id", "schemas", ["id"]) - - # conventions: split schema_srn into schema_id + schema_version. 
- op.add_column("conventions", sa.Column("schema_id", sa.String(), nullable=True)) - op.add_column("conventions", sa.Column("schema_version", sa.String(), nullable=True)) - op.alter_column("conventions", "schema_id", nullable=False) - op.alter_column("conventions", "schema_version", nullable=False) - op.drop_column("conventions", "schema_srn") - - -def downgrade() -> None: - # conventions back to schema_srn - op.add_column("conventions", sa.Column("schema_srn", sa.String(), nullable=True)) - op.alter_column("conventions", "schema_srn", nullable=False) - op.drop_column("conventions", "schema_version") - op.drop_column("conventions", "schema_id") - - # schemas back to srn - op.drop_index("idx_schemas_id", table_name="schemas") - op.drop_constraint("schemas_pkey", "schemas", type_="primary") - op.add_column("schemas", sa.Column("srn", sa.String(), nullable=True)) - op.alter_column("schemas", "srn", nullable=False) - op.create_primary_key("schemas_pkey", "schemas", ["srn"]) - op.drop_column("schemas", "version") - op.drop_column("schemas", "id") diff --git a/server/migrations/versions/0d9fbacf8e58_initial_tables.py b/server/migrations/versions/0d9fbacf8e58_initial_tables.py deleted file mode 100644 index 697bdd5..0000000 --- a/server/migrations/versions/0d9fbacf8e58_initial_tables.py +++ /dev/null @@ -1,111 +0,0 @@ -"""initial_tables - -Revision ID: 0d9fbacf8e58 -Revises: -Create Date: 2025-11-28 01:22:35.013560 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. 
-revision: str = "0d9fbacf8e58" -down_revision: Union[str, Sequence[str], None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Upgrade schema.""" - # DEPOSITIONS - op.create_table( - "depositions", - sa.Column("srn", sa.String(), nullable=False), - sa.Column("status", sa.String(32), nullable=False), - sa.Column("metadata", sa.JSON(), nullable=False), - sa.Column("provenance", sa.JSON(), nullable=False), - sa.Column("files", sa.JSON(), nullable=False), - sa.Column("record_id", sa.String(), nullable=True), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), - sa.PrimaryKeyConstraint("srn"), - ) - op.create_index("idx_depositions_record_id", "depositions", ["record_id"]) - - # VALIDATION RUNS - op.create_table( - "validation_runs", - sa.Column("srn", sa.String(), nullable=False), - sa.Column("status", sa.String(32), nullable=False), - sa.Column("results", sa.JSON(), nullable=False), - sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True), - sa.PrimaryKeyConstraint("srn"), - ) - op.create_index("idx_validation_runs_expires_at", "validation_runs", ["expires_at"]) - - # RECORDS - op.create_table( - "records", - sa.Column("srn", sa.String(), nullable=False), - sa.Column("deposition_srn", sa.String(), nullable=False), - sa.Column("metadata", postgresql.JSONB(), nullable=False), - sa.Column("indexes", sa.JSON(), nullable=False), - sa.Column("published_at", sa.DateTime(timezone=True), nullable=False), - sa.PrimaryKeyConstraint("srn"), - ) - op.create_index("idx_records_deposition_srn", "records", ["deposition_srn"]) - op.create_index("idx_records_published_at", "records", ["published_at"]) - op.create_index( - 
"idx_records_metadata_gin", - "records", - ["metadata"], - postgresql_using="gin", - postgresql_ops={"metadata": "jsonb_path_ops"}, - ) - - # EVENTS (Outbox) - op.create_table( - "events", - sa.Column("id", sa.String(), nullable=False), - sa.Column("event_type", sa.String(128), nullable=False), - sa.Column("payload", sa.JSON(), nullable=False), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("delivery_status", sa.String(32), nullable=False), - sa.Column("delivered_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("delivery_error", sa.Text(), nullable=True), - sa.PrimaryKeyConstraint("id"), - ) - op.create_index( - "idx_events_type_created", - "events", - ["event_type", sa.text("created_at DESC")], - ) - op.create_index("idx_events_delivery_status", "events", ["delivery_status"]) - - -def downgrade() -> None: - """Downgrade schema.""" - # EVENTS - op.drop_index("idx_events_delivery_status", table_name="events") - op.drop_index("idx_events_type_created", table_name="events") - op.drop_table("events") - - # RECORDS - op.drop_index("idx_records_metadata_gin", table_name="records") - op.drop_index("idx_records_published_at", table_name="records") - op.drop_index("idx_records_deposition_srn", table_name="records") - op.drop_table("records") - - # VALIDATION RUNS - op.drop_index("idx_validation_runs_expires_at", table_name="validation_runs") - op.drop_table("validation_runs") - - # DEPOSITIONS - op.drop_index("idx_depositions_record_id", table_name="depositions") - op.drop_table("depositions") diff --git a/server/migrations/versions/add_auth_tables.py b/server/migrations/versions/add_auth_tables.py deleted file mode 100644 index b04ac0b..0000000 --- a/server/migrations/versions/add_auth_tables.py +++ /dev/null @@ -1,89 +0,0 @@ -"""add_auth_tables - -Add users, identities, and refresh_tokens tables for authentication. 
- -Revision ID: add_auth_tables -Revises: add_worker_columns -Create Date: 2026-02-04 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "add_auth_tables" -down_revision: Union[str, Sequence[str], None] = "add_worker_columns" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Add authentication tables.""" - # USERS TABLE - op.create_table( - "users", - sa.Column("id", sa.String(), nullable=False), - sa.Column("display_name", sa.String(255), nullable=True), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), - sa.PrimaryKeyConstraint("id"), - ) - - # IDENTITIES TABLE - op.create_table( - "identities", - sa.Column("id", sa.String(), nullable=False), - sa.Column("user_id", sa.String(), nullable=False), - sa.Column("provider", sa.String(50), nullable=False), - sa.Column("external_id", sa.String(255), nullable=False), - sa.Column("metadata", sa.JSON(), nullable=True), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.PrimaryKeyConstraint("id"), - sa.ForeignKeyConstraint( - ["user_id"], - ["users.id"], - ondelete="CASCADE", - ), - sa.UniqueConstraint("provider", "external_id", name="uq_identity_provider_external"), - ) - op.create_index("ix_identities_user_id", "identities", ["user_id"]) - - # REFRESH TOKENS TABLE - op.create_table( - "refresh_tokens", - sa.Column("id", sa.String(), nullable=False), - sa.Column("user_id", sa.String(), nullable=False), - sa.Column("token_hash", sa.String(64), nullable=False), - sa.Column("family_id", sa.String(), nullable=False), - sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("revoked_at", sa.DateTime(timezone=True), 
nullable=True), - sa.PrimaryKeyConstraint("id"), - sa.ForeignKeyConstraint( - ["user_id"], - ["users.id"], - ondelete="CASCADE", - ), - ) - op.create_index("ix_refresh_tokens_user_id", "refresh_tokens", ["user_id"]) - op.create_index("ix_refresh_tokens_token_hash", "refresh_tokens", ["token_hash"]) - op.create_index("ix_refresh_tokens_family_id", "refresh_tokens", ["family_id"]) - - -def downgrade() -> None: - """Remove authentication tables.""" - # REFRESH TOKENS - op.drop_index("ix_refresh_tokens_family_id", table_name="refresh_tokens") - op.drop_index("ix_refresh_tokens_token_hash", table_name="refresh_tokens") - op.drop_index("ix_refresh_tokens_user_id", table_name="refresh_tokens") - op.drop_table("refresh_tokens") - - # IDENTITIES - op.drop_index("ix_identities_user_id", table_name="identities") - op.drop_table("identities") - - # USERS - op.drop_table("users") diff --git a/server/migrations/versions/add_authorization.py b/server/migrations/versions/add_authorization.py deleted file mode 100644 index 0a9977b..0000000 --- a/server/migrations/versions/add_authorization.py +++ /dev/null @@ -1,71 +0,0 @@ -"""add_authorization - -Add role_assignments table and owner_id column to depositions. - -Revision ID: add_authorization -Revises: add_auth_tables -Create Date: 2026-02-06 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision: str = "add_authorization" -down_revision: Union[str, Sequence[str], None] = "add_auth_tables" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Add authorization tables and columns.""" - # ROLE ASSIGNMENTS TABLE - op.create_table( - "role_assignments", - sa.Column("id", sa.String(), nullable=False), - sa.Column("user_id", sa.String(), nullable=False), - sa.Column("role", sa.String(32), nullable=False), - sa.Column("assigned_by", sa.String(), nullable=False), - sa.Column("assigned_at", sa.DateTime(timezone=True), nullable=False), - sa.PrimaryKeyConstraint("id"), - sa.ForeignKeyConstraint( - ["user_id"], - ["users.id"], - ondelete="CASCADE", - ), - sa.ForeignKeyConstraint( - ["assigned_by"], - ["users.id"], - ), - sa.UniqueConstraint("user_id", "role", name="uq_role_assignments_user_role"), - ) - op.create_index("ix_role_assignments_user_id", "role_assignments", ["user_id"]) - - # ADD owner_id TO DEPOSITIONS (nullable initially for existing data) - op.add_column( - "depositions", - sa.Column("owner_id", sa.String(), nullable=True), - ) - op.create_foreign_key( - "fk_depositions_owner_id", - "depositions", - "users", - ["owner_id"], - ["id"], - ) - op.create_index("idx_depositions_owner_id", "depositions", ["owner_id"]) - - -def downgrade() -> None: - """Remove authorization tables and columns.""" - # DEPOSITIONS owner_id - op.drop_index("idx_depositions_owner_id", table_name="depositions") - op.drop_constraint("fk_depositions_owner_id", "depositions", type_="foreignkey") - op.drop_column("depositions", "owner_id") - - # ROLE ASSIGNMENTS - op.drop_index("ix_role_assignments_user_id", table_name="role_assignments") - op.drop_table("role_assignments") diff --git a/server/migrations/versions/add_deliver_after.py b/server/migrations/versions/add_deliver_after.py deleted file mode 100644 index c7a7b6b..0000000 --- a/server/migrations/versions/add_deliver_after.py +++ 
/dev/null @@ -1,45 +0,0 @@ -"""add_deliver_after_and_batches_failed - -Add deliver_after column to deliveries table for explicit backoff scheduling. -Add batches_failed column to ingest_runs table for batch failure accounting. - -Revision ID: add_deliver_after -Revises: add_ingest_runs -Create Date: 2026-04-04 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "add_deliver_after" -down_revision: Union[str, Sequence[str], None] = "add_ingest_runs" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - op.add_column( - "deliveries", - sa.Column("deliver_after", sa.DateTime(timezone=True), nullable=True), - ) - op.create_index( - "idx_deliveries_deliver_after", - "deliveries", - ["deliver_after"], - postgresql_where=sa.text("status = 'pending'"), - ) - - op.add_column( - "ingest_runs", - sa.Column("batches_failed", sa.Integer, nullable=False, server_default=sa.text("0")), - ) - - -def downgrade() -> None: - op.drop_column("ingest_runs", "batches_failed") - op.drop_index("idx_deliveries_deliver_after", table_name="deliveries") - op.drop_column("deliveries", "deliver_after") diff --git a/server/migrations/versions/add_deposition_tables.py b/server/migrations/versions/add_deposition_tables.py deleted file mode 100644 index 477274e..0000000 --- a/server/migrations/versions/add_deposition_tables.py +++ /dev/null @@ -1,108 +0,0 @@ -"""add_deposition_tables - -Add ontologies, ontology_terms, schemas, conventions tables. -Alter depositions: add convention_srn, drop provenance. - -Revision ID: add_deposition_tables -Revises: add_authorization -Create Date: 2026-02-08 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision: str = "add_deposition_tables" -down_revision: Union[str, Sequence[str], None] = "add_authorization" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Add semantics/convention tables and update depositions.""" - # ONTOLOGIES - op.create_table( - "ontologies", - sa.Column("srn", sa.String(), nullable=False), - sa.Column("title", sa.String(255), nullable=False), - sa.Column("description", sa.Text(), nullable=True), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.PrimaryKeyConstraint("srn"), - ) - - # ONTOLOGY TERMS - op.create_table( - "ontology_terms", - sa.Column("id", sa.String(), nullable=False), - sa.Column("ontology_srn", sa.String(), nullable=False), - sa.Column("term_id", sa.String(255), nullable=False), - sa.Column("label", sa.String(255), nullable=False), - sa.Column("synonyms", sa.JSON(), nullable=False), - sa.Column("parent_ids", sa.JSON(), nullable=False), - sa.Column("definition", sa.Text(), nullable=True), - sa.Column("deprecated", sa.Boolean(), nullable=False, server_default="false"), - sa.PrimaryKeyConstraint("id"), - sa.ForeignKeyConstraint( - ["ontology_srn"], - ["ontologies.srn"], - ondelete="CASCADE", - ), - sa.UniqueConstraint("ontology_srn", "term_id", name="uq_ontology_term"), - ) - op.create_index("idx_ontology_terms_ontology_srn", "ontology_terms", ["ontology_srn"]) - - # SCHEMAS - op.create_table( - "schemas", - sa.Column("srn", sa.String(), nullable=False), - sa.Column("title", sa.String(255), nullable=False), - sa.Column("fields", sa.JSON(), nullable=False), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.PrimaryKeyConstraint("srn"), - ) - - # CONVENTIONS - op.create_table( - "conventions", - sa.Column("srn", sa.String(), nullable=False), - sa.Column("title", sa.String(255), nullable=False), - sa.Column("description", sa.Text(), nullable=True), - sa.Column("schema_srn", sa.String(), 
nullable=False), - sa.Column("file_requirements", sa.JSON(), nullable=False), - sa.Column("validator_refs", sa.JSON(), nullable=False), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.PrimaryKeyConstraint("srn"), - ) - - # ALTER DEPOSITIONS: add convention_srn, drop provenance - op.add_column( - "depositions", - sa.Column("convention_srn", sa.String(), nullable=False), - ) - op.drop_column("depositions", "provenance") - - -def downgrade() -> None: - """Reverse: restore depositions, drop new tables.""" - # DEPOSITIONS: re-add provenance, drop convention_srn - op.add_column( - "depositions", - sa.Column("provenance", sa.JSON(), nullable=False, server_default="{}"), - ) - op.drop_column("depositions", "convention_srn") - - # CONVENTIONS - op.drop_table("conventions") - - # SCHEMAS - op.drop_table("schemas") - - # ONTOLOGY TERMS - op.drop_index("idx_ontology_terms_ontology_srn", table_name="ontology_terms") - op.drop_table("ontology_terms") - - # ONTOLOGIES - op.drop_table("ontologies") diff --git a/server/migrations/versions/add_device_authorizations.py b/server/migrations/versions/add_device_authorizations.py deleted file mode 100644 index 0e31ad7..0000000 --- a/server/migrations/versions/add_device_authorizations.py +++ /dev/null @@ -1,66 +0,0 @@ -"""add_device_authorizations - -Create device_authorizations table for OAuth device flow. - -Revision ID: add_device_authorizations -Revises: consumer_group_delivery -Create Date: 2026-03-13 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision: str = "add_device_authorizations" -down_revision: Union[str, Sequence[str], None] = "consumer_group_delivery" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Create device_authorizations table.""" - op.create_table( - "device_authorizations", - sa.Column("id", sa.String(), primary_key=True), - sa.Column("device_code", sa.String(64), nullable=False), - sa.Column("user_code", sa.String(8), nullable=False), - sa.Column( - "status", - sa.String(20), - nullable=False, - server_default=sa.text("'pending'"), - ), - sa.Column( - "user_id", - sa.String(), - sa.ForeignKey("users.id"), - nullable=True, - ), - sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - ) - - op.create_unique_constraint( - "uq_device_auth_device_code", - "device_authorizations", - ["device_code"], - ) - op.create_unique_constraint( - "uq_device_auth_user_code", - "device_authorizations", - ["user_code"], - ) - op.create_index( - "ix_device_auth_status_expires", - "device_authorizations", - ["status", "expires_at"], - ) - - -def downgrade() -> None: - """Drop device_authorizations table.""" - op.drop_index("ix_device_auth_status_expires", table_name="device_authorizations") - op.drop_table("device_authorizations") diff --git a/server/migrations/versions/add_hooks_and_feature_tables.py b/server/migrations/versions/add_hooks_and_feature_tables.py deleted file mode 100644 index 7a5fd0e..0000000 --- a/server/migrations/versions/add_hooks_and_feature_tables.py +++ /dev/null @@ -1,60 +0,0 @@ -"""add_hooks_and_feature_tables - -Replace conventions.validator_refs with hooks JSON column. -Add source JSON column to conventions. -Add feature_tables catalog table for tracking dynamically created feature tables. 
- -Revision ID: add_hooks_and_feature_tables -Revises: add_deposition_tables -Create Date: 2026-02-20 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "add_hooks_and_feature_tables" -down_revision: Union[str, Sequence[str], None] = "add_deposition_tables" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Replace validator_refs with hooks; add source; add feature_tables catalog.""" - # CONVENTIONS: replace validator_refs with hooks, add source - op.add_column( - "conventions", - sa.Column("hooks", sa.JSON(), nullable=False, server_default="[]"), - ) - op.add_column( - "conventions", - sa.Column("source", sa.JSON(), nullable=True), - ) - op.drop_column("conventions", "validator_refs") - - # FEATURE_TABLES: catalog for dynamically created feature tables - op.create_table( - "feature_tables", - sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), - sa.Column("hook_name", sa.String(), nullable=False), - sa.Column("pg_table", sa.String(), nullable=False), - sa.Column("feature_schema", sa.JSON(), nullable=False), - sa.Column("schema_version", sa.Integer(), nullable=False, server_default="1"), - sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), - sa.UniqueConstraint("hook_name", name="uq_feature_tables_hook_name"), - ) - - -def downgrade() -> None: - """Reverse: drop feature_tables, restore validator_refs, drop source.""" - op.drop_table("feature_tables") - - op.drop_column("conventions", "source") - op.add_column( - "conventions", - sa.Column("validator_refs", sa.JSON(), nullable=False, server_default="[]"), - ) - op.drop_column("conventions", "hooks") diff --git a/server/migrations/versions/add_ingest_runs.py b/server/migrations/versions/add_ingest_runs.py deleted file mode 100644 index d4dedf5..0000000 --- 
a/server/migrations/versions/add_ingest_runs.py +++ /dev/null @@ -1,92 +0,0 @@ -"""add_ingest_runs - -Add ingest_runs table for bulk ingestion tracking. - -Revision ID: add_ingest_runs -Revises: source_agnostic_records -Create Date: 2026-03-25 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "add_ingest_runs" -down_revision: Union[str, Sequence[str], None] = "source_agnostic_records" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - op.create_table( - "ingest_runs", - sa.Column("id", sa.String(), primary_key=True), - sa.Column( - "convention_srn", - sa.String(), - nullable=False, - ), - sa.Column( - "status", - sa.String(32), - nullable=False, - server_default=sa.text("'pending'"), - ), - sa.Column( - "ingestion_finished", - sa.Boolean(), - nullable=False, - server_default=sa.text("false"), - ), - sa.Column( - "batches_ingested", - sa.Integer(), - nullable=False, - server_default=sa.text("0"), - ), - sa.Column( - "batches_completed", - sa.Integer(), - nullable=False, - server_default=sa.text("0"), - ), - sa.Column( - "published_count", - sa.Integer(), - nullable=False, - server_default=sa.text("0"), - ), - sa.Column( - "batch_size", - sa.Integer(), - nullable=False, - server_default=sa.text("1000"), - ), - sa.Column("record_limit", sa.Integer(), nullable=True), - sa.Column("started_at", sa.DateTime(timezone=True), nullable=False), - sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), - sa.CheckConstraint( - "status IN ('pending', 'running', 'completed', 'failed')", - name="ingest_runs_status_check", - ), - ) - - op.create_index( - "idx_ingest_runs_convention", - "ingest_runs", - ["convention_srn"], - ) - op.create_index( - "idx_ingest_runs_status", - "ingest_runs", - ["status"], - ) - - -def downgrade() -> None: - op.drop_index("idx_ingest_runs_status", 
table_name="ingest_runs") - op.drop_index("idx_ingest_runs_convention", table_name="ingest_runs") - op.drop_table("ingest_runs") diff --git a/server/migrations/versions/add_worker_columns.py b/server/migrations/versions/add_worker_columns.py deleted file mode 100644 index bbdfe7f..0000000 --- a/server/migrations/versions/add_worker_columns.py +++ /dev/null @@ -1,69 +0,0 @@ -"""add_worker_columns - -Add columns and indexes to events table for pull-based worker architecture. - -Revision ID: add_worker_columns -Revises: 0d9fbacf8e58 -Create Date: 2026-02-02 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "add_worker_columns" -down_revision: Union[str, Sequence[str], None] = "0d9fbacf8e58" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Add worker columns to events table.""" - # Add new columns for pull-based claiming - op.add_column( - "events", sa.Column("retry_count", sa.Integer(), nullable=False, server_default="0") - ) - op.add_column("events", sa.Column("claimed_at", sa.DateTime(timezone=True), nullable=True)) - op.add_column( - "events", - sa.Column( - "updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() - ), - ) - - # Create partial index for efficient claiming query - op.create_index( - "idx_events_claim", - "events", - ["delivery_status", "event_type", "created_at"], - postgresql_where=sa.text("delivery_status IN ('pending', 'claimed')"), - ) - - # Create partial index for stale claim detection - op.create_index( - "idx_events_stale_claims", - "events", - ["claimed_at"], - postgresql_where=sa.text("delivery_status = 'claimed'"), - ) - - # Create partial index for failed event queries - op.create_index( - "idx_events_failed", - "events", - ["event_type", "created_at"], - postgresql_where=sa.text("delivery_status = 'failed'"), - ) 
- - -def downgrade() -> None: - """Remove worker columns from events table.""" - op.drop_index("idx_events_failed", table_name="events") - op.drop_index("idx_events_stale_claims", table_name="events") - op.drop_index("idx_events_claim", table_name="events") - op.drop_column("events", "updated_at") - op.drop_column("events", "claimed_at") - op.drop_column("events", "retry_count") diff --git a/server/migrations/versions/c6d9f4c0c3ab_initial_schema.py b/server/migrations/versions/c6d9f4c0c3ab_initial_schema.py new file mode 100644 index 0000000..03f8b5a --- /dev/null +++ b/server/migrations/versions/c6d9f4c0c3ab_initial_schema.py @@ -0,0 +1,469 @@ +"""initial schema + +Revision ID: c6d9f4c0c3ab +Revises: +Create Date: 2026-06-14 19:29:44.101012 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = "c6d9f4c0c3ab" +down_revision: Union[str, Sequence[str], None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # Per-schema metadata projection tables (metadata.*) are created at runtime, + # but the metadata_store does NOT create their PG schema — do it here. + op.execute('CREATE SCHEMA IF NOT EXISTS "metadata"') + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "conventions", + sa.Column("id", sa.String(), nullable=False), + sa.Column("title", sa.String(length=255), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("schema_id", sa.String(), nullable=False), + sa.Column("schema_version", sa.String(), nullable=False), + sa.Column("file_requirements", sa.JSON(), nullable=False), + sa.Column("hooks", sa.JSON(), nullable=False), + sa.Column("source", sa.JSON(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "events", + sa.Column("id", sa.String(), nullable=False), + sa.Column("event_type", sa.String(length=128), nullable=False), + sa.Column("payload", sa.JSON(), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + "idx_events_type_created", + "events", + ["event_type", sa.literal_column("created_at DESC")], + unique=False, + ) + op.create_table( + "feature_tables", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("hook_name", sa.String(), nullable=False), + sa.Column("pg_table", sa.String(), nullable=False), + sa.Column("feature_schema", sa.JSON(), nullable=False), + sa.Column("schema_version", sa.Integer(), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("hook_name", name="uq_feature_tables_hook_name"), + ) + op.create_table( + "hooks", + sa.Column("name", sa.String(length=40), nullable=False), + sa.Column("feature_spec", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("live_release_id", sa.UUID(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("name"), + ) + op.create_table( + "ingest_runs", + sa.Column("id", sa.String(), nullable=False), + sa.Column("convention_id", 
sa.String(), nullable=False), + sa.Column( + "status", sa.String(length=32), server_default=sa.text("'pending'"), nullable=False + ), + sa.Column( + "ingestion_finished", sa.Boolean(), server_default=sa.text("false"), nullable=False + ), + sa.Column("batches_ingested", sa.Integer(), server_default=sa.text("0"), nullable=False), + sa.Column("batches_completed", sa.Integer(), server_default=sa.text("0"), nullable=False), + sa.Column("published_count", sa.Integer(), server_default=sa.text("0"), nullable=False), + sa.Column("batch_size", sa.Integer(), server_default=sa.text("1000"), nullable=False), + sa.Column("record_limit", sa.Integer(), nullable=True), + sa.Column("batches_failed", sa.Integer(), server_default=sa.text("0"), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index("idx_ingest_runs_convention", "ingest_runs", ["convention_id"], unique=False) + op.create_index("idx_ingest_runs_status", "ingest_runs", ["status"], unique=False) + op.create_table( + "metadata_tables", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("schema_id", sa.Text(), nullable=False), + sa.Column("schema_slug", sa.Text(), nullable=False), + sa.Column("schema_major", sa.Integer(), nullable=False), + sa.Column("schema_versions", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("pg_table", sa.Text(), nullable=False), + sa.Column("metadata_schema", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("pg_table", name="uq_metadata_tables_pg_table"), + sa.UniqueConstraint("schema_id", "schema_major", name="uq_metadata_tables_id_major"), + ) + op.create_table( + "ontologies", + 
sa.Column("srn", sa.String(), nullable=False), + sa.Column("title", sa.String(length=255), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("srn"), + ) + op.create_table( + "records", + sa.Column("srn", sa.String(), nullable=False), + sa.Column("convention_id", sa.Text(), nullable=False), + sa.Column("schema_id", sa.Text(), nullable=False), + sa.Column("schema_version", sa.Text(), nullable=False), + sa.Column("source", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("published_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("srn"), + ) + op.create_index("idx_records_convention_id", "records", ["convention_id"], unique=False) + op.create_index( + "idx_records_metadata_gin", + "records", + ["metadata"], + unique=False, + postgresql_using="gin", + postgresql_ops={"metadata": "jsonb_path_ops"}, + ) + op.create_index("idx_records_published_at", "records", ["published_at"], unique=False) + op.create_index("idx_records_schema_id", "records", ["schema_id"], unique=False) + op.create_index( + "uq_records_source", + "records", + [sa.literal_column("(source ->> 'type')"), sa.literal_column("(source ->> 'id')")], + unique=True, + ) + op.create_table( + "schemas", + sa.Column("id", sa.String(), nullable=False), + sa.Column("version", sa.String(), nullable=False), + sa.Column("title", sa.String(length=255), nullable=False), + sa.Column("fields", sa.JSON(), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint("id", "version"), + ) + op.create_index("idx_schemas_id", "schemas", ["id"], unique=False) + op.create_table( + "users", + sa.Column("id", sa.String(), nullable=False), + sa.Column("display_name", sa.String(length=255), nullable=True), + sa.Column("created_at", 
sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "validation_runs", + sa.Column("srn", sa.String(), nullable=False), + sa.Column("status", sa.String(length=32), nullable=False), + sa.Column("results", sa.JSON(), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("expires_at", sa.DateTime(timezone=True), nullable=True), + sa.PrimaryKeyConstraint("srn"), + ) + op.create_index( + "idx_validation_runs_expires_at", "validation_runs", ["expires_at"], unique=False + ) + op.create_table( + "deliveries", + sa.Column("id", sa.String(), nullable=False), + sa.Column("event_id", sa.String(), nullable=False), + sa.Column("consumer_group", sa.String(length=128), nullable=False), + sa.Column( + "status", sa.String(length=32), server_default=sa.text("'pending'"), nullable=False + ), + sa.Column("claimed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("delivered_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("delivery_error", sa.Text(), nullable=True), + sa.Column("retry_count", sa.Integer(), server_default=sa.text("0"), nullable=False), + sa.Column("deliver_after", sa.DateTime(timezone=True), nullable=True), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint( + ["event_id"], + ["events.id"], + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("event_id", "consumer_group", name="uq_delivery_event_consumer"), + ) + op.create_index( + "idx_deliveries_claim", + "deliveries", + ["consumer_group", "status", "event_id"], + unique=False, + postgresql_where=sa.text("status IN ('pending', 'claimed')"), + ) + op.create_index( + "idx_deliveries_deliver_after", + "deliveries", + ["deliver_after"], + unique=False, + postgresql_where=sa.text("status = 'pending'"), + ) + 
op.create_index("idx_deliveries_event", "deliveries", ["event_id"], unique=False) + op.create_index( + "idx_deliveries_failed", + "deliveries", + ["consumer_group", "retry_count"], + unique=False, + postgresql_where=sa.text("status = 'failed'"), + ) + op.create_index( + "idx_deliveries_stale", + "deliveries", + ["claimed_at"], + unique=False, + postgresql_where=sa.text("status = 'claimed'"), + ) + op.create_table( + "depositions", + sa.Column("srn", sa.String(), nullable=False), + sa.Column("convention_id", sa.String(), nullable=False), + sa.Column("status", sa.String(length=32), nullable=False), + sa.Column("metadata", sa.JSON(), nullable=False), + sa.Column("files", sa.JSON(), nullable=False), + sa.Column("record_id", sa.String(), nullable=True), + sa.Column("owner_id", sa.String(), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint( + ["owner_id"], + ["users.id"], + ), + sa.PrimaryKeyConstraint("srn"), + ) + op.create_index("idx_depositions_owner_id", "depositions", ["owner_id"], unique=False) + op.create_index("idx_depositions_record_id", "depositions", ["record_id"], unique=False) + op.create_table( + "device_authorizations", + sa.Column("id", sa.String(), nullable=False), + sa.Column("device_code", sa.String(length=64), nullable=False), + sa.Column("user_code", sa.String(length=8), nullable=False), + sa.Column( + "status", sa.String(length=20), server_default=sa.text("'pending'"), nullable=False + ), + sa.Column("user_id", sa.String(), nullable=True), + sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["users.id"], + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("device_code", name="uq_device_auth_device_code"), + sa.UniqueConstraint("user_code", name="uq_device_auth_user_code"), + ) 
+ op.create_index( + "ix_device_auth_status_expires", + "device_authorizations", + ["status", "expires_at"], + unique=False, + ) + op.create_table( + "hook_releases", + sa.Column("id", sa.UUID(), nullable=False), + sa.Column("hook_name", sa.String(length=40), nullable=False), + sa.Column("version", sa.Integer(), nullable=False), + sa.Column("image", sa.Text(), nullable=False), + sa.Column("digest", sa.Text(), nullable=False), + sa.Column( + "config", + postgresql.JSONB(astext_type=sa.Text()), + server_default=sa.text("'{}'"), + nullable=False, + ), + sa.Column("limits", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("source_ref", sa.Text(), nullable=False), + sa.Column("built_by", sa.Text(), nullable=True), + sa.Column("built_at", sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint( + ["hook_name"], + ["hooks.name"], + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("hook_name", "digest", name="uq_hook_releases_hook_digest"), + sa.UniqueConstraint("hook_name", "version", name="uq_hook_releases_hook_version"), + ) + op.create_index( + "idx_hook_releases_hook_version", + "hook_releases", + ["hook_name", sa.literal_column("version DESC")], + unique=False, + ) + # Live-pointer FK, added after both tables exist to break the circular + # hooks.live_release_id ↔ hook_releases.hook_name dependency. DEFERRABLE so + # the release-insert-then-pointer-update happens in one transaction. 
+ op.create_foreign_key( + "fk_hooks_live_release_id", + "hooks", + "hook_releases", + ["live_release_id"], + ["id"], + deferrable=True, + initially="DEFERRED", + ) + op.create_table( + "identities", + sa.Column("id", sa.String(), nullable=False), + sa.Column("user_id", sa.String(), nullable=False), + sa.Column("provider", sa.String(length=50), nullable=False), + sa.Column("external_id", sa.String(length=255), nullable=False), + sa.Column("metadata", sa.JSON(), nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("provider", "external_id", name="uq_identity_provider_external"), + ) + op.create_index("ix_identities_user_id", "identities", ["user_id"], unique=False) + op.create_table( + "ontology_terms", + sa.Column("id", sa.String(), nullable=False), + sa.Column("ontology_srn", sa.String(), nullable=False), + sa.Column("term_id", sa.String(length=255), nullable=False), + sa.Column("label", sa.String(length=255), nullable=False), + sa.Column("synonyms", sa.JSON(), nullable=False), + sa.Column("parent_ids", sa.JSON(), nullable=False), + sa.Column("definition", sa.Text(), nullable=True), + sa.Column("deprecated", sa.Boolean(), nullable=False), + sa.ForeignKeyConstraint(["ontology_srn"], ["ontologies.srn"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("ontology_srn", "term_id", name="uq_ontology_term"), + ) + op.create_index( + "idx_ontology_terms_ontology_srn", "ontology_terms", ["ontology_srn"], unique=False + ) + op.create_table( + "refresh_tokens", + sa.Column("id", sa.String(), nullable=False), + sa.Column("user_id", sa.String(), nullable=False), + sa.Column("token_hash", sa.String(length=64), nullable=False), + sa.Column("family_id", sa.String(), nullable=False), + sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("created_at", 
sa.DateTime(timezone=True), nullable=False), + sa.Column("revoked_at", sa.DateTime(timezone=True), nullable=True), + sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index("ix_refresh_tokens_family_id", "refresh_tokens", ["family_id"], unique=False) + op.create_index("ix_refresh_tokens_token_hash", "refresh_tokens", ["token_hash"], unique=False) + op.create_index("ix_refresh_tokens_user_id", "refresh_tokens", ["user_id"], unique=False) + op.create_table( + "role_assignments", + sa.Column("id", sa.String(), nullable=False), + sa.Column("user_id", sa.String(), nullable=False), + sa.Column("role", sa.String(length=32), nullable=False), + sa.Column("assigned_by", sa.String(), nullable=False), + sa.Column("assigned_at", sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint( + ["assigned_by"], + ["users.id"], + ), + sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("user_id", "role", name="uq_role_assignments_user_role"), + ) + op.create_index("ix_role_assignments_user_id", "role_assignments", ["user_id"], unique=False) + op.create_table( + "hook_runs", + sa.Column("id", sa.UUID(), nullable=False), + sa.Column("release_id", sa.UUID(), nullable=False), + sa.Column("status", sa.String(length=16), nullable=False), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("finished_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("duration_s", sa.Float(), nullable=False), + sa.Column("oom_retries", sa.Integer(), server_default=sa.text("0"), nullable=False), + sa.Column("log_ref", sa.Text(), nullable=True), + sa.ForeignKeyConstraint( + ["release_id"], + ["hook_releases.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index("idx_hook_runs_release", "hook_runs", ["release_id"], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + 
"""Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + # Drop the live-pointer FK first so hook_releases can be dropped below. + op.drop_constraint("fk_hooks_live_release_id", "hooks", type_="foreignkey") + op.drop_index("idx_hook_runs_release", table_name="hook_runs") + op.drop_table("hook_runs") + op.drop_index("ix_role_assignments_user_id", table_name="role_assignments") + op.drop_table("role_assignments") + op.drop_index("ix_refresh_tokens_user_id", table_name="refresh_tokens") + op.drop_index("ix_refresh_tokens_token_hash", table_name="refresh_tokens") + op.drop_index("ix_refresh_tokens_family_id", table_name="refresh_tokens") + op.drop_table("refresh_tokens") + op.drop_index("idx_ontology_terms_ontology_srn", table_name="ontology_terms") + op.drop_table("ontology_terms") + op.drop_index("ix_identities_user_id", table_name="identities") + op.drop_table("identities") + op.drop_index("idx_hook_releases_hook_version", table_name="hook_releases") + op.drop_table("hook_releases") + op.drop_index("ix_device_auth_status_expires", table_name="device_authorizations") + op.drop_table("device_authorizations") + op.drop_index("idx_depositions_record_id", table_name="depositions") + op.drop_index("idx_depositions_owner_id", table_name="depositions") + op.drop_table("depositions") + op.drop_index( + "idx_deliveries_stale", + table_name="deliveries", + postgresql_where=sa.text("status = 'claimed'"), + ) + op.drop_index( + "idx_deliveries_failed", + table_name="deliveries", + postgresql_where=sa.text("status = 'failed'"), + ) + op.drop_index("idx_deliveries_event", table_name="deliveries") + op.drop_index( + "idx_deliveries_deliver_after", + table_name="deliveries", + postgresql_where=sa.text("status = 'pending'"), + ) + op.drop_index( + "idx_deliveries_claim", + table_name="deliveries", + postgresql_where=sa.text("status IN ('pending', 'claimed')"), + ) + op.drop_table("deliveries") + op.drop_index("idx_validation_runs_expires_at", 
table_name="validation_runs") + op.drop_table("validation_runs") + op.drop_table("users") + op.drop_index("idx_schemas_id", table_name="schemas") + op.drop_table("schemas") + op.drop_index("uq_records_source", table_name="records") + op.drop_index("idx_records_schema_id", table_name="records") + op.drop_index("idx_records_published_at", table_name="records") + op.drop_index( + "idx_records_metadata_gin", + table_name="records", + postgresql_using="gin", + postgresql_ops={"metadata": "jsonb_path_ops"}, + ) + op.drop_index("idx_records_convention_id", table_name="records") + op.drop_table("records") + op.drop_table("ontologies") + op.drop_table("metadata_tables") + op.drop_index("idx_ingest_runs_status", table_name="ingest_runs") + op.drop_index("idx_ingest_runs_convention", table_name="ingest_runs") + op.drop_table("ingest_runs") + op.drop_table("hooks") + op.drop_table("feature_tables") + op.drop_index("idx_events_type_created", table_name="events") + op.drop_table("events") + op.drop_table("conventions") + # ### end Alembic commands ### + op.execute('DROP SCHEMA IF EXISTS "metadata" CASCADE') diff --git a/server/migrations/versions/consumer_group_delivery.py b/server/migrations/versions/consumer_group_delivery.py deleted file mode 100644 index 9d3046c..0000000 --- a/server/migrations/versions/consumer_group_delivery.py +++ /dev/null @@ -1,124 +0,0 @@ -"""consumer_group_delivery - -Create deliveries table for per-consumer-group tracking. -Drop delivery columns from events table (becomes append-only log). -No data migration needed — pre-launch. - -Revision ID: consumer_group_delivery -Revises: add_hooks_and_feature_tables -Create Date: 2026-02-26 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. 
-revision: str = "consumer_group_delivery" -down_revision: Union[str, Sequence[str], None] = "add_hooks_and_feature_tables" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Create deliveries table, drop delivery columns from events.""" - # 1. Create deliveries table - op.create_table( - "deliveries", - sa.Column("id", sa.String(), primary_key=True), - sa.Column("event_id", sa.String(), sa.ForeignKey("events.id"), nullable=False), - sa.Column("consumer_group", sa.String(128), nullable=False), - sa.Column("status", sa.String(32), nullable=False, server_default=sa.text("'pending'")), - sa.Column("claimed_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("delivered_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("delivery_error", sa.Text(), nullable=True), - sa.Column("retry_count", sa.Integer(), nullable=False, server_default=sa.text("0")), - sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), - sa.UniqueConstraint("event_id", "consumer_group", name="uq_delivery_event_consumer"), - ) - - # Deliveries indexes - op.create_index( - "idx_deliveries_claim", - "deliveries", - ["consumer_group", "status", "event_id"], - postgresql_where=sa.text("status IN ('pending', 'claimed')"), - ) - op.create_index("idx_deliveries_event", "deliveries", ["event_id"]) - op.create_index( - "idx_deliveries_stale", - "deliveries", - ["claimed_at"], - postgresql_where=sa.text("status = 'claimed'"), - ) - op.create_index( - "idx_deliveries_failed", - "deliveries", - ["consumer_group", "retry_count"], - postgresql_where=sa.text("status = 'failed'"), - ) - - # 2. Drop delivery-related indexes from events - op.drop_index("idx_events_failed", table_name="events") - op.drop_index("idx_events_stale_claims", table_name="events") - op.drop_index("idx_events_claim", table_name="events") - op.drop_index("idx_events_delivery_status", table_name="events") - - # 3. 
Drop delivery columns from events (becomes append-only) - op.drop_column("events", "updated_at") - op.drop_column("events", "claimed_at") - op.drop_column("events", "retry_count") - op.drop_column("events", "delivery_error") - op.drop_column("events", "delivered_at") - op.drop_column("events", "delivery_status") - - -def downgrade() -> None: - """Restore delivery columns to events, drop deliveries table.""" - # Restore columns - op.add_column( - "events", - sa.Column("delivery_status", sa.String(32), nullable=False, server_default="pending"), - ) - op.add_column("events", sa.Column("delivered_at", sa.DateTime(timezone=True), nullable=True)) - op.add_column("events", sa.Column("delivery_error", sa.Text(), nullable=True)) - op.add_column( - "events", sa.Column("retry_count", sa.Integer(), nullable=False, server_default="0") - ) - op.add_column("events", sa.Column("claimed_at", sa.DateTime(timezone=True), nullable=True)) - op.add_column( - "events", - sa.Column( - "updated_at", sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() - ), - ) - - # Restore indexes - op.create_index("idx_events_delivery_status", "events", ["delivery_status"]) - op.create_index( - "idx_events_claim", - "events", - ["delivery_status", "event_type", "created_at"], - postgresql_where=sa.text("delivery_status IN ('pending', 'claimed')"), - ) - op.create_index( - "idx_events_stale_claims", - "events", - ["claimed_at"], - postgresql_where=sa.text("delivery_status = 'claimed'"), - ) - op.create_index( - "idx_events_failed", - "events", - ["event_type", "created_at"], - postgresql_where=sa.text("delivery_status = 'failed'"), - ) - - # Drop deliveries table - op.drop_index("idx_deliveries_failed", table_name="deliveries") - op.drop_index("idx_deliveries_stale", table_name="deliveries") - op.drop_index("idx_deliveries_event", table_name="deliveries") - op.drop_index("idx_deliveries_claim", table_name="deliveries") - op.drop_table("deliveries") diff --git 
a/server/migrations/versions/source_agnostic_records.py b/server/migrations/versions/source_agnostic_records.py deleted file mode 100644 index 2153bca..0000000 --- a/server/migrations/versions/source_agnostic_records.py +++ /dev/null @@ -1,83 +0,0 @@ -"""source_agnostic_records - -Replace deposition_srn + indexes with source (JSONB) + convention_srn. -No data migration needed — no production data exists. - -Revision ID: source_agnostic_records -Revises: add_device_authorizations -Create Date: 2026-03-24 - -""" - -from typing import Sequence, Union - -import sqlalchemy as sa -from alembic import op - -# revision identifiers, used by Alembic. -revision: str = "source_agnostic_records" -down_revision: Union[str, Sequence[str], None] = "add_device_authorizations" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # Drop old indexes - op.drop_index("idx_records_deposition_srn", table_name="records") - - # Drop old columns - op.drop_column("records", "deposition_srn") - op.drop_column("records", "indexes") - - # Add new columns - op.add_column( - "records", - sa.Column("convention_srn", sa.Text(), nullable=False), - ) - op.add_column( - "records", - sa.Column("source", sa.dialects.postgresql.JSONB(), nullable=False), - ) - - # Add new indexes - op.create_index( - "idx_records_convention_srn", - "records", - ["convention_srn"], - ) - op.create_index( - "uq_records_source", - "records", - [ - sa.text("(source->>'type')"), - sa.text("(source->>'id')"), - ], - unique=True, - ) - - -def downgrade() -> None: - # Drop new indexes - op.drop_index("uq_records_source", table_name="records") - op.drop_index("idx_records_convention_srn", table_name="records") - - # Drop new columns - op.drop_column("records", "source") - op.drop_column("records", "convention_srn") - - # Re-add old columns - op.add_column( - "records", - sa.Column("deposition_srn", sa.String(), nullable=False), - ) - op.add_column( 
- "records", - sa.Column("indexes", sa.JSON(), nullable=False), - ) - - # Re-add old index - op.create_index( - "idx_records_deposition_srn", - "records", - ["deposition_srn"], - ) diff --git a/server/osa/application/api/rest/app.py b/server/osa/application/api/rest/app.py index 63925cf..686edfb 100644 --- a/server/osa/application/api/rest/app.py +++ b/server/osa/application/api/rest/app.py @@ -18,6 +18,7 @@ conventions, depositions, events, + hooks, ingestions, health, ontologies, @@ -125,7 +126,7 @@ def create_app( registration, and DI resolution. """ # Pydantic Settings populates from env vars at runtime - config = Config() # type: ignore[call-arg] + config = Config() # Refuse to boot if the dev JWT secret is misconfigured for the deploy. _check_dev_secret_safety(config) @@ -198,6 +199,7 @@ def create_app( app_instance.include_router(ontologies.router, prefix="/api/v1") app_instance.include_router(schemas.router, prefix="/api/v1") app_instance.include_router(conventions.router, prefix="/api/v1") + app_instance.include_router(hooks.router, prefix="/api/v1") app_instance.include_router(depositions.router, prefix="/api/v1") app_instance.include_router(ingestions.router, prefix="/api/v1") app_instance.include_router(validation.router, prefix="/api/v1") diff --git a/server/osa/application/api/v1/routes/conventions.py b/server/osa/application/api/v1/routes/conventions.py index 4f34ac4..7abf60e 100644 --- a/server/osa/application/api/v1/routes/conventions.py +++ b/server/osa/application/api/v1/routes/conventions.py @@ -7,9 +7,9 @@ from fastapi.responses import StreamingResponse from osa.domain.deposition.command.create_convention import ( - CreateConvention, - CreateConventionHandler, ConventionCreated, + DeployConvention, + DeployConventionHandler, ) from osa.domain.deposition.query.download_template import ( DownloadTemplate, @@ -25,15 +25,15 @@ ListConventionsHandler, ConventionList, ) -from osa.domain.shared.model.srn import ConventionSRN +from 
osa.domain.shared.model.srn import ConventionSlug router = APIRouter(prefix="/conventions", tags=["Conventions"], route_class=DishkaRoute) @router.post("", response_model=ConventionCreated, status_code=201) -async def create_convention( - body: CreateConvention, - handler: FromDishka[CreateConventionHandler], +async def deploy_convention( + body: DeployConvention, + handler: FromDishka[DeployConventionHandler], ) -> ConventionCreated: return await handler.run(body) @@ -43,7 +43,7 @@ async def download_convention_template( srn: str, handler: FromDishka[DownloadTemplateHandler], ) -> StreamingResponse: - result = await handler.run(DownloadTemplate(convention_srn=ConventionSRN.parse(srn))) + result = await handler.run(DownloadTemplate(convention_id=ConventionSlug.parse(srn))) return StreamingResponse( iter([result.content]), media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", @@ -58,7 +58,7 @@ async def get_convention( srn: str, handler: FromDishka[GetConventionHandler], ) -> ConventionDetail: - return await handler.run(GetConvention(srn=ConventionSRN.parse(srn))) + return await handler.run(GetConvention(id=ConventionSlug.parse(srn))) @router.get("", response_model=ConventionList) diff --git a/server/osa/application/api/v1/routes/data/features_table.py b/server/osa/application/api/v1/routes/data/features_table.py index a23bc57..3d7073b 100644 --- a/server/osa/application/api/v1/routes/data/features_table.py +++ b/server/osa/application/api/v1/routes/data/features_table.py @@ -17,6 +17,7 @@ from osa.application.api.v1.routes.data.formats import DataResponseFormat from osa.application.api.v1.routes.data.tables import format_key, register_table_routes from osa.domain.data.query.read_table import ReadFeatureTable, ReadFeatureTableHandler +from osa.domain.shared.model.ids import FeatureName def _make_get_endpoint(fmt: DataResponseFormat): @@ -31,7 +32,7 @@ async def endpoint( result = await handler.run( ReadFeatureTable( schema=schema, - 
feature=feature, + feature=FeatureName(feature), cursor=cursor, limit=limit, sort=parse_sort(sort), @@ -54,7 +55,7 @@ async def endpoint( result = await handler.run( ReadFeatureTable( schema=schema, - feature=feature, + feature=FeatureName(feature), filter=body.filter, cursor=body.cursor, limit=body.limit, diff --git a/server/osa/application/api/v1/routes/depositions.py b/server/osa/application/api/v1/routes/depositions.py index dd20ca7..c4def06 100644 --- a/server/osa/application/api/v1/routes/depositions.py +++ b/server/osa/application/api/v1/routes/depositions.py @@ -82,7 +82,7 @@ async def download_template( template_handler: FromDishka[DownloadTemplateHandler], ) -> StreamingResponse: dep = await handler.run(GetDeposition(srn=DepositionSRN.parse(srn))) - result = await template_handler.run(DownloadTemplate(convention_srn=dep.convention_srn)) + result = await template_handler.run(DownloadTemplate(convention_id=dep.convention_id)) return StreamingResponse( iter([result.content]), media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", diff --git a/server/osa/application/api/v1/routes/hooks.py b/server/osa/application/api/v1/routes/hooks.py new file mode 100644 index 0000000..60e902d --- /dev/null +++ b/server/osa/application/api/v1/routes/hooks.py @@ -0,0 +1,111 @@ +"""Hook registry REST routes (#145) — releases, live pointer, catalog. + +Thin HTTP ↔ DTO coercion only. The path ``{name}`` is parsed into a +:class:`HookName`; release/live bodies are edge DTOs carrying just the +payload (the hook is identified by the path). All business logic lives in +``HookRegistryService`` behind the command/query handlers; domain errors map +centrally in ``application/api/v1/errors.py``. 
+""" + +from __future__ import annotations + +from dishka.integrations.fastapi import DishkaRoute, FromDishka +from fastapi import APIRouter, Response +from pydantic import BaseModel, Field + +from osa.domain.shared.model.hook import HookName, OciLimits +from osa.domain.validation.command.create_release import ( + CreateRelease, + CreateReleaseHandler, + ReleaseCreated, +) +from osa.domain.validation.command.set_live import ( + LiveSet, + SetLive, + SetLiveHandler, +) +from osa.domain.validation.query.get_release import ( + GetRelease, + GetReleaseHandler, + ReleaseDetail, +) +from osa.domain.validation.query.list_hooks import ( + HookCatalog, + ListHooks, + ListHooksHandler, +) +from osa.domain.validation.query.list_releases import ( + ListReleases, + ListReleasesHandler, + ReleaseList, +) + +router = APIRouter(prefix="/hooks", tags=["Hooks"], route_class=DishkaRoute) + + +class CreateReleaseBody(BaseModel): + """Release payload — byte-identical to the deploy's ``release`` block.""" + + image: str + digest: str + config: dict = Field(default_factory=dict) + limits: OciLimits = Field(default_factory=OciLimits) + source_ref: str + + +class SetLiveBody(BaseModel): + version: int + + +@router.post("/{name}/releases", response_model=ReleaseCreated) +async def create_release( + name: str, + body: CreateReleaseBody, + handler: FromDishka[CreateReleaseHandler], + response: Response, +) -> ReleaseCreated: + result = await handler.run( + CreateRelease( + name=HookName(name), + image=body.image, + digest=body.digest, + config=body.config, + limits=body.limits, + source_ref=body.source_ref, + ) + ) + response.status_code = 201 if result.created else 200 + return result + + +@router.put("/{name}/live", response_model=LiveSet) +async def set_live( + name: str, + body: SetLiveBody, + handler: FromDishka[SetLiveHandler], +) -> LiveSet: + return await handler.run(SetLive(name=HookName(name), version=body.version)) + + +@router.get("", response_model=HookCatalog) +async def 
list_hooks( + handler: FromDishka[ListHooksHandler], +) -> HookCatalog: + return await handler.run(ListHooks()) + + +@router.get("/{name}/releases", response_model=ReleaseList) +async def list_releases( + name: str, + handler: FromDishka[ListReleasesHandler], +) -> ReleaseList: + return await handler.run(ListReleases(name=HookName(name))) + + +@router.get("/{name}/releases/{version}", response_model=ReleaseDetail) +async def get_release( + name: str, + version: int, + handler: FromDishka[GetReleaseHandler], +) -> ReleaseDetail: + return await handler.run(GetRelease(name=HookName(name), version=version)) diff --git a/server/osa/application/api/v1/routes/validation.py b/server/osa/application/api/v1/routes/validation.py index 333d949..031c6fd 100644 --- a/server/osa/application/api/v1/routes/validation.py +++ b/server/osa/application/api/v1/routes/validation.py @@ -77,7 +77,7 @@ async def get_validation_status( results_dto = [ HookResultDTO( - hook_name=r.hook_name, + hook_name=r.hook_name.root, status=r.status, rejection_reason=r.rejection_reason, error_message=r.error_message, diff --git a/server/osa/application/di.py b/server/osa/application/di.py index a85074a..96a1a16 100644 --- a/server/osa/application/di.py +++ b/server/osa/application/di.py @@ -37,7 +37,7 @@ def create_container( the core handlers. They will be included in the subscription registry, WorkerPool, and DI resolution automatically. """ - config = Config() # type: ignore[call-arg] + config = Config() paths = OSAPaths() return make_async_container( diff --git a/server/osa/config.py b/server/osa/config.py index 69e15ee..c25abd9 100644 --- a/server/osa/config.py +++ b/server/osa/config.py @@ -225,6 +225,25 @@ def validate_orcid_ids(cls, v: list[str]) -> list[str]: return v +class ExtraIssuerConfig(BaseModel): + """Optional second JWT issuer for machine (M2M) credentials (#145, US5). + + When absent, token validation is byte-identical to the single-HS256-secret + path (SC-007). 
When present, tokens whose ``iss`` matches ``issuer`` are + verified against ``public_key`` (Ed25519, verify-only) and authorized by the + scopes parsed from ``scope_claim`` rather than DB roles. + + The signature algorithm is fixed to **EdDSA (Ed25519)** — not configurable. + Pinning a single asymmetric algorithm removes a whole class of misconfig / + downgrade footguns (no ``none``, no HS256-treats-the-PEM-as-a-shared-secret). + """ + + issuer: str # expected `iss` claim + public_key: str # Ed25519 public key, PEM (verify-only) + audience: str # expected `aud` + scope_claim: str = "scope" # "scope" (space-delimited) or "scp" (array) + + class AuthConfig(BaseModel): """Authentication configuration.""" @@ -232,6 +251,8 @@ class AuthConfig(BaseModel): # JwtConfig has a loud dev default; the boot safety check refuses to # start on the default secret unless `dev_mode=True` and bind is loopback. jwt: JwtConfig = JwtConfig() + # Optional M2M issuer (#145, US5). None → existing single-issuer behaviour. + extra_issuer: ExtraIssuerConfig | None = None callback_url: str = "" # Full callback URL (e.g., https://myarchive.org/api/v1/auth/callback) base_role: str | None = None # Implicit role for all authenticated users (e.g., "DEPOSITOR") admins: AdminsConfig = AdminsConfig() diff --git a/server/osa/domain/auth/model/principal.py b/server/osa/domain/auth/model/principal.py index 8767bd9..ed9ed48 100644 --- a/server/osa/domain/auth/model/principal.py +++ b/server/osa/domain/auth/model/principal.py @@ -18,6 +18,14 @@ class Principal(Identity): user_id: UserId provider_identity: ProviderIdentity roles: frozenset[Role] + scopes: frozenset[str] = frozenset() + """OAuth scopes granted to a machine (M2M) credential (#145, US5). + + Empty for human/user principals (authorized by ``roles``). A second token + issuer mints scope-limited credentials (e.g. ``conventions:write``, + ``hooks:write``) with empty ``roles``; the ``RequiresScope`` gate authorizes + on scope OR ADMIN role. 
+ """ def has_role(self, role: Role) -> bool: """Check if any assigned role >= the given role (hierarchy comparison).""" @@ -26,3 +34,7 @@ def has_role(self, role: Role) -> bool: def has_any_role(self, *roles: Role) -> bool: """Check if any assigned role satisfies any of the given roles.""" return any(self.has_role(r) for r in roles) + + def has_scope(self, scope: str) -> bool: + """Check if the principal was granted the given OAuth scope.""" + return scope in self.scopes diff --git a/server/osa/domain/auth/service/token.py b/server/osa/domain/auth/service/token.py index a867de4..68a12ee 100644 --- a/server/osa/domain/auth/service/token.py +++ b/server/osa/domain/auth/service/token.py @@ -12,7 +12,7 @@ import jwt -from osa.config import JwtConfig +from osa.config import ExtraIssuerConfig, JwtConfig from osa.domain.auth.model.value import OAuthStateData, ProviderIdentity, UserId from osa.domain.shared.service import Service @@ -31,6 +31,14 @@ class TokenService(Service): """ _config: JwtConfig + # Optional second issuer for M2M tokens (#145, US5). None → single-issuer + # behaviour, byte-identical to before. + _extra_issuer: ExtraIssuerConfig | None = None + + @property + def extra_issuer(self) -> ExtraIssuerConfig | None: + """The configured M2M issuer, if any (read by identity resolution).""" + return self._extra_issuer def create_access_token( self, @@ -73,6 +81,12 @@ def create_access_token( def validate_access_token(self, token: str) -> dict[str, Any]: """Validate and decode a JWT access token. + Routes on the ``iss`` claim (#145, US5): when a second issuer is + configured and the token's ``iss`` matches it, verify against that + issuer's public key (asymmetric, verify-only) and audience. Otherwise — + and always when no second issuer is configured — verify with the primary + HS256 secret, byte-identical to the single-issuer path (SC-007). 
+ Args: token: The JWT string to validate @@ -82,6 +96,21 @@ def validate_access_token(self, token: str) -> dict[str, Any]: Raises: jwt.InvalidTokenError: If token is invalid or expired """ + if self._extra_issuer is not None: + # Read `iss` without verifying — routing only; the branch below + # performs the real signature + audience verification. The algorithm + # is pinned to EdDSA (Ed25519): the token header's `alg` is never + # trusted, so a token can't downgrade to `none`/HS256. + unverified = jwt.decode(token, options={"verify_signature": False}) + if unverified.get("iss") == self._extra_issuer.issuer: + return jwt.decode( + token, + self._extra_issuer.public_key, + algorithms=["EdDSA"], + audience=self._extra_issuer.audience, + issuer=self._extra_issuer.issuer, + ) + return jwt.decode( token, self._config.secret, diff --git a/server/osa/domain/auth/util/di/provider.py b/server/osa/domain/auth/util/di/provider.py index ac118b1..bdac71a 100644 --- a/server/osa/domain/auth/util/di/provider.py +++ b/server/osa/domain/auth/util/di/provider.py @@ -70,7 +70,10 @@ class AuthProvider(Provider): @provide(scope=Scope.APP) def get_token_service(self, config: Config) -> TokenService: """Provide TokenService (stateless, only needs config).""" - return TokenService(_config=config.auth.jwt) + return TokenService( + _config=config.auth.jwt, + _extra_issuer=config.auth.extra_issuer, + ) @provide(scope=Scope.UOW) def get_auth_service( diff --git a/server/osa/domain/curation/event/deposition_approved.py b/server/osa/domain/curation/event/deposition_approved.py index 5ad6ded..7cbc810 100644 --- a/server/osa/domain/curation/event/deposition_approved.py +++ b/server/osa/domain/curation/event/deposition_approved.py @@ -3,7 +3,8 @@ from typing import Any from osa.domain.shared.event import Event, EventId -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN +from osa.domain.shared.model.hook 
import FeatureName class DepositionApproved(Event): @@ -16,5 +17,5 @@ class DepositionApproved(Event): id: EventId deposition_srn: DepositionSRN metadata: dict[str, Any] - convention_srn: ConventionSRN - expected_features: list[str] = [] + convention_id: ConventionSlug + expected_features: list[FeatureName] = [] diff --git a/server/osa/domain/curation/handler/auto_approve_curation.py b/server/osa/domain/curation/handler/auto_approve_curation.py index 9883b29..2961743 100644 --- a/server/osa/domain/curation/handler/auto_approve_curation.py +++ b/server/osa/domain/curation/handler/auto_approve_curation.py @@ -36,7 +36,7 @@ async def handle(self, event: ValidationCompleted) -> None: id=EventId(uuid4()), deposition_srn=event.deposition_srn, metadata=event.metadata, - convention_srn=event.convention_srn, + convention_id=event.convention_id, expected_features=event.expected_features, ) diff --git a/server/osa/domain/data/model/query_plan.py b/server/osa/domain/data/model/query_plan.py index 72e8295..8c4a6de 100644 --- a/server/osa/domain/data/model/query_plan.py +++ b/server/osa/domain/data/model/query_plan.py @@ -22,7 +22,7 @@ from pydantic import BaseModel, Field, model_validator from osa.domain.data.model.filter import FilterExpr -from osa.domain.shared.model.ids import HookName +from osa.domain.shared.model.ids import FeatureName from osa.domain.shared.model.srn import SchemaId @@ -118,7 +118,7 @@ def cursor_from_row(self, row: Mapping[str, Any]) -> str: class QueryPlan(BaseModel): schema_id: SchemaId table_kind: TableKind - feature_name: HookName | None = None + feature_name: FeatureName | None = None filter: FilterExpr | None = None pagination: PaginationParams = Field(default_factory=PaginationParams) sort: list[SortSpec] = Field(default_factory=list) diff --git a/server/osa/domain/data/query/read_table.py b/server/osa/domain/data/query/read_table.py index 38b19d1..9cb4082 100644 --- a/server/osa/domain/data/query/read_table.py +++ 
b/server/osa/domain/data/query/read_table.py @@ -31,7 +31,7 @@ from osa.domain.data.service.data_catalog import DataCatalogService from osa.domain.data.service.data_query import DataQueryService from osa.domain.shared.authorization.gate import public -from osa.domain.shared.model.ids import HookName +from osa.domain.shared.model.ids import FeatureName from osa.domain.shared.query import Query, QueryHandler @@ -45,7 +45,7 @@ class ReadRecordsTable(Query): class ReadFeatureTable(ReadRecordsTable): - feature: HookName + feature: FeatureName @dataclass diff --git a/server/osa/domain/data/service/data_catalog.py b/server/osa/domain/data/service/data_catalog.py index f461b33..376f3f1 100644 --- a/server/osa/domain/data/service/data_catalog.py +++ b/server/osa/domain/data/service/data_catalog.py @@ -13,7 +13,7 @@ from osa.domain.data.model.record_summary import RecordSummary from osa.domain.data.port.data_read_store import DataCatalogReadStore from osa.domain.shared.error import NotFoundError -from osa.domain.shared.model.ids import HookName, RecordId +from osa.domain.shared.model.ids import FeatureName, RecordId from osa.domain.shared.model.reserved import RESERVED_NAMES from osa.domain.shared.model.srn import SchemaId from osa.domain.shared.service import Service @@ -72,7 +72,7 @@ async def resolve_table( self, schema: str, table_kind: TableKind, - feature_name: HookName | None = None, + feature_name: FeatureName | None = None, ) -> ResolvedTable: """Resolve a URL schema segment + table selector to its column schema. @@ -83,7 +83,12 @@ async def resolve_table( """ schema_id = await self.resolve_schema(schema) manifest = await self.get_schema_manifest(schema_id) - name = "records" if table_kind == TableKind.RECORDS else feature_name + # TableResource.name is a plain str ("records" or a feature-table name). 
+ name = ( + "records" + if table_kind == TableKind.RECORDS + else (feature_name.root if feature_name is not None else None) + ) resource = next( (tr for tr in manifest.table_resources if tr.name == name and tr.kind == table_kind), None, diff --git a/server/osa/domain/deposition/command/create.py b/server/osa/domain/deposition/command/create.py index 47aadc0..46df73f 100644 --- a/server/osa/domain/deposition/command/create.py +++ b/server/osa/domain/deposition/command/create.py @@ -3,11 +3,11 @@ from osa.domain.deposition.service.deposition import DepositionService from osa.domain.shared.authorization.gate import at_least from osa.domain.shared.command import Command, CommandHandler, Result -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN class CreateDeposition(Command): - convention_srn: ConventionSRN + convention_id: ConventionSlug class DepositionCreated(Result): @@ -21,7 +21,7 @@ class CreateDepositionHandler(CommandHandler[CreateDeposition, DepositionCreated async def run(self, cmd: CreateDeposition) -> DepositionCreated: dep = await self.deposition_service.create( - convention_srn=cmd.convention_srn, + convention_id=cmd.convention_id, owner_id=self.principal.user_id, ) return DepositionCreated(srn=dep.srn) diff --git a/server/osa/domain/deposition/command/create_convention.py b/server/osa/domain/deposition/command/create_convention.py index b647738..3be994c 100644 --- a/server/osa/domain/deposition/command/create_convention.py +++ b/server/osa/domain/deposition/command/create_convention.py @@ -1,70 +1,121 @@ from datetime import datetime -from pydantic import ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field from osa.domain.auth.model.principal import Principal -from osa.domain.auth.model.role import Role +from osa.domain.deposition.model.deploy import HookDeploy from osa.domain.deposition.model.value import FileRequirements from 
osa.domain.deposition.service.convention import ConventionService from osa.domain.semantics.model.value import FieldDefinition -from osa.domain.shared.authorization.gate import at_least +from osa.domain.shared.authorization.gate import requires_scope from osa.domain.shared.command import Command, CommandHandler, Result -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import ( + HookIdentity, + HookName, + OciConfig, + OciLimits, + TableFeatureSpec, +) from osa.domain.shared.model.source import IngesterDefinition -from osa.domain.shared.model.srn import ConventionSRN, SchemaId, SchemaIdentifier +from osa.domain.shared.model.srn import ConventionSlug, SchemaId, SchemaIdentifier -class CreateConvention(Command): - model_config = ConfigDict(populate_by_name=True) +class DeployConventionSchema(BaseModel): + """The deploy's nested ``schema`` sub-structure (== POST /schemas body).""" id: SchemaIdentifier - """Schema slug — becomes the ```` in ``schema_id = @``. 
+ version: str + fields: list[FieldDefinition] = [] + + +class DeployConventionRelease(BaseModel): + """A hook's release block (== POST /hooks/{name}/releases body).""" + + image: str + digest: str + config: dict = Field(default_factory=dict) + limits: OciLimits = Field(default_factory=OciLimits) + source_ref: str # REQUIRED — reproducibility anchor (FR-005) + + +class DeployConventionHook(BaseModel): + """One hook in the bundled deploy: identity (name + fixed feature) + release.""" + + name: HookName + feature: TableFeatureSpec + release: DeployConventionRelease + + def to_deploy(self) -> HookDeploy: + return HookDeploy( + identity=HookIdentity(name=self.name, feature=self.feature), + runtime=OciConfig( + image=self.release.image, + digest=self.release.digest, + config=self.release.config, + limits=self.release.limits, + ), + source_ref=self.release.source_ref, + ) - A convention is a bundle of (schema + validators + file requirements), and - the caller supplies the slug of the embedded schema here. The convention - itself gets an opaque server-generated SRN. + +class DeployConvention(Command): + """Bundled deploy: schema + hooks (+ first releases) + convention, atomically. + + Conventions are unversioned and mutable (design-revisions §3): deploy is a + declarative upsert keyed by ``slug`` — re-declaring the same state is a no-op, + a different declaration updates the convention in place. No caller-supplied + version, no conflict path. 
""" + model_config = ConfigDict(populate_by_name=True) + + slug: ConventionSlug + """The convention's identity — a bare, unversioned slug.""" + title: str - version: str - schema_fields: list[FieldDefinition] = Field(alias="schema") - file_requirements: FileRequirements description: str | None = None - hooks: list[HookDefinition] = [] + file_requirements: FileRequirements + schema_block: DeployConventionSchema = Field(alias="schema") + hooks: list[DeployConventionHook] = [] ingester: IngesterDefinition | None = None class ConventionCreated(Result): - srn: ConventionSRN + slug: ConventionSlug title: str description: str | None schema_id: SchemaId + hooks: list[str] created_at: datetime -class CreateConventionHandler(CommandHandler[CreateConvention, ConventionCreated]): - # Conventions are curated registry artifacts (like ontologies and schemas) — - # they define submission formats and bundle validators. Creation is an - # admin operation, matching CreateOntology / CreateSchema. - __auth__ = at_least(Role.ADMIN) +class DeployConventionHandler(CommandHandler[DeployConvention, ConventionCreated]): + # Conventions are curated registry artifacts; deploy is an admin/automation + # operation. Authorized by the ``conventions:write`` M2M scope OR an ADMIN + # role (#145, US5). 
+ __auth__ = requires_scope("conventions:write") principal: Principal convention_service: ConventionService - async def run(self, cmd: CreateConvention) -> ConventionCreated: - convention = await self.convention_service.create_convention( - id=cmd.id, + async def run(self, cmd: DeployConvention) -> ConventionCreated: + built_by = str(self.principal.user_id) if self.principal.user_id else None + convention = await self.convention_service.deploy( + slug=cmd.slug, title=cmd.title, - version=cmd.version, - schema=cmd.schema_fields, - file_requirements=cmd.file_requirements, description=cmd.description, - hooks=cmd.hooks, + file_requirements=cmd.file_requirements, + schema_slug=cmd.schema_block.id, + schema_version=cmd.schema_block.version, + schema_fields=cmd.schema_block.fields, + hooks=[h.to_deploy() for h in cmd.hooks], ingester=cmd.ingester, + built_by=built_by, ) return ConventionCreated( - srn=convention.srn, + slug=convention.id, title=convention.title, description=convention.description, schema_id=convention.schema_id, + hooks=[name.root for name in convention.hooks], created_at=convention.created_at, ) diff --git a/server/osa/domain/deposition/command/upload_spreadsheet.py b/server/osa/domain/deposition/command/upload_spreadsheet.py index ad29269..587b14f 100644 --- a/server/osa/domain/deposition/command/upload_spreadsheet.py +++ b/server/osa/domain/deposition/command/upload_spreadsheet.py @@ -30,9 +30,9 @@ class UploadSpreadsheetHandler(CommandHandler[UploadSpreadsheet, SpreadsheetUplo async def run(self, cmd: UploadSpreadsheet) -> SpreadsheetUploaded: dep = await self.deposition_service.get(cmd.srn) - convention = await self.convention_repo.get(dep.convention_srn) + convention = await self.convention_repo.get(dep.convention_id) if convention is None: - raise NotFoundError(f"Convention not found: {dep.convention_srn}") + raise NotFoundError(f"Convention not found: {dep.convention_id}") schema = await self.schema_reader.get_schema(convention.schema_id) if 
schema is None: diff --git a/server/osa/domain/deposition/event/convention_registered.py b/server/osa/domain/deposition/event/convention_registered.py index 62d2112..9ddb321 100644 --- a/server/osa/domain/deposition/event/convention_registered.py +++ b/server/osa/domain/deposition/event/convention_registered.py @@ -2,8 +2,8 @@ from osa.domain.semantics.model.value import FieldDefinition from osa.domain.shared.event import Event, EventId -from osa.domain.shared.model.hook import HookDefinition -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.hook import HookIdentity +from osa.domain.shared.model.srn import ConventionSlug, SchemaId class ConventionRegistered(Event): @@ -18,7 +18,7 @@ class ConventionRegistered(Event): """ id: EventId - convention_srn: ConventionSRN + convention_id: ConventionSlug schema_id: SchemaId schema_fields: list[FieldDefinition] = [] - hooks: list[HookDefinition] = [] + hooks: list[HookIdentity] = [] diff --git a/server/osa/domain/deposition/event/created.py b/server/osa/domain/deposition/event/created.py index 91174bb..917b743 100644 --- a/server/osa/domain/deposition/event/created.py +++ b/server/osa/domain/deposition/event/created.py @@ -1,6 +1,6 @@ from osa.domain.auth.model.value import UserId from osa.domain.shared.event import Event, EventId -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN class DepositionCreatedEvent(Event): @@ -8,5 +8,5 @@ class DepositionCreatedEvent(Event): id: EventId deposition_id: DepositionSRN - convention_srn: ConventionSRN + convention_id: ConventionSlug owner_id: UserId diff --git a/server/osa/domain/deposition/event/submitted.py b/server/osa/domain/deposition/event/submitted.py index 7ca6861..5773198 100644 --- a/server/osa/domain/deposition/event/submitted.py +++ b/server/osa/domain/deposition/event/submitted.py @@ -1,19 +1,21 @@ from typing import Any from 
osa.domain.shared.event import Event, EventId -from osa.domain.shared.model.hook import HookDefinition -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.hook import HookName +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN class DepositionSubmittedEvent(Event): """Emitted when a deposition is submitted for validation. - Enriched with convention_srn and hooks so the validation domain - can operate without querying deposition repos. + Enriched with convention_id and hook **names** so the validation domain can + operate without querying deposition repos. The validation handler resolves + each name's live release at run start (feature #145, R8) — never re-freezing + a digest into the event. """ id: EventId deposition_id: DepositionSRN metadata: dict[str, Any] - convention_srn: ConventionSRN - hooks: list[HookDefinition] = [] + convention_id: ConventionSlug + hooks: list[HookName] = [] diff --git a/server/osa/domain/deposition/model/aggregate.py b/server/osa/domain/deposition/model/aggregate.py index 06e91ec..b7d5b2e 100644 --- a/server/osa/domain/deposition/model/aggregate.py +++ b/server/osa/domain/deposition/model/aggregate.py @@ -5,12 +5,12 @@ from osa.domain.deposition.model.value import DepositionFile, DepositionStatus from osa.domain.shared.error import InvalidStateError from osa.domain.shared.model.aggregate import Aggregate -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN, RecordSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN, RecordSRN class Deposition(Aggregate): srn: DepositionSRN - convention_srn: ConventionSRN + convention_id: ConventionSlug status: DepositionStatus = DepositionStatus.DRAFT metadata: dict[str, Any] = {} files: list[DepositionFile] = [] diff --git a/server/osa/domain/deposition/model/convention.py b/server/osa/domain/deposition/model/convention.py index 53bc3ac..997dce0 100644 --- 
a/server/osa/domain/deposition/model/convention.py +++ b/server/osa/domain/deposition/model/convention.py @@ -2,19 +2,25 @@ from osa.domain.deposition.model.value import FileRequirements from osa.domain.shared.model.aggregate import Aggregate -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import HookName from osa.domain.shared.model.source import IngesterDefinition -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, SchemaId class Convention(Aggregate): - """An immutable, user-facing submission template.""" + """An immutable, user-facing submission template. - srn: ConventionSRN + Feature #145: identified by a caller-supplied ``ConventionSlug`` + (``"@"``) rather than an opaque server-generated SRN, and + references its hooks by **name** — the versioned release each name resolves + to lives in the ``validation`` hook registry, not inline here. + """ + + id: ConventionSlug title: str description: str | None = None schema_id: SchemaId file_requirements: FileRequirements - hooks: list[HookDefinition] = [] + hooks: list[HookName] = [] ingester: IngesterDefinition | None = None created_at: datetime diff --git a/server/osa/domain/deposition/model/deploy.py b/server/osa/domain/deposition/model/deploy.py new file mode 100644 index 0000000..58dcfb5 --- /dev/null +++ b/server/osa/domain/deposition/model/deploy.py @@ -0,0 +1,29 @@ +"""Deposition-domain input for the bundled convention deploy (#145). + +The bundled deploy's *edge* shape is the command DTO +:class:`~osa.domain.deposition.command.create_convention.DeployConventionHook` +(§4). This is its *internal* representation — the handler maps each edge DTO to +a :class:`HookDeploy` and hands the list to :class:`ConventionService.deploy`, +which upserts the identity and mints the release. 
+ +It lives in the deposition domain (not ``shared``) on purpose: design-revisions +§1 removed the *shared* deploy/payload value object. The single persisted hook +artifact remains :class:`~osa.domain.validation.model.hook_release.HookRelease`. +""" + +from __future__ import annotations + +from typing import Annotated + +from pydantic import Field + +from osa.domain.shared.model.hook import HookIdentity, OciConfig +from osa.domain.shared.model.value import ValueObject + + +class HookDeploy(ValueObject): + """One hook in a bundled deploy: its fixed identity + the release to mint.""" + + identity: HookIdentity + runtime: Annotated[OciConfig, Field(discriminator="type")] + source_ref: str diff --git a/server/osa/domain/deposition/port/convention_repository.py b/server/osa/domain/deposition/port/convention_repository.py index a005f7c..e4c3073 100644 --- a/server/osa/domain/deposition/port/convention_repository.py +++ b/server/osa/domain/deposition/port/convention_repository.py @@ -1,7 +1,7 @@ from abc import abstractmethod from typing import TYPE_CHECKING, List, Protocol -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.port import Port if TYPE_CHECKING: @@ -13,7 +13,7 @@ class ConventionRepository(Port, Protocol): async def save(self, convention: "Convention") -> None: ... @abstractmethod - async def get(self, srn: ConventionSRN) -> "Convention | None": ... + async def get(self, id: ConventionSlug) -> "Convention | None": ... @abstractmethod async def list( @@ -21,7 +21,7 @@ async def list( ) -> "List[Convention]": ... @abstractmethod - async def exists(self, srn: ConventionSRN) -> bool: ... + async def exists(self, id: ConventionSlug) -> bool: ... 
@abstractmethod async def list_with_source(self) -> "List[Convention]": diff --git a/server/osa/domain/deposition/query/download_template.py b/server/osa/domain/deposition/query/download_template.py index acaa504..a9c0e0f 100644 --- a/server/osa/domain/deposition/query/download_template.py +++ b/server/osa/domain/deposition/query/download_template.py @@ -7,12 +7,12 @@ from osa.domain.semantics.model.value import TermConstraints from osa.domain.shared.authorization.gate import at_least from osa.domain.shared.error import NotFoundError -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.query import Query, QueryHandler, Result class DownloadTemplate(Query): - convention_srn: ConventionSRN + convention_id: ConventionSlug class TemplateResult(Result): @@ -29,9 +29,9 @@ class DownloadTemplateHandler(QueryHandler[DownloadTemplate, TemplateResult]): spreadsheet: SpreadsheetPort async def run(self, cmd: DownloadTemplate) -> TemplateResult: - convention = await self.convention_repo.get(cmd.convention_srn) + convention = await self.convention_repo.get(cmd.convention_id) if convention is None: - raise NotFoundError(f"Convention not found: {cmd.convention_srn}") + raise NotFoundError(f"Convention not found: {cmd.convention_id}") schema = await self.schema_reader.get_schema(convention.schema_id) if schema is None: diff --git a/server/osa/domain/deposition/query/get_convention.py b/server/osa/domain/deposition/query/get_convention.py index 7bf9d46..14bfe1d 100644 --- a/server/osa/domain/deposition/query/get_convention.py +++ b/server/osa/domain/deposition/query/get_convention.py @@ -3,23 +3,23 @@ from osa.domain.deposition.model.value import FileRequirements from osa.domain.deposition.service.convention import ConventionService from osa.domain.shared.authorization.gate import public -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import HookName from 
osa.domain.shared.model.source import IngesterDefinition -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, SchemaId from osa.domain.shared.query import Query, QueryHandler, Result class GetConvention(Query): - srn: ConventionSRN + id: ConventionSlug class ConventionDetail(Result): - srn: ConventionSRN + id: ConventionSlug title: str description: str | None schema_id: SchemaId file_requirements: FileRequirements - hooks: list[HookDefinition] + hooks: list[HookName] ingester: IngesterDefinition | None = None created_at: datetime @@ -29,14 +29,14 @@ class GetConventionHandler(QueryHandler[GetConvention, ConventionDetail]): convention_service: ConventionService async def run(self, cmd: GetConvention) -> ConventionDetail: - conv = await self.convention_service.get_convention(cmd.srn) + conv = await self.convention_service.get_convention(cmd.id) return ConventionDetail( - srn=conv.srn, + id=conv.id, title=conv.title, description=conv.description, schema_id=conv.schema_id, file_requirements=conv.file_requirements, - hooks=conv.hooks, + hooks=list(conv.hooks), ingester=conv.ingester, created_at=conv.created_at, ) diff --git a/server/osa/domain/deposition/query/get_deposition.py b/server/osa/domain/deposition/query/get_deposition.py index 0b835df..c1956b4 100644 --- a/server/osa/domain/deposition/query/get_deposition.py +++ b/server/osa/domain/deposition/query/get_deposition.py @@ -6,7 +6,7 @@ from osa.domain.deposition.model.value import DepositionFile, DepositionStatus from osa.domain.deposition.service.deposition import DepositionService from osa.domain.shared.authorization.gate import at_least -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN, RecordSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN, RecordSRN from osa.domain.shared.query import Query, QueryHandler, Result @@ -16,7 +16,7 @@ class GetDeposition(Query): class DepositionDetail(Result): srn: 
DepositionSRN - convention_srn: ConventionSRN + convention_id: ConventionSlug status: DepositionStatus metadata: dict[str, Any] files: list[DepositionFile] @@ -34,7 +34,7 @@ async def run(self, cmd: GetDeposition) -> DepositionDetail: dep = await self.deposition_service.get(cmd.srn) return DepositionDetail( srn=dep.srn, - convention_srn=dep.convention_srn, + convention_id=dep.convention_id, status=dep.status, metadata=dep.metadata, files=dep.files, diff --git a/server/osa/domain/deposition/query/list_conventions.py b/server/osa/domain/deposition/query/list_conventions.py index 0838bd0..9d859be 100644 --- a/server/osa/domain/deposition/query/list_conventions.py +++ b/server/osa/domain/deposition/query/list_conventions.py @@ -4,7 +4,7 @@ from osa.domain.deposition.service.convention import ConventionService from osa.domain.shared.authorization.gate import public -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, SchemaId from osa.domain.shared.query import Query, QueryHandler, Result @@ -13,7 +13,7 @@ class ListConventions(Query): class ConventionSummary(BaseModel): - srn: ConventionSRN + id: ConventionSlug title: str description: str | None schema_id: SchemaId @@ -33,7 +33,7 @@ async def run(self, cmd: ListConventions) -> ConventionList: return ConventionList( items=[ ConventionSummary( - srn=c.srn, + id=c.id, title=c.title, description=c.description, schema_id=c.schema_id, diff --git a/server/osa/domain/deposition/query/list_depositions.py b/server/osa/domain/deposition/query/list_depositions.py index 6920016..3d42e1c 100644 --- a/server/osa/domain/deposition/query/list_depositions.py +++ b/server/osa/domain/deposition/query/list_depositions.py @@ -7,7 +7,7 @@ from osa.domain.deposition.model.value import DepositionStatus from osa.domain.deposition.service.deposition import DepositionService from osa.domain.shared.authorization.gate import at_least -from osa.domain.shared.model.srn import 
ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from osa.domain.shared.query import Query, QueryHandler, Result @@ -17,7 +17,7 @@ class ListDepositions(Query): class DepositionSummary(BaseModel): srn: DepositionSRN - convention_srn: ConventionSRN + convention_id: ConventionSlug status: DepositionStatus file_count: int created_at: datetime @@ -42,7 +42,7 @@ async def run(self, cmd: ListDepositions) -> DepositionList: items=[ DepositionSummary( srn=d.srn, - convention_srn=d.convention_srn, + convention_id=d.convention_id, status=d.status, file_count=len(d.files), created_at=d.created_at, diff --git a/server/osa/domain/deposition/service/convention.py b/server/osa/domain/deposition/service/convention.py index 65e650d..4810b8e 100644 --- a/server/osa/domain/deposition/service/convention.py +++ b/server/osa/domain/deposition/service/convention.py @@ -3,6 +3,7 @@ from osa.domain.deposition.event.convention_registered import ConventionRegistered from osa.domain.deposition.model.convention import Convention +from osa.domain.deposition.model.deploy import HookDeploy from osa.domain.deposition.model.value import FileRequirements from osa.domain.deposition.port.convention_repository import ConventionRepository from osa.domain.metadata.service.metadata import MetadataService @@ -10,93 +11,121 @@ from osa.domain.semantics.service.schema import SchemaService from osa.domain.shared.error import NotFoundError from osa.domain.shared.event import EventId -from osa.domain.shared.model.hook import HookDefinition from osa.domain.shared.model.source import IngesterDefinition from osa.domain.shared.model.srn import ( - ConventionSRN, + ConventionSlug, Domain, LocalId, + SchemaId, SchemaIdentifier, Semver, ) from osa.domain.shared.outbox import Outbox from osa.domain.shared.service import Service +from osa.domain.validation.service.hook_registry import HookRegistryService class ConventionService(Service): convention_repo: 
ConventionRepository schema_service: SchemaService # TODO: replace with a port? metadata_service: MetadataService # TODO: replace with a port? + hook_registry: HookRegistryService outbox: Outbox node_domain: Domain - async def create_convention( + async def deploy( self, - id: SchemaIdentifier, + *, + slug: ConventionSlug, title: str, - version: str, - schema: list[FieldDefinition], file_requirements: FileRequirements, - description: str | None = None, - hooks: list[HookDefinition] | None = None, + schema_slug: SchemaIdentifier, + schema_version: str, + schema_fields: list[FieldDefinition], + hooks: list[HookDeploy] | None = None, ingester: IngesterDefinition | None = None, + description: str | None = None, + built_by: str | None = None, ) -> Convention: - """Create a convention with an inline schema. + """Bundled deploy: schema + hooks (+ releases) + convention in one + transaction (FR-012). Fans out into the schema, metadata, and hook + registries, then upserts the convention referencing hooks by name. - The schema is created as a separate Schema row internally, - and the convention references it via schema_id. - - Feature table creation is handled asynchronously by the - CreateFeatureTables handler reacting to ConventionRegistered. + Declarative upsert keyed by ``slug`` (design-revisions §3): conventions + are unversioned and mutable, so re-declaring the same state is a no-op and + a differing declaration updates the convention in place — no caller version, + no conflict path. The schema (versioned, immutable) and each hook release + (idempotent on digest) are reused when already present. Feature-table + creation is handled asynchronously by ``CreateFeatureTables`` reacting to + ``ConventionRegistered``. 
""" - # Create Schema row from inline field definitions - created_schema = await self.schema_service.create_schema( - id=id, - title=title, - version=version, - fields=schema, - ) + hooks = hooks or [] - # Create (or additively evolve) the typed metadata table in the same - # transaction — no async window where records can publish against a - # convention whose typed table doesn't exist yet. + # 1) Schema (+ typed metadata table) — same transaction, no async gap. + # Schemas are versioned/immutable: reuse the existing one if this exact + # id@version is already registered (idempotent re-deploy). + schema_id = SchemaId( + id=LocalId(schema_slug.root), version=Semver.from_string(schema_version) + ) + existing_schema = await self._existing_schema(schema_id) + if existing_schema is not None: + created_schema = existing_schema + else: + created_schema = await self.schema_service.create_schema( + id=schema_slug, + title=title, + version=schema_version, + fields=schema_fields, + ) await self.metadata_service.ensure_table( schema_id=created_schema.id, fields=created_schema.fields, ) - srn = ConventionSRN( - domain=self.node_domain, - id=LocalId(str(uuid4())[:20]), - version=Semver.from_string(version), - ) + # 2) Hooks: upsert each identity (reject a differing contract) + mint its + # release (idempotent on digest, advancing the live pointer). + for spec in hooks: + await self.hook_registry.upsert_identity(spec.identity.name, spec.identity.feature) + await self.hook_registry.create_release( + spec.identity.name, spec.runtime, spec.source_ref, built_by + ) + + # 3) Convention referencing hooks by name (upsert by slug). 
convention = Convention( - srn=srn, + id=slug, title=title, description=description, schema_id=created_schema.id, file_requirements=file_requirements, - hooks=hooks or [], + hooks=[spec.identity.name for spec in hooks], ingester=ingester, created_at=datetime.now(UTC), ) - await self.convention_repo.save(convention) + + # 4) Async feature-table creation reacts to this (hook names + specs). await self.outbox.append( ConventionRegistered( id=EventId(uuid4()), - convention_srn=srn, + convention_id=slug, schema_id=created_schema.id, schema_fields=created_schema.fields, - hooks=convention.hooks, + hooks=[spec.identity for spec in hooks], ) ) return convention - async def get_convention(self, srn: ConventionSRN) -> Convention: - convention = await self.convention_repo.get(srn) + async def _existing_schema(self, schema_id: SchemaId): + """Return the schema if already registered, else ``None`` (idempotency).""" + try: + return await self.schema_service.get_schema(schema_id) + except NotFoundError: + return None + + async def get_convention(self, id: ConventionSlug) -> Convention: + convention = await self.convention_repo.get(id) if convention is None: - raise NotFoundError(f"Convention not found: {srn}") + raise NotFoundError(f"Convention not found: {id.root}") return convention async def list_conventions( diff --git a/server/osa/domain/deposition/service/deposition.py b/server/osa/domain/deposition/service/deposition.py index 239d60c..08442fd 100644 --- a/server/osa/domain/deposition/service/deposition.py +++ b/server/osa/domain/deposition/service/deposition.py @@ -15,7 +15,7 @@ from osa.domain.deposition.port.storage import FileStoragePort from osa.domain.shared.error import NotFoundError, ValidationError from osa.domain.shared.event import EventId -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN, Domain, LocalId +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN, Domain, LocalId from osa.domain.shared.outbox import Outbox from 
osa.domain.shared.service import Service @@ -29,12 +29,12 @@ class DepositionService(Service): async def create( self, - convention_srn: ConventionSRN, + convention_id: ConventionSlug, owner_id: UserId, ) -> Deposition: - convention = await self.convention_repo.get(convention_srn) + convention = await self.convention_repo.get(convention_id) if convention is None: - raise NotFoundError(f"Convention not found: {convention_srn}") + raise NotFoundError(f"Convention not found: {convention_id}") now = datetime.now(UTC) srn = DepositionSRN( @@ -43,7 +43,7 @@ async def create( ) deposition = Deposition( srn=srn, - convention_srn=convention_srn, + convention_id=convention_id, owner_id=owner_id, created_at=now, updated_at=now, @@ -53,7 +53,7 @@ async def create( event = DepositionCreatedEvent( id=EventId(uuid4()), deposition_id=srn, - convention_srn=convention_srn, + convention_id=convention_id, owner_id=owner_id, ) await self.outbox.append(event) @@ -90,9 +90,9 @@ async def upload_file( size: int, ) -> Deposition: dep = await self.get(srn) - convention = await self.convention_repo.get(dep.convention_srn) + convention = await self.convention_repo.get(dep.convention_id) if convention is None: - raise NotFoundError(f"Convention not found: {dep.convention_srn}") + raise NotFoundError(f"Convention not found: {dep.convention_id}") reqs = convention.file_requirements @@ -181,9 +181,9 @@ async def return_to_draft(self, srn: DepositionSRN) -> Deposition: async def submit(self, srn: DepositionSRN) -> Deposition: dep = await self.get(srn) - convention = await self.convention_repo.get(dep.convention_srn) + convention = await self.convention_repo.get(dep.convention_id) if convention is None: - raise NotFoundError(f"Convention not found: {dep.convention_srn}") + raise NotFoundError(f"Convention not found: {dep.convention_id}") reqs = convention.file_requirements if len(dep.files) < reqs.min_count: @@ -198,7 +198,7 @@ async def submit(self, srn: DepositionSRN) -> Deposition: 
id=EventId(uuid4()), deposition_id=srn, metadata=dep.metadata, - convention_srn=dep.convention_srn, + convention_id=dep.convention_id, hooks=convention.hooks, ) await self.outbox.append(event) diff --git a/server/osa/domain/deposition/util/di/provider.py b/server/osa/domain/deposition/util/di/provider.py index a38e7a5..ed20276 100644 --- a/server/osa/domain/deposition/util/di/provider.py +++ b/server/osa/domain/deposition/util/di/provider.py @@ -2,7 +2,7 @@ from osa.config import Config from osa.domain.deposition.command.create import CreateDepositionHandler -from osa.domain.deposition.command.create_convention import CreateConventionHandler +from osa.domain.deposition.command.create_convention import DeployConventionHandler from osa.domain.deposition.command.delete_files import DeleteFileHandler from osa.domain.deposition.command.submit import SubmitDepositionHandler from osa.domain.deposition.command.update import UpdateMetadataHandler @@ -19,6 +19,7 @@ from osa.domain.deposition.query.list_conventions import ListConventionsHandler from osa.domain.deposition.query.list_depositions import ListDepositionsHandler from osa.domain.deposition.service.convention import ConventionService +from osa.domain.validation.service.hook_registry import HookRegistryService from osa.domain.deposition.service.deposition import DepositionService from osa.domain.metadata.service.metadata import MetadataService from osa.domain.semantics.service.schema import SchemaService @@ -53,6 +54,7 @@ def get_convention_service( convention_repo: ConventionRepository, schema_service: SchemaService, metadata_service: MetadataService, + hook_registry: HookRegistryService, outbox: Outbox, config: Config, ) -> ConventionService: @@ -60,6 +62,7 @@ def get_convention_service( convention_repo=convention_repo, schema_service=schema_service, metadata_service=metadata_service, + hook_registry=hook_registry, outbox=outbox, node_domain=Domain(config.domain), ) @@ -75,7 +78,7 @@ def get_spreadsheet_port(self) 
-> SpreadsheetPort: upload_handler = provide(UploadFileHandler, scope=Scope.UOW) delete_file_handler = provide(DeleteFileHandler, scope=Scope.UOW) upload_spreadsheet_handler = provide(UploadSpreadsheetHandler, scope=Scope.UOW) - create_convention_handler = provide(CreateConventionHandler, scope=Scope.UOW) + deploy_convention_handler = provide(DeployConventionHandler, scope=Scope.UOW) # Query Handlers get_deposition_handler = provide(GetDepositionHandler, scope=Scope.UOW) diff --git a/server/osa/domain/feature/handler/create_feature_tables.py b/server/osa/domain/feature/handler/create_feature_tables.py index 0ad16a4..4d025f0 100644 --- a/server/osa/domain/feature/handler/create_feature_tables.py +++ b/server/osa/domain/feature/handler/create_feature_tables.py @@ -25,7 +25,7 @@ async def handle(self, event: ConventionRegistered) -> None: logger.info( "Creating feature table: hook=%s convention=%s", hook.name, - event.convention_srn, + event.convention_id, ) try: await self.feature_service.create_table(hook) @@ -33,5 +33,5 @@ async def handle(self, event: ConventionRegistered) -> None: logger.warning( "Feature table already exists, skipping: hook=%s convention=%s", hook.name, - event.convention_srn, + event.convention_id, ) diff --git a/server/osa/domain/feature/handler/insert_batch_features.py b/server/osa/domain/feature/handler/insert_batch_features.py index 39896d7..c0c6fa2 100644 --- a/server/osa/domain/feature/handler/insert_batch_features.py +++ b/server/osa/domain/feature/handler/insert_batch_features.py @@ -16,6 +16,10 @@ class InsertBatchFeatures(EventHandler[IngestBatchPublished]): Handles IngestBatchPublished (batch-level event) rather than per-record RecordPublished. Uses read_batch_outcomes to parse the JSONL output format (not the single-record features.json). + + Per-row provenance: each hook's ``run_id`` is read from the ``run.json`` the + producing run wrote into that hook's output dir (design-revisions §6) — no + registry call, no DB run-id lookup. 
""" feature_service: FeatureService @@ -31,9 +35,21 @@ async def handle(self, event: IngestBatchPublished) -> None: total_inserted = 0 skipped_dupes = 0 - for hook_name in event.expected_features: - # Read JSONL outcomes for this hook - outcomes = await self.feature_storage.read_batch_outcomes(batch_output_dir, hook_name) + for feature in event.expected_features: + name = feature.root + # Read JSONL outcomes for this feature's hook + outcomes = await self.feature_storage.read_batch_outcomes(batch_output_dir, name) + + run_ref = await self.feature_storage.read_run_ref(batch_output_dir, name) + if run_ref is None: + log.warn( + "no run.json for feature {feature} in batch {batch_index}; " + "skipping feature insert (no provenance)", + feature=name, + batch_index=event.batch_index, + ingest_run_id=event.ingest_run_id, + ) + continue # Insert features for each published record that passed this hook. # Map upstream source ID → published record SRN so features @@ -51,9 +67,10 @@ async def handle(self, event: IngestBatchPublished) -> None: continue count = await self.feature_service.insert_features( - hook_name=hook_name, + feature=feature, record_srn=record_srn, rows=outcome.features, + run_id=run_ref.run_id, ) total_inserted += count diff --git a/server/osa/domain/feature/handler/insert_record_features.py b/server/osa/domain/feature/handler/insert_record_features.py index d0bd4a5..d01045d 100644 --- a/server/osa/domain/feature/handler/insert_record_features.py +++ b/server/osa/domain/feature/handler/insert_record_features.py @@ -10,7 +10,9 @@ class InsertRecordFeatures(EventHandler[RecordPublished]): """Reads hook outputs from storage and inserts features with record_srn. Resolves the hook output directory from the record's source via the - feature storage port, then delegates to FeatureService for insertion. + feature storage port, then delegates to FeatureService for insertion. 
Each + hook's ``run_id`` is read from the ``run.json`` the producing run wrote into + that hook's output dir (design-revisions §6) — no registry/DB lookup. """ feature_service: FeatureService diff --git a/server/osa/domain/feature/port/feature_store.py b/server/osa/domain/feature/port/feature_store.py index 4f927d3..bc0dc47 100644 --- a/server/osa/domain/feature/port/feature_store.py +++ b/server/osa/domain/feature/port/feature_store.py @@ -12,16 +12,21 @@ class FeatureStore(Port, Protocol): """Manages feature tables for hook-derived data.""" @abstractmethod - async def create_table(self, hook_name: str, columns: list[ColumnDef]) -> None: - """Create a feature table for a hook. Fails on name collision.""" + async def create_table(self, feature: str, columns: list[ColumnDef]) -> None: + """Create a feature table (named by its producing hook). Fails on collision.""" ... @abstractmethod async def insert_features( self, - hook_name: str, + feature: str, record_srn: str, rows: list[dict[str, Any]], + run_id: str, ) -> int: - """Insert feature rows into the feature table. Returns row count.""" + """Insert feature rows into the feature table. Returns row count. + + ``run_id`` is the ``hook_runs.id`` that produced these rows; it is + stamped on every row for per-row provenance (feature #145). + """ ... 
diff --git a/server/osa/domain/feature/port/storage.py b/server/osa/domain/feature/port/storage.py index 64caa81..8f6efa1 100644 --- a/server/osa/domain/feature/port/storage.py +++ b/server/osa/domain/feature/port/storage.py @@ -3,6 +3,7 @@ from abc import abstractmethod from typing import Any, Protocol +from osa.domain.shared.model.provenance import RunRef from osa.domain.shared.port import Port from osa.domain.validation.model.batch_outcome import BatchRecordOutcome, HookRecordId @@ -10,6 +11,15 @@ class FeatureStoragePort(Port, Protocol): """File storage operations used by the feature domain.""" + @abstractmethod + async def read_run_ref(self, output_dir: str, hook_name: str) -> RunRef | None: + """Read ``{output_dir}/hooks/{hook_name}/output/run.json`` (provenance). + + Returns the ``run_id``/``release_id`` the producing hook stamped, or + ``None`` if absent (e.g. the hook produced no output). #145. + """ + ... + @abstractmethod def get_hook_output_root(self, source_type: str, source_id: str) -> str: """Resolve the root directory containing hook outputs for a source. 
diff --git a/server/osa/domain/feature/service/feature.py b/server/osa/domain/feature/service/feature.py index 210e681..513d172 100644 --- a/server/osa/domain/feature/service/feature.py +++ b/server/osa/domain/feature/service/feature.py @@ -5,7 +5,7 @@ from osa.domain.feature.port.feature_store import FeatureStore from osa.domain.feature.port.storage import FeatureStoragePort -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import FeatureName, HookIdentity from osa.domain.shared.service import Service logger = logging.getLogger(__name__) @@ -17,48 +17,62 @@ class FeatureService(Service): feature_store: FeatureStore feature_storage: FeatureStoragePort - async def create_table(self, hook: HookDefinition) -> None: - """Create a feature table from a HookDefinition.""" - await self.feature_store.create_table(hook.name, hook.feature.columns) + async def create_table(self, hook: HookIdentity) -> None: + """Create a feature table for a hook's output (named by the hook).""" + await self.feature_store.create_table(hook.name.root, hook.feature.columns) async def insert_features( self, - hook_name: str, + feature: FeatureName, record_srn: str, rows: list[dict[str, Any]], + run_id: str, ) -> int: - """Insert feature rows into the feature table. Returns row count.""" - return await self.feature_store.insert_features(hook_name, record_srn, rows) + """Insert feature rows into the feature table. Returns row count. + + ``run_id`` is the ``hook_runs.id`` that produced these rows (provenance). + """ + return await self.feature_store.insert_features(feature.root, record_srn, rows, run_id) async def insert_features_for_record( self, hook_output_dir: str, record_srn: str, - expected_features: list[str] | None = None, + expected_features: list[FeatureName] | None = None, ) -> None: - """Read hook features from the given directory and insert into feature tables. + """Read a record's hook outputs from storage and insert them into feature tables. 
- Warns (does not raise) when an expected feature is missing — the record - is already published, blocking other features would be worse. + Each feature's ``run_id`` is read from the ``run.json`` the producing run + wrote into that hook's output dir (design-revisions §6). Warns (does not + raise) when an expected feature is missing — the record is already + published, blocking other features would be worse. """ if not expected_features: return - for feature_name in expected_features: - if not await self.feature_storage.hook_features_exist(hook_output_dir, feature_name): + for feature in expected_features: + name = feature.root + if not await self.feature_storage.hook_features_exist(hook_output_dir, name): logger.warning( - f"Expected feature '{feature_name}' not found in {hook_output_dir} " + f"Expected feature '{name}' not found in {hook_output_dir} " f"for record {record_srn}" ) continue - features = await self.feature_storage.read_hook_features(hook_output_dir, feature_name) - if features: + run_ref = await self.feature_storage.read_run_ref(hook_output_dir, name) + if run_ref is None: + logger.warning( + f"No run.json for feature '{name}' in {hook_output_dir} " + f"for record {record_srn}; skipping (no provenance)" + ) + continue + + rows = await self.feature_storage.read_hook_features(hook_output_dir, name) + if rows: count = await self.insert_features( - hook_name=feature_name, + feature=feature, record_srn=record_srn, - rows=features, - ) - logger.info( - f"Inserted {count} features for hook={feature_name} record={record_srn}" + rows=rows, + run_id=run_ref.run_id, ) + logger.info(f"Inserted {count} features for feature={name} record={record_srn}") diff --git a/server/osa/domain/ingest/command/start_ingest.py b/server/osa/domain/ingest/command/start_ingest.py index 0b3afc0..99ffa1d 100644 --- a/server/osa/domain/ingest/command/start_ingest.py +++ b/server/osa/domain/ingest/command/start_ingest.py @@ -8,7 +8,7 @@ class StartIngest(Command): """Start an ingest 
run for a convention.""" - convention_srn: str + convention_id: str batch_size: int = 1000 limit: int | None = None # Max total records to ingest (None = unlimited) @@ -17,7 +17,7 @@ class IngestRunCreated(Result): """Result of starting an ingest run.""" srn: str - convention_srn: str + convention_id: str status: str started_at: str @@ -38,7 +38,7 @@ async def run(self, cmd: StartIngest) -> IngestRunCreated: from osa.domain.shared.model.srn import Domain # TODO: lazy needed? ingest_run = await self.service.start_ingest( - convention_srn=cmd.convention_srn, + convention_id=cmd.convention_id, batch_size=cmd.batch_size, limit=cmd.limit, ) @@ -48,7 +48,7 @@ async def run(self, cmd: StartIngest) -> IngestRunCreated: return IngestRunCreated( srn=srn, - convention_srn=ingest_run.convention_srn, + convention_id=ingest_run.convention_id, status=ingest_run.status, started_at=ingest_run.started_at.isoformat(), ) diff --git a/server/osa/domain/ingest/event/events.py b/server/osa/domain/ingest/event/events.py index 49e74f5..4a2e3d6 100644 --- a/server/osa/domain/ingest/event/events.py +++ b/server/osa/domain/ingest/event/events.py @@ -2,6 +2,7 @@ from osa.domain.ingest.model.ingest_run import IngestRunId from osa.domain.shared.event import Event, EventId +from osa.domain.shared.model.hook import FeatureName class IngestRunStarted(Event): @@ -9,7 +10,7 @@ class IngestRunStarted(Event): id: EventId ingest_run_id: IngestRunId - convention_srn: str + convention_id: str batch_size: int @@ -22,7 +23,7 @@ class NextBatchRequested(Event): id: EventId ingest_run_id: IngestRunId - convention_srn: str + convention_id: str batch_size: int @@ -58,11 +59,11 @@ class IngestBatchPublished(Event): id: EventId ingest_run_id: IngestRunId - convention_srn: str + convention_id: str batch_index: int published_srns: list[str] published_count: int - expected_features: list[str] + expected_features: list[FeatureName] upstream_to_record_srn: dict[str, str] # upstream source ID → published record SRN 
diff --git a/server/osa/domain/ingest/handler/publish_batch.py b/server/osa/domain/ingest/handler/publish_batch.py index e179bd5..ead3e8c 100644 --- a/server/osa/domain/ingest/handler/publish_batch.py +++ b/server/osa/domain/ingest/handler/publish_batch.py @@ -15,9 +15,10 @@ from osa.domain.record.model.draft import RecordDraft from osa.domain.record.service import RecordService from osa.domain.shared.error import NotFoundError +from osa.domain.shared.model.hook import FeatureName, HookName from osa.domain.shared.event import EventHandler, EventId from osa.domain.shared.model.source import IngestSource -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.outbox import Outbox from osa.infrastructure.logging import get_logger @@ -41,7 +42,7 @@ async def handle(self, event: HookBatchCompleted) -> None: raise NotFoundError(f"Ingest run not found: {event.ingest_run_id}") convention = await self.convention_service.get_convention( - ConventionSRN.parse(ingest_run.convention_srn) + ConventionSlug.parse(ingest_run.convention_id) ) # Read ingester records via storage port (filesystem or S3) @@ -51,23 +52,27 @@ async def handle(self, event: HookBatchCompleted) -> None: # batch_dir used as locator for hook outcome reads batch_dir = str(self.ingest_storage.batch_dir(event.ingest_run_id, event.batch_index)) - # Read hook outcomes for all hooks - expected_features = [h.name for h in convention.hooks] + # Producer-side hook names (for reading each hook's outcomes) vs the + # feature-table names they produce (carried downstream for insertion). + hook_names = list(convention.hooks) + expected_features = [FeatureName(h.root) for h in convention.hooks] # Determine which records passed all hooks (via storage port — works on filesystem + S3) # TODO: is this efficient, are we hitting S3 a lot? 
passed_records = await _get_passed_records( ingester_records=ingester_records, batch_dir=str(batch_dir), - hooks=expected_features, + hooks=hook_names, feature_storage=self.feature_storage, ) # Log outcome breakdown per hook short_id = event.ingest_run_id[:8] total = len(ingester_records) - for hook_name in expected_features: - outcomes = await self.feature_storage.read_batch_outcomes(str(batch_dir), hook_name) + for hook_name in hook_names: + outcomes = await self.feature_storage.read_batch_outcomes( + str(batch_dir), hook_name.root + ) from osa.domain.validation.model.batch_outcome import OutcomeStatus passed = sum(1 for o in outcomes.values() if o.status == OutcomeStatus.PASSED) @@ -96,12 +101,12 @@ async def handle(self, event: HookBatchCompleted) -> None: drafts.append( RecordDraft( source=IngestSource( - id=f"{ingest_run.convention_srn}:{record.source_id}", + id=f"{ingest_run.convention_id}:{record.source_id}", ingest_run_id=ingest_run.id, upstream_source=record.source_id, ), metadata=record.metadata, - convention_srn=ConventionSRN.parse(ingest_run.convention_srn), + convention_id=ConventionSlug.parse(ingest_run.convention_id), expected_features=expected_features, ) ) @@ -139,7 +144,7 @@ async def handle(self, event: HookBatchCompleted) -> None: IngestBatchPublished( id=EventId(uuid4()), ingest_run_id=event.ingest_run_id, - convention_srn=ingest_run.convention_srn, + convention_id=ingest_run.convention_id, batch_index=event.batch_index, published_srns=published_srns, published_count=published_count, @@ -164,7 +169,7 @@ async def on_exhausted(self, event: HookBatchCompleted) -> None: async def _get_passed_records( ingester_records: list[IngesterRecord], batch_dir: str, - hooks: list[str], + hooks: list[HookName], feature_storage: FeatureStoragePort, ) -> list[IngesterRecord]: """Determine which records passed ALL hooks via the storage port.""" @@ -174,7 +179,7 @@ async def _get_passed_records( passed_ids: set[str] | None = None for hook_name in hooks: - 
outcomes = await feature_storage.read_batch_outcomes(batch_dir, hook_name) + outcomes = await feature_storage.read_batch_outcomes(batch_dir, hook_name.root) if not outcomes: return [] from osa.domain.validation.model.batch_outcome import OutcomeStatus diff --git a/server/osa/domain/ingest/handler/run_hooks.py b/server/osa/domain/ingest/handler/run_hooks.py index 4794092..55661dd 100644 --- a/server/osa/domain/ingest/handler/run_hooks.py +++ b/server/osa/domain/ingest/handler/run_hooks.py @@ -1,7 +1,6 @@ """RunHooks — runs hook containers on an ingester batch.""" -from osa.domain.validation.model import HookResult - +from datetime import UTC, datetime from pathlib import Path from uuid import uuid4 @@ -13,11 +12,16 @@ from osa.domain.ingest.service.ingest import IngestService from osa.domain.shared.error import NotFoundError, OOMError, PermanentError from osa.domain.shared.event import EventHandler, EventId -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.hook import HookIdentity, HookName +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.outbox import Outbox +from osa.domain.validation.model import HookResult from osa.domain.validation.model.hook_input import HookRecord +from osa.domain.validation.model.hook_release import HookRelease +from osa.domain.validation.model.hook_run import HookRun, HookRunId, HookRunStatus from osa.domain.validation.port.hook_runner import HookInputs from osa.domain.validation.service.hook import HookService +from osa.domain.validation.service.hook_registry import HookRegistryService from osa.infrastructure.logging import get_logger log = get_logger(__name__) @@ -33,6 +37,7 @@ class RunHooks(EventHandler[IngesterBatchReady]): ingest_service: IngestService convention_service: ConventionService hook_service: HookService + hook_registry: HookRegistryService outbox: Outbox ingest_storage: IngestStoragePort @@ -42,7 +47,7 @@ async def handle(self, event: IngesterBatchReady) -> 
None: raise NotFoundError(f"Ingest run not found: {event.ingest_run_id}") convention = await self.convention_service.get_convention( - ConventionSRN.parse(ingest_run.convention_srn) + ConventionSlug.parse(ingest_run.convention_id) ) # Read records via storage port (filesystem or S3) @@ -77,18 +82,35 @@ async def handle(self, event: IngesterBatchReady) -> None: files_dirs=files_dirs, ) + # Resolve each hook's live release ONCE and snapshot it for this run + # (R8) so a mid-run deploy can't split the batch across versions. + hook_names = list(convention.hooks) + releases = await self.hook_registry.resolve_live(hook_names) + pairs: list[tuple[HookIdentity, HookRelease]] = [] + for name in hook_names: + hook = await self.hook_registry.get_hook(name) + release = releases.get(name) + if hook is None or release is None: + raise NotFoundError(f"Hook {name!r} has no live release") + pairs.append((HookIdentity(name=hook.name, feature=hook.feature), release)) + # Build work_dirs for each hook via storage port - work_dirs: dict[str, Path] = {} - for hook in convention.hooks: - work_dirs[hook.name] = self.ingest_storage.hook_work_dir( - event.ingest_run_id, event.batch_index, hook.name + work_dirs: dict[HookName, Path] = {} + for name in hook_names: + work_dirs[name] = self.ingest_storage.hook_work_dir( + event.ingest_run_id, event.batch_index, name.root ) + # Pre-allocate a hook_run id per hook so provenance is stable even on + # partial (OOM) outcomes; rows produced this batch reference these ids. 
+ run_id_by_hook = {name: HookRunId(uuid4()) for name in hook_names} + started_at = datetime.now(UTC) + # Run all hooks via HookService results: list[HookResult] = [] try: results = await self.hook_service.run_hooks_for_batch( - hooks=convention.hooks, + hook_releases=pairs, inputs=inputs, work_dirs=work_dirs, ) @@ -127,7 +149,40 @@ async def handle(self, event: IngesterBatchReady) -> None: ingest_run_id=event.ingest_run_id, ) - # Emit HookBatchCompleted + # Record one append-only hook_run per hook (provenance anchor, #145) and + # write run.json into each hook's output dir so the feature-insert handler + # can stamp feature.run_id without a DB lookup (design-revisions §6). + finished_at = datetime.now(UTC) + result_by_hook = {r.hook_name: r for r in results} + for hook, release in pairs: + run_id = run_id_by_hook[hook.name] + # Absent result → the hook produced nothing this batch (e.g. an + # OOM-exhausted batch that raised); record an ERROR run. The per-hook + # retry count isn't surfaced through that failure path, so it stays 0. + result = result_by_hook.get(hook.name) + run_status = ( + HookRunStatus.from_hook_status(result.status) + if result is not None + else HookRunStatus.ERROR + ) + await self.ingest_storage.write_run_ref( + work_dirs[hook.name], str(run_id), str(release.id) + ) + await self.hook_registry.record_run( + HookRun( + id=run_id, + release_id=release.id, + status=run_status, + started_at=started_at, + finished_at=finished_at, + duration_s=result.duration_seconds if result is not None else 0.0, + oom_retries=result.oom_retries if result is not None else 0, + ) + ) + + # Emit HookBatchCompleted. Feature provenance (run_id per hook) is carried + # via the run.json just written into each hook's output dir — no event-chain + # threading and no DB run-id lookup. 
await self.outbox.append( HookBatchCompleted( id=EventId(uuid4()), diff --git a/server/osa/domain/ingest/handler/run_ingester.py b/server/osa/domain/ingest/handler/run_ingester.py index 9c9d369..f12fd62 100644 --- a/server/osa/domain/ingest/handler/run_ingester.py +++ b/server/osa/domain/ingest/handler/run_ingester.py @@ -11,7 +11,7 @@ from osa.domain.ingest.service.ingest import IngestService from osa.domain.shared.error import NotFoundError, PermanentError from osa.domain.shared.event import EventHandler, EventId -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.outbox import Outbox from osa.domain.shared.port.ingester_runner import IngesterInputs, IngesterRunner from osa.infrastructure.logging import get_logger @@ -51,7 +51,7 @@ async def handle(self, event: NextBatchRequested) -> None: NextBatchRequested( id=EventId(uuid4()), ingest_run_id=event.ingest_run_id, - convention_srn=event.convention_srn, + convention_id=event.convention_id, batch_size=event.batch_size, ), deliver_after=datetime.now(UTC) + BACKPRESSURE_DELAY, @@ -63,10 +63,10 @@ async def handle(self, event: NextBatchRequested) -> None: await self.ingest_repo.save(ingest_run) convention = await self.convention_service.get_convention( - ConventionSRN.parse(event.convention_srn) + ConventionSlug.parse(event.convention_id) ) if convention.ingester is None: - raise NotFoundError(f"No ingester for convention {event.convention_srn}") + raise NotFoundError(f"No ingester for convention {event.convention_id}") batch_index = ingest_run.batches_ingested @@ -91,7 +91,7 @@ async def handle(self, event: NextBatchRequested) -> None: effective_batch_limit = min(ingest_run.batch_size, remaining) inputs = IngesterInputs( - convention_srn=convention.srn, + convention_id=convention.id, ingest_run_id=event.ingest_run_id, batch_index=batch_index, config=convention.ingester.config, @@ -160,7 +160,7 @@ async def handle(self, event: 
NextBatchRequested) -> None: NextBatchRequested( id=EventId(uuid4()), ingest_run_id=event.ingest_run_id, - convention_srn=event.convention_srn, + convention_id=event.convention_id, batch_size=ingest_run.batch_size, ) ) diff --git a/server/osa/domain/ingest/model/ingest_run.py b/server/osa/domain/ingest/model/ingest_run.py index 53a899b..db8aab5 100644 --- a/server/osa/domain/ingest/model/ingest_run.py +++ b/server/osa/domain/ingest/model/ingest_run.py @@ -33,7 +33,7 @@ class IngestRun(Aggregate): """ id: IngestRunId - convention_srn: str + convention_id: str status: IngestStatus = IngestStatus.PENDING ingestion_finished: bool = False batches_ingested: int = 0 diff --git a/server/osa/domain/ingest/port/repository.py b/server/osa/domain/ingest/port/repository.py index 3b6a376..2dc55c1 100644 --- a/server/osa/domain/ingest/port/repository.py +++ b/server/osa/domain/ingest/port/repository.py @@ -26,7 +26,7 @@ async def get(self, id: IngestRunId) -> IngestRun | None: ... @abstractmethod - async def get_running_for_convention(self, convention_srn: str) -> IngestRun | None: + async def get_running_for_convention(self, convention_id: str) -> IngestRun | None: """Get a running ingest run for a convention, if any.""" ... diff --git a/server/osa/domain/ingest/port/storage.py b/server/osa/domain/ingest/port/storage.py index 3099c46..dfaf0b2 100644 --- a/server/osa/domain/ingest/port/storage.py +++ b/server/osa/domain/ingest/port/storage.py @@ -60,3 +60,12 @@ def batch_files_dir(self, ingest_run_id: str, batch_index: int) -> Path: def hook_work_dir(self, ingest_run_id: str, batch_index: int, hook_name: str) -> Path: """Return the hook output directory for a batch.""" ... + + @abstractmethod + async def write_run_ref(self, work_dir: Path, run_id: str, release_id: str) -> None: + """Write ``{work_dir}/output/run.json`` carrying this run's provenance. + + The feature-insert handler reads it back to stamp ``feature.run_id`` — + no DB run-id lookup (design-revisions §6). #145. 
+ """ + ... diff --git a/server/osa/domain/ingest/service/ingest.py b/server/osa/domain/ingest/service/ingest.py index a91725d..3414d9d 100644 --- a/server/osa/domain/ingest/service/ingest.py +++ b/server/osa/domain/ingest/service/ingest.py @@ -9,7 +9,7 @@ from osa.domain.ingest.port.repository import IngestRunRepository from osa.domain.shared.error import ConflictError, NotFoundError from osa.domain.shared.event import EventId -from osa.domain.shared.model.srn import ConventionSRN, Domain +from osa.domain.shared.model.srn import ConventionSlug, Domain from osa.domain.shared.outbox import Outbox from osa.domain.shared.service import Service from osa.infrastructure.logging import get_logger @@ -27,7 +27,7 @@ class IngestService(Service): async def start_ingest( self, - convention_srn: str, # TODO: use convention ID instead of SRN + convention_id: str, # bare slug string; validated via ConventionSlug.parse below batch_size: int = 1000, limit: int | None = None, ) -> IngestRun: @@ -38,19 +38,19 @@ async def start_ingest( - Convention has an ingester configured - No ingest is already running for this convention """ - parsed_srn = ConventionSRN.parse(convention_srn) + parsed_srn = ConventionSlug.parse(convention_id) convention = await self.convention_service.get_convention(parsed_srn) if convention.ingester is None: raise NotFoundError( - f"No ingester configured for convention {convention_srn}", + f"No ingester configured for convention {convention_id}", code="no_ingester_configured", ) - existing = await self.ingest_repo.get_running_for_convention(convention_srn) + existing = await self.ingest_repo.get_running_for_convention(convention_id) if existing is not None: raise ConflictError( - f"Ingest already running for convention {convention_srn}", + f"Ingest already running for convention {convention_id}", code="ingest_already_running", ) @@ -59,7 +59,7 @@ async def start_ingest( ingest_run = IngestRun( id=run_id, - convention_srn=convention_srn, + convention_id=convention_id, 
status=IngestStatus.PENDING, batch_size=batch_size, limit=limit, @@ -72,7 +72,7 @@ async def start_ingest( IngestRunStarted( id=EventId(uuid4()), ingest_run_id=run_id, - convention_srn=convention_srn, + convention_id=convention_id, batch_size=batch_size, ) ) @@ -81,16 +81,16 @@ async def start_ingest( NextBatchRequested( id=EventId(uuid4()), ingest_run_id=run_id, - convention_srn=convention_srn, + convention_id=convention_id, batch_size=batch_size, ) ) srn = f"urn:osa:{self.node_domain.root}:ing:{run_id}" log.info( - "ingest started for {convention_srn}", + "ingest started for {convention_id}", ingest_run_srn=srn, - convention_srn=convention_srn, + convention_id=convention_id, batch_size=batch_size, limit=limit, ) diff --git a/server/osa/domain/record/event/record_published.py b/server/osa/domain/record/event/record_published.py index 6cb1637..e87b209 100644 --- a/server/osa/domain/record/event/record_published.py +++ b/server/osa/domain/record/event/record_published.py @@ -4,7 +4,8 @@ from osa.domain.shared.event import Event, EventId from osa.domain.shared.model.source import RecordSource -from osa.domain.shared.model.srn import ConventionSRN, RecordSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, RecordSRN, SchemaId +from osa.domain.shared.model.hook import FeatureName class RecordPublished(Event): @@ -17,7 +18,7 @@ class RecordPublished(Event): id: EventId record_srn: RecordSRN source: RecordSource - convention_srn: ConventionSRN + convention_id: ConventionSlug schema_id: SchemaId metadata: dict[str, Any] - expected_features: list[str] = [] + expected_features: list[FeatureName] = [] diff --git a/server/osa/domain/record/handler/convert_deposition_to_record.py b/server/osa/domain/record/handler/convert_deposition_to_record.py index 5f89aa4..b7ed78c 100644 --- a/server/osa/domain/record/handler/convert_deposition_to_record.py +++ b/server/osa/domain/record/handler/convert_deposition_to_record.py @@ -20,7 +20,7 @@ async def handle(self, 
event: DepositionApproved) -> None: draft = RecordDraft( source=DepositionSource(id=str(event.deposition_srn)), metadata=event.metadata, - convention_srn=event.convention_srn, + convention_id=event.convention_id, expected_features=event.expected_features, ) await self.service.publish_record(draft) diff --git a/server/osa/domain/record/model/aggregate.py b/server/osa/domain/record/model/aggregate.py index 8b2f491..6a649a2 100644 --- a/server/osa/domain/record/model/aggregate.py +++ b/server/osa/domain/record/model/aggregate.py @@ -7,7 +7,7 @@ from osa.domain.shared.model.aggregate import Aggregate from osa.domain.shared.model.source import RecordSource -from osa.domain.shared.model.srn import ConventionSRN, RecordSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, RecordSRN, SchemaId class Record(Aggregate): @@ -15,7 +15,7 @@ class Record(Aggregate): srn: RecordSRN source: RecordSource - convention_srn: ConventionSRN + convention_id: ConventionSlug schema_id: SchemaId = Field(frozen=True) metadata: dict[str, Any] published_at: datetime diff --git a/server/osa/domain/record/model/draft.py b/server/osa/domain/record/model/draft.py index f5e9e6e..935d7e9 100644 --- a/server/osa/domain/record/model/draft.py +++ b/server/osa/domain/record/model/draft.py @@ -3,19 +3,20 @@ from typing import Any from osa.domain.shared.model.source import RecordSource -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.model.value import ValueObject +from osa.domain.shared.model.hook import FeatureName class RecordDraft(ValueObject): """Input to RecordService.publish_record(). Carries everything needed to create a Record from any source type. - ``expected_features`` lists feature table names (not full HookDefinitions) + ``expected_features`` lists feature table names (not full ``HookIdentity`` objects) so compute runtime details don't leak past the validation boundary. 
""" source: RecordSource metadata: dict[str, Any] - convention_srn: ConventionSRN - expected_features: list[str] = [] + convention_id: ConventionSlug + expected_features: list[FeatureName] = [] diff --git a/server/osa/domain/record/query/get_record.py b/server/osa/domain/record/query/get_record.py index 10993e9..ac3a6fd 100644 --- a/server/osa/domain/record/query/get_record.py +++ b/server/osa/domain/record/query/get_record.py @@ -6,7 +6,7 @@ from osa.domain.record.service.record import RecordService from osa.domain.shared.authorization.gate import public from osa.domain.shared.model.source import RecordSource -from osa.domain.shared.model.srn import ConventionSRN, RecordSRN +from osa.domain.shared.model.srn import ConventionSlug, RecordSRN from osa.domain.shared.query import Query, QueryHandler, Result @@ -17,7 +17,7 @@ class GetRecord(Query): class RecordDetail(Result): srn: RecordSRN source: RecordSource - convention_srn: ConventionSRN + convention_id: ConventionSlug metadata: dict[str, Any] published_at: datetime features: dict[str, list[dict[str, Any]]] = {} @@ -33,7 +33,7 @@ async def run(self, cmd: GetRecord) -> RecordDetail: return RecordDetail( srn=record.srn, source=record.source, - convention_srn=record.convention_srn, + convention_id=record.convention_id, metadata=record.metadata, published_at=record.published_at, features=features, diff --git a/server/osa/domain/record/service/record.py b/server/osa/domain/record/service/record.py index 9c7d1f0..493edea 100644 --- a/server/osa/domain/record/service/record.py +++ b/server/osa/domain/record/service/record.py @@ -16,7 +16,7 @@ from osa.domain.shared.error import NotFoundError from osa.domain.shared.event import EventId from osa.domain.shared.model.srn import ( - ConventionSRN, + ConventionSlug, Domain, LocalId, RecordSRN, @@ -59,11 +59,11 @@ async def count(self) -> int: """Total published records on this node.""" return await self.record_repo.count() - async def _resolve_schema_id(self, convention_srn: 
ConventionSRN) -> SchemaId: + async def _resolve_schema_id(self, convention_id: ConventionSlug) -> SchemaId: """Resolve a convention to its schema id at publication time.""" - convention = await self.convention_repo.get(convention_srn) + convention = await self.convention_repo.get(convention_id) if convention is None: - raise NotFoundError(f"Convention not found: {convention_srn}") + raise NotFoundError(f"Convention not found: {convention_id}") return convention.schema_id async def bulk_publish(self, drafts: list[RecordDraft]) -> list[Record]: @@ -82,9 +82,9 @@ async def bulk_publish(self, drafts: list[RecordDraft]) -> list[Record]: records: list[Record] = [] for draft in drafts: - key = str(draft.convention_srn) + key = str(draft.convention_id) if key not in schema_id_by_conv: - schema_id_by_conv[key] = await self._resolve_schema_id(draft.convention_srn) + schema_id_by_conv[key] = await self._resolve_schema_id(draft.convention_id) record_srn = RecordSRN( domain=self.node_domain, id=LocalId(str(uuid4())), @@ -94,7 +94,7 @@ async def bulk_publish(self, drafts: list[RecordDraft]) -> list[Record]: Record( srn=record_srn, source=draft.source, - convention_srn=draft.convention_srn, + convention_id=draft.convention_id, schema_id=schema_id_by_conv[key], metadata=draft.metadata, published_at=datetime.now(UTC), @@ -121,7 +121,7 @@ async def publish_record(self, draft: RecordDraft) -> Record: """Create and persist a Record from a draft.""" logger.info(f"Creating record from {draft.source.type} source: {draft.source.id}") - schema_id = await self._resolve_schema_id(draft.convention_srn) + schema_id = await self._resolve_schema_id(draft.convention_id) record_srn = RecordSRN( domain=self.node_domain, @@ -132,7 +132,7 @@ async def publish_record(self, draft: RecordDraft) -> Record: record = Record( srn=record_srn, source=draft.source, - convention_srn=draft.convention_srn, + convention_id=draft.convention_id, schema_id=schema_id, metadata=draft.metadata, 
published_at=datetime.now(UTC), @@ -152,7 +152,7 @@ async def publish_record(self, draft: RecordDraft) -> Record: id=EventId(uuid4()), record_srn=record_srn, source=draft.source, - convention_srn=draft.convention_srn, + convention_id=draft.convention_id, schema_id=schema_id, metadata=draft.metadata, expected_features=draft.expected_features, diff --git a/server/osa/domain/shared/authorization/gate.py b/server/osa/domain/shared/authorization/gate.py index 3c8c435..a4c319a 100644 --- a/server/osa/domain/shared/authorization/gate.py +++ b/server/osa/domain/shared/authorization/gate.py @@ -1,4 +1,4 @@ -"""Handler-level authorization gates: public() and at_least(Role).""" +"""Handler-level authorization gates: public(), at_least(Role), requires_scope(str).""" from __future__ import annotations @@ -29,6 +29,18 @@ class AtLeast(Gate): role: "Role" +@dataclass(frozen=True) +class RequiresScope(Gate): + """Gate for machine (M2M) credentials (#145, US5). + + Authorizes if the principal holds the named OAuth scope **or** is an ADMIN. + The ADMIN fallback keeps every scoped endpoint reachable with an ordinary + admin user token, so scopes narrow — never replace — the role model. + """ + + scope: str + + _PUBLIC = Public() @@ -40,3 +52,8 @@ def public() -> Public: def at_least(role: "Role") -> AtLeast: """Mark a handler as requiring at least the given role.""" return AtLeast(role=role) + + +def requires_scope(scope: str) -> RequiresScope: + """Mark a handler as requiring an OAuth scope (or ADMIN). 
See RequiresScope.""" + return RequiresScope(scope=scope) diff --git a/server/osa/domain/shared/authorization/startup.py b/server/osa/domain/shared/authorization/startup.py index b2cb755..d553ba1 100644 --- a/server/osa/domain/shared/authorization/startup.py +++ b/server/osa/domain/shared/authorization/startup.py @@ -3,7 +3,7 @@ import dataclasses import logging -from osa.domain.shared.authorization.gate import AtLeast, Gate +from osa.domain.shared.authorization.gate import AtLeast, Gate, RequiresScope from osa.domain.shared.command import CommandHandler from osa.domain.shared.error import ConfigurationError from osa.domain.shared.query import QueryHandler @@ -24,15 +24,16 @@ def _check_handler_class(handler_cls: type) -> None: if not isinstance(auth, Gate): raise ConfigurationError(f"Handler {handler_cls.__name__} has no __auth__ declaration") - if isinstance(auth, AtLeast): + if isinstance(auth, (AtLeast, RequiresScope)): field_names = ( {f.name for f in dataclasses.fields(handler_cls)} if dataclasses.is_dataclass(handler_cls) else set() ) if "principal" not in field_names: + gate_name = "at_least(...)" if isinstance(auth, AtLeast) else "requires_scope(...)" raise ConfigurationError( - f"Handler {handler_cls.__name__} declares __auth__ = at_least(...) " + f"Handler {handler_cls.__name__} declares __auth__ = {gate_name} " "but is missing a `principal: Principal` field. Without it the " "auth gate rejects every request with a misleading 'missing_token' " "even when the caller's JWT is valid." 
diff --git a/server/osa/domain/shared/command.py b/server/osa/domain/shared/command.py index f39dee3..ce0f403 100644 --- a/server/osa/domain/shared/command.py +++ b/server/osa/domain/shared/command.py @@ -32,7 +32,7 @@ def _wrap_run_with_auth(cls: type, original_run: _HandlerMethod) -> _HandlerMeth @wraps(original_run) async def auth_wrapped_run(self: Any, cmd: Any) -> Any: - from osa.domain.shared.authorization.gate import AtLeast, Gate, Public + from osa.domain.shared.authorization.gate import AtLeast, Gate, Public, RequiresScope from osa.domain.shared.error import AuthorizationError, ConfigurationError auth_gate = getattr(type(self), "__auth__", None) @@ -61,6 +61,25 @@ async def auth_wrapped_run(self: Any, cmd: Any) -> Any: return await original_run(self, cmd) + if isinstance(auth_gate, RequiresScope): + from osa.domain.auth.model.principal import Principal + from osa.domain.auth.model.role import Role + + principal = getattr(self, "principal", None) + if not isinstance(principal, Principal): + raise AuthorizationError( + "Authentication required", + code="missing_token", + ) + + if not (principal.has_scope(auth_gate.scope) or principal.has_role(Role.ADMIN)): + raise AuthorizationError( + f"Access denied: missing scope {auth_gate.scope!r} for {type(self).__name__}", + code="access_denied", + ) + + return await original_run(self, cmd) + raise ConfigurationError( # pragma: no cover — future gate types handled here f"Handler {type(self).__name__} has unhandled __auth__ type: {type(auth_gate).__name__}" ) diff --git a/server/osa/domain/shared/error.py b/server/osa/domain/shared/error.py index 1ba718f..8bcc98a 100644 --- a/server/osa/domain/shared/error.py +++ b/server/osa/domain/shared/error.py @@ -55,7 +55,7 @@ class ConflictError(DomainError): class ReservedNameError(DomainError): """A schema ID or hook/feature name collides with a reserved URL slot. 
- Raised at aggregate construction (``Schema``, ``HookDefinition``) when a + Raised at aggregate construction (``Schema``, ``HookIdentity``) when a name equals one of :data:`osa.domain.shared.model.reserved.RESERVED_NAMES`. Surfaced as HTTP 400 with the structured ``code`` field. """ diff --git a/server/osa/domain/shared/model/hook.py b/server/osa/domain/shared/model/hook.py index 8c08dee..0842bdd 100644 --- a/server/osa/domain/shared/model/hook.py +++ b/server/osa/domain/shared/model/hook.py @@ -6,9 +6,9 @@ """ import re -from typing import Annotated, Any, Literal +from typing import Annotated, Any, ClassVar, Literal -from pydantic import Field +from pydantic import ConfigDict, Field, RootModel, field_validator from osa.domain.shared.model.value import ValueObject @@ -16,13 +16,67 @@ # Safe for use as PG identifiers, file path components, and env var values. PgIdentifier = Annotated[str, Field(pattern=r"^[a-z][a-z0-9_]{0,62}$")] -# Hook names compose into PG identifiers alongside fixed prefixes/suffixes — -# notably the per-hook FK constraint ``fk_features_{name}_record_srn`` (23 -# chars of overhead). PG's identifier limit is 63 chars, so cap hook names at -# 40 to keep every derived identifier inside the limit without surprise -# truncation. Column names use plain ``PgIdentifier`` because they don't get -# composed into longer names. -HookName = Annotated[str, Field(pattern=r"^[a-z][a-z0-9_]{0,39}$")] + +class HookName(RootModel[str]): + """A hook's stable name — a frozen ``RootModel`` (#145). + + Promoted from a bare ``Annotated[str, …]`` to a nominal type so the type + checker can distinguish a hook name from any other string and the regex is + enforced at construction. Frozen, so it is hashable and usable as a dict key + (``dict[HookName, …]``). Use ``.root`` where a plain ``str`` is required + (PG identifiers built without interpolation, dict keys handed to infra). 
+ + Hook names compose into PG identifiers alongside fixed prefixes/suffixes — + notably the per-hook FK constraint ``fk_features_{name}_record_srn`` (23 + chars of overhead). PG's identifier limit is 63 chars, so cap hook names at + 40 to keep every derived identifier inside the limit without surprise + truncation. Column names use plain :data:`PgIdentifier` because they don't + get composed into longer names. + """ + + model_config = ConfigDict(frozen=True) + + _re: ClassVar[re.Pattern] = re.compile(r"^[a-z][a-z0-9_]{0,39}$") + + @field_validator("root") + @classmethod + def _validate(cls, v: str) -> str: + if not cls._re.match(v): + raise ValueError("invalid hook name: 1–40 chars of [a-z0-9_], starting with a letter") + return v + + def __str__(self) -> str: + return self.root + + +class FeatureName(RootModel[str]): + """Identity of a feature table on the read surface (#145). + + A hook produces exactly one feature table, addressed at + ``/data/{schema}/{feature}``; its name equals the producing hook's name but + is a **distinct nominal type** so the read/feature side never traffics in + "hook". Same PG-identifier rules as :class:`HookName` (≤40 chars). Convert a + producing hook's name once at the hook→feature boundary + (``FeatureName(hook.name.root)``); reserved-slot names are already rejected + upstream on the hook identity. 
+ """ + + model_config = ConfigDict(frozen=True) + + _re: ClassVar[re.Pattern] = re.compile(r"^[a-z][a-z0-9_]{0,39}$") + + @field_validator("root") + @classmethod + def _validate(cls, v: str) -> str: + if not cls._re.match(v): + raise ValueError( + "invalid feature name: 1–40 chars of [a-z0-9_], starting with a letter" + ) + return v + + def __str__(self) -> str: + return self.root + _MEMORY_RE = re.compile(r"^(\d+(?:\.\d+)?)(g|m|k)?i?$") @@ -53,7 +107,7 @@ def parse_memory(memory: str) -> int: raise ValueError(f"Unknown memory unit: {unit}") -def _format_memory(byte_count: int) -> str: +def format_memory(byte_count: int) -> str: """Format bytes to a compact memory string (e.g. '2g', '1536m').""" if byte_count % _GIB == 0: return f"{byte_count // _GIB}g" @@ -120,11 +174,19 @@ class TableFeatureSpec(FeatureSpec): # ── Hook ── -class HookDefinition(ValueObject): - """Complete specification for a hook: how it runs + what it produces.""" +class HookIdentity(ValueObject): + """A hook's stable **identity**: its name + the output contract it produces. + + Feature #145 split the old monolithic hook spec into two: this identity + (``name`` + ``feature``, fixed forever and owning the feature table) and the + versioned, immutable ``HookRelease`` entity (``runtime`` + ``source_ref``) + in the ``validation`` domain that says *what image* currently computes that + output. This value object is the identity shape used by the bundled deploy + input and the ``ConventionRegistered`` event; its persisted form is the + ``Hook`` aggregate. 
+ """ name: HookName - runtime: Annotated[OciConfig, Field(discriminator="type")] feature: Annotated[TableFeatureSpec, Field(discriminator="kind")] def model_post_init(self, __context: object) -> None: @@ -133,17 +195,5 @@ def model_post_init(self, __context: object) -> None: from osa.domain.shared.error import ReservedNameError from osa.domain.shared.model.reserved import RESERVED_NAMES - if self.name in RESERVED_NAMES: - raise ReservedNameError(self.name, "hook") - - def with_memory(self, memory: str) -> "HookDefinition": - """Return a copy with a different memory limit.""" - new_limits = self.runtime.limits.model_copy(update={"memory": memory}) - new_runtime = self.runtime.model_copy(update={"limits": new_limits}) - return self.model_copy(update={"runtime": new_runtime}) - - def with_doubled_memory(self) -> "HookDefinition": - """Return a copy with 2x the current memory limit.""" - current_bytes = parse_memory(self.runtime.limits.memory) - doubled = _format_memory(current_bytes * 2) - return self.with_memory(doubled) + if self.name.root in RESERVED_NAMES: + raise ReservedNameError(self.name.root, "hook") diff --git a/server/osa/domain/shared/model/ids.py b/server/osa/domain/shared/model/ids.py index 4fb9c4c..16cc66c 100644 --- a/server/osa/domain/shared/model/ids.py +++ b/server/osa/domain/shared/model/ids.py @@ -1,10 +1,13 @@ """Central semantic ID types used across the ``/data/`` read surface. Per OSA's type-safety convention, semantic identifiers cross module -boundaries as ``NewType`` aliases rather than bare ``str``. ``HookName`` is +boundaries as nominal types rather than bare ``str``. ``FeatureName`` (the +identity of a feature table addressed at ``/data/{schema}/{feature}``) is re-exported from :mod:`osa.domain.shared.model.hook` (its source of truth, where the PG-identifier pattern validation lives) so callers have a single -import location for the IDs this surface deals in. +import location for the IDs this surface deals in. 
The read surface speaks of +*features*, not *hooks* — a hook is the producer; the table it produces is the +feature. """ from __future__ import annotations @@ -12,7 +15,7 @@ from typing import NewType from osa.domain.shared.error import ValidationError -from osa.domain.shared.model.hook import HookName +from osa.domain.shared.model.hook import FeatureName from osa.domain.shared.model.value import ValueObject # Bare internal record identifier (UUIDv7 / ULID). Validation of the exact @@ -52,4 +55,4 @@ def __str__(self) -> str: return self.render() -__all__ = ["RecordId", "RecordRef", "HookName"] +__all__ = ["RecordId", "RecordRef", "FeatureName"] diff --git a/server/osa/domain/shared/model/provenance.py b/server/osa/domain/shared/model/provenance.py new file mode 100644 index 0000000..630a3f8 --- /dev/null +++ b/server/osa/domain/shared/model/provenance.py @@ -0,0 +1,20 @@ +"""Per-row provenance reference carried through hook output storage (#145). + +Design-revisions §6: ``run_id`` is carried to the feature-insert handlers via a +tiny ``run.json`` written into each hook's output dir (``…/hooks/{hook}/output/ +run.json``), **not** reconstructed via a DB lookup. This is the parsed shape of +that file. ``run_id`` is the ``hook_runs.id`` stamped onto every feature row; +``release_id`` records which release produced it (redundant with the run, kept +for cheap debugging of orphaned output). 
+""" + +from __future__ import annotations + +from osa.domain.shared.model.value import ValueObject + + +class RunRef(ValueObject): + """Contents of a hook output dir's ``run.json``.""" + + run_id: str + release_id: str diff --git a/server/osa/domain/shared/model/srn.py b/server/osa/domain/shared/model/srn.py index 13f97db..5f5f2df 100644 --- a/server/osa/domain/shared/model/srn.py +++ b/server/osa/domain/shared/model/srn.py @@ -6,6 +6,7 @@ from typing import Any, ClassVar, Generic, Self, Type, TypeVar, Union from pydantic import ( + ConfigDict, Field, RootModel, field_validator, @@ -280,11 +281,6 @@ class OntologySRN(SRN): version: Semver -class ConventionSRN(SRN): - type: ResourceType = Field(default=ResourceType.conv, frozen=True) - version: Semver - - class DepositionSRN(SRN): type: ResourceType = Field(default=ResourceType.dep, frozen=True) version: None = None @@ -355,3 +351,44 @@ def from_srn(cls, srn: "SchemaSRN") -> "SchemaId": def to_srn(self, domain: Domain) -> "SchemaSRN": return SchemaSRN(domain=domain, id=self.id, version=self.version) + + +# ---------- Convention identity (slug — internal primitive) ---------- + + +class ConventionSlug(RootModel[str]): + """A convention's identity — a frozen, human-readable slug (#145). + + Conventions are **unversioned** (design-revisions §3): a convention is a thin + wrapper that delegates versioning to its parts — it pins a versioned + :class:`SchemaId` and references hooks by name (each resolving to a versioned + live release). It therefore has no meaningful version of its own, so its + identity is a bare slug rather than the old ``"@"`` + ``ConventionSlug``. + + Mirrors :class:`SchemaIdentifier`'s charset (starts with a letter, 3–64 chars + of ``[a-z0-9-]``) so it is safe as a URL segment and a PG identifier. Frozen, + so it is hashable and usable as a dict key. Use ``.root`` where a plain + ``str`` is required. 
+ """ + + model_config = ConfigDict(frozen=True) + + _re: ClassVar[re.Pattern] = re.compile(r"^[a-z][a-z0-9\-]{2,63}$") + + @field_validator("root") + @classmethod + def _validate(cls, v: str) -> str: + if not cls._re.match(v): + raise ValueError( + "invalid convention slug: must be 3–64 chars of [a-z0-9-] and start with a letter" + ) + return v + + @classmethod + def parse(cls, value: str) -> "ConventionSlug": + """Parse/validate a bare slug. Raises ``ValueError`` on malformed input.""" + return cls(value) + + def __str__(self) -> str: + return self.root diff --git a/server/osa/domain/shared/port/ingester_runner.py b/server/osa/domain/shared/port/ingester_runner.py index 4b51d14..229a6c0 100644 --- a/server/osa/domain/shared/port/ingester_runner.py +++ b/server/osa/domain/shared/port/ingester_runner.py @@ -12,14 +12,14 @@ from typing import Any, Protocol from osa.domain.shared.model.source import IngesterDefinition -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug @dataclass(frozen=True) class IngesterInputs: """Inputs for an ingester container run.""" - convention_srn: ConventionSRN + convention_id: ConventionSlug ingest_run_id: str = "" batch_index: int = 0 config: dict[str, Any] | None = None diff --git a/server/osa/domain/shared/query.py b/server/osa/domain/shared/query.py index c04d98f..e7f2d89 100644 --- a/server/osa/domain/shared/query.py +++ b/server/osa/domain/shared/query.py @@ -35,7 +35,7 @@ def _wrap_query_run_with_auth(cls: type, original_run: _HandlerMethod) -> _Handl @wraps(original_run) async def auth_wrapped_run(self: Any, cmd: Any) -> Any: - from osa.domain.shared.authorization.gate import AtLeast, Gate, Public + from osa.domain.shared.authorization.gate import AtLeast, Gate, Public, RequiresScope from osa.domain.shared.error import AuthorizationError, ConfigurationError auth_gate = getattr(type(self), "__auth__", None) @@ -76,6 +76,25 @@ async def auth_wrapped_run(self: Any, cmd: Any) 
-> Any: return await original_run(self, cmd) + if isinstance(auth_gate, RequiresScope): + from osa.domain.auth.model.principal import Principal + from osa.domain.auth.model.role import Role + + principal = getattr(self, "principal", None) + if not isinstance(principal, Principal): + raise AuthorizationError( + "Authentication required", + code="missing_token", + ) + + if not (principal.has_scope(auth_gate.scope) or principal.has_role(Role.ADMIN)): + raise AuthorizationError( + f"Access denied: missing scope {auth_gate.scope!r} for {type(self).__name__}", + code="access_denied", + ) + + return await original_run(self, cmd) + raise ConfigurationError( # pragma: no cover — future gate types handled here f"Handler {type(self).__name__} has unhandled __auth__ type: {type(auth_gate).__name__}" ) diff --git a/server/osa/domain/validation/command/create_release.py b/server/osa/domain/validation/command/create_release.py new file mode 100644 index 0000000..a74a141 --- /dev/null +++ b/server/osa/domain/validation/command/create_release.py @@ -0,0 +1,88 @@ +"""CreateRelease — register a new release for an existing hook (#145, US3). + +``POST /hooks/{name}/releases`` mints vN+1 (immutable) and advances the live +pointer; the convention that references the hook is untouched. Idempotent on +``(name, digest)``: re-posting an existing digest is a no-op that returns the +existing release without minting a version or moving the pointer (FR-006/R5). 
+""" + +from __future__ import annotations + +from datetime import datetime + +from pydantic import Field + +from osa.domain.auth.model.principal import Principal +from osa.domain.shared.authorization.gate import requires_scope +from osa.domain.shared.command import Command, CommandHandler, Result +from osa.domain.shared.model.hook import HookName, OciConfig, OciLimits +from osa.domain.validation.model.hook_release import HookReleaseId +from osa.domain.validation.service.hook_registry import HookRegistryService + + +class CreateRelease(Command): + """Register release vN+1 for an existing hook. + + No ``feature`` — the output contract is fixed at the hook's first release + (FR-002); a differing one is rejected upstream as a contract conflict. + """ + + name: HookName + image: str + digest: str + config: dict = Field(default_factory=dict) + limits: OciLimits = Field(default_factory=OciLimits) + source_ref: str # REQUIRED — reproducibility anchor (FR-005) + + def to_runtime(self) -> OciConfig: + return OciConfig( + image=self.image, + digest=self.digest, + config=self.config, + limits=self.limits, + ) + + +class ReleaseCreated(Result): + hook_name: HookName + version: int + id: HookReleaseId + digest: str + source_ref: str + built_at: datetime + live: bool + # Internal-only: lets the router pick 201 (new version) vs 200 (idempotent + # no-op). Excluded from the wire response (the contract's ReleaseCreated has + # no such field). + created: bool = Field(default=True, exclude=True) + + +class CreateReleaseHandler(CommandHandler[CreateRelease, ReleaseCreated]): + # Authorized by the ``hooks:write`` M2M scope OR an ADMIN role (#145, US5). 
+ __auth__ = requires_scope("hooks:write") + principal: Principal + service: HookRegistryService + + async def run(self, cmd: CreateRelease) -> ReleaseCreated: + built_by = str(self.principal.user_id) if self.principal.user_id else None + + # `created` (201 new version vs 200 idempotent no-op) is decided inside + # the registry's row lock, so it is correct under concurrent identical + # submissions — no racy pre-read here. + outcome = await self.service.create_release( + cmd.name, cmd.to_runtime(), cmd.source_ref, built_by + ) + release = outcome.release + hook = await self.service.get_hook(cmd.name) + is_live = hook is not None and hook.live_release_id == release.id + + return ReleaseCreated( + hook_name=release.hook_name, + version=release.version, + id=release.id, + digest=release.runtime.digest, + source_ref=release.source_ref, + built_at=release.built_at, + live=is_live, + created=outcome.created, + ) diff --git a/server/osa/domain/validation/command/set_live.py b/server/osa/domain/validation/command/set_live.py new file mode 100644 index 0000000..cdfd784 --- /dev/null +++ b/server/osa/domain/validation/command/set_live.py @@ -0,0 +1,48 @@ +"""SetLive — repoint a hook's live pointer to a prior release (#145, US4). + +``PUT /hooks/{name}/live`` rolls the live pointer back (or forward) to any +existing release of the hook. Releases are never deleted; history is preserved +and the provenance of rows already produced by other releases is unchanged. 
+""" + +from __future__ import annotations + +from osa.domain.auth.model.principal import Principal +from osa.domain.shared.authorization.gate import requires_scope +from osa.domain.shared.command import Command, CommandHandler, Result +from osa.domain.shared.error import InvalidStateError +from osa.domain.shared.model.hook import HookName +from osa.domain.validation.model.hook_release import HookReleaseId +from osa.domain.validation.service.hook_registry import HookRegistryService + + +class SetLive(Command): + """Repoint the hook's live pointer to ``version`` (an existing release).""" + + name: HookName + version: int + + +class LiveSet(Result): + hook_name: HookName + live_version: int + live_release_id: HookReleaseId + + +class SetLiveHandler(CommandHandler[SetLive, LiveSet]): + # Authorized by the ``hooks:write`` M2M scope OR an ADMIN role (#145, US5). + __auth__ = requires_scope("hooks:write") + principal: Principal + service: HookRegistryService + + async def run(self, cmd: SetLive) -> LiveSet: + hook = await self.service.set_live(cmd.name, cmd.version) + # set_live always points at the target release (it raises NotFoundError + # otherwise), so live_release_id is non-None here. 
+ if hook.live_release_id is None: + raise InvalidStateError(f"Live pointer not set after set_live for {cmd.name}") + return LiveSet( + hook_name=hook.name, + live_version=cmd.version, + live_release_id=hook.live_release_id, + ) diff --git a/server/osa/domain/validation/event/validation_completed.py b/server/osa/domain/validation/event/validation_completed.py index b65c147..d53326e 100644 --- a/server/osa/domain/validation/event/validation_completed.py +++ b/server/osa/domain/validation/event/validation_completed.py @@ -1,8 +1,9 @@ from typing import Any from osa.domain.shared.event import Event, EventId -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN, ValidationRunSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN, ValidationRunSRN from osa.domain.validation.model import RunStatus +from osa.domain.shared.model.hook import FeatureName class ValidationCompleted(Event): @@ -11,8 +12,8 @@ class ValidationCompleted(Event): id: EventId validation_run_srn: ValidationRunSRN deposition_srn: DepositionSRN - convention_srn: ConventionSRN + convention_id: ConventionSlug status: RunStatus hook_results: list[dict[str, Any]] metadata: dict[str, Any] - expected_features: list[str] = [] + expected_features: list[FeatureName] = [] diff --git a/server/osa/domain/validation/event/validation_failed.py b/server/osa/domain/validation/event/validation_failed.py index a626664..ef77ba8 100644 --- a/server/osa/domain/validation/event/validation_failed.py +++ b/server/osa/domain/validation/event/validation_failed.py @@ -1,5 +1,5 @@ from osa.domain.shared.event import Event, EventId -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from osa.domain.validation.model import RunStatus @@ -8,6 +8,6 @@ class ValidationFailed(Event): id: EventId deposition_srn: DepositionSRN - convention_srn: ConventionSRN + convention_id: ConventionSlug status: RunStatus reasons: 
list[str] diff --git a/server/osa/domain/validation/handler/validate_deposition.py b/server/osa/domain/validation/handler/validate_deposition.py index 3f92a52..02b13a2 100644 --- a/server/osa/domain/validation/handler/validate_deposition.py +++ b/server/osa/domain/validation/handler/validate_deposition.py @@ -5,6 +5,7 @@ from osa.domain.deposition.event.submitted import DepositionSubmittedEvent from osa.domain.shared.event import EventHandler, EventId +from osa.domain.shared.model.hook import FeatureName from osa.domain.shared.outbox import Outbox from osa.domain.validation.event.validation_completed import ValidationCompleted from osa.domain.validation.event.validation_failed import ValidationFailed @@ -27,7 +28,7 @@ async def handle(self, event: DepositionSubmittedEvent) -> None: try: run, hook_results = await self.validation_service.validate_deposition( deposition_srn=event.deposition_id, - convention_srn=event.convention_srn, + convention_id=event.convention_id, metadata=event.metadata, hooks=event.hooks, ) @@ -44,20 +45,21 @@ async def handle(self, event: DepositionSubmittedEvent) -> None: failed = ValidationFailed( id=EventId(uuid4()), deposition_srn=event.deposition_id, - convention_srn=event.convention_srn, + convention_id=event.convention_id, status=run.status, reasons=reasons, ) await self.outbox.append(failed) logger.info(f"Validation failed for: {event.deposition_id}") else: - # Extract expected_features from hooks at the validation boundary - expected_features = [h.name for h in event.hooks] + # Each hook produces one feature table named after it: convert the + # producing hook names to feature names at this boundary (#145). 
+ expected_features = [FeatureName(h.root) for h in event.hooks] completed = ValidationCompleted( id=EventId(uuid4()), validation_run_srn=run.srn, deposition_srn=event.deposition_id, - convention_srn=event.convention_srn, + convention_id=event.convention_id, status=run.status, hook_results=[r.model_dump() for r in hook_results], metadata=event.metadata, diff --git a/server/osa/domain/validation/model/hook.py b/server/osa/domain/validation/model/hook.py new file mode 100644 index 0000000..9d922bf --- /dev/null +++ b/server/osa/domain/validation/model/hook.py @@ -0,0 +1,42 @@ +"""Hook aggregate — stable identity, fixed output contract, live pointer (#145). + +A hook owns a fixed feature contract (columns + cardinality) and its feature +table; the contract never changes across releases. ``live_release_id`` points +at the currently active :class:`HookRelease`. The pointer is advanced/repointed +under a row lock by the registry adapter; this pure aggregate just holds state +and produces repointed copies. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Annotated + +from pydantic import Field + +from osa.domain.shared.model.aggregate import Aggregate +from osa.domain.shared.model.hook import HookName, TableFeatureSpec +from osa.domain.validation.model.hook_release import HookReleaseId + + +class Hook(Aggregate): + """Stable hook identity + fixed output contract + live-release pointer.""" + + model_config = {"frozen": True} + + name: HookName + feature: Annotated[TableFeatureSpec, Field(discriminator="kind")] + live_release_id: HookReleaseId | None = None + created_at: datetime + + def model_post_init(self, __context: object) -> None: + # A hook name becomes a feature-table URL slot under /data/{schema}/. 
+ from osa.domain.shared.error import ReservedNameError + from osa.domain.shared.model.reserved import RESERVED_NAMES + + if self.name.root in RESERVED_NAMES: + raise ReservedNameError(self.name.root, "hook") + + def with_live_release(self, release_id: HookReleaseId) -> "Hook": + """Return a copy whose live pointer references *release_id*.""" + return self.model_copy(update={"live_release_id": release_id}) diff --git a/server/osa/domain/validation/model/hook_release.py b/server/osa/domain/validation/model/hook_release.py new file mode 100644 index 0000000..9820e0e --- /dev/null +++ b/server/osa/domain/validation/model/hook_release.py @@ -0,0 +1,71 @@ +"""HookRelease — the immutable, versioned hook artifact (feature #145). + +A release captures *what code* runs for a hook at a point in time: the OCI +runtime (image + digest + config + limits) plus the build source ref +(reproducibility anchor). Releases are integer-versioned, monotonic per hook, +and never mutated after creation. The live pointer on the :class:`Hook` +aggregate selects which release is currently active. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime +from typing import Annotated, NewType +from uuid import UUID + +from pydantic import Field + +from osa.domain.shared.model.entity import Entity +from osa.domain.shared.model.hook import ( + HookName, + OciConfig, + format_memory, + parse_memory, +) + +HookReleaseId = NewType("HookReleaseId", UUID) + + +class HookRelease(Entity): + """Immutable, versioned hook artifact. 
``runtime`` + ``source_ref`` are the + deployable unit; ``version`` is monotonic per ``hook_name``.""" + + model_config = {"frozen": True} + + id: HookReleaseId + hook_name: HookName + version: int + runtime: Annotated[OciConfig, Field(discriminator="type")] + source_ref: str + built_by: str | None = None + built_at: datetime + + def with_memory(self, memory: str) -> "HookRelease": + """Return an in-memory copy with a different memory limit. + + Used only by the OOM-retry path to escalate memory for a single + execution attempt; the persisted release is never changed. + """ + new_limits = self.runtime.limits.model_copy(update={"memory": memory}) + new_runtime = self.runtime.model_copy(update={"limits": new_limits}) + return self.model_copy(update={"runtime": new_runtime}) + + def with_doubled_memory(self) -> "HookRelease": + """Return an in-memory copy with 2x the current memory limit.""" + doubled = format_memory(parse_memory(self.runtime.limits.memory) * 2) + return self.with_memory(doubled) + + +@dataclass(frozen=True) +class ReleaseOutcome: + """Result of minting a release. + + ``created`` is ``True`` when a new version was minted, ``False`` for an + idempotent no-op (the digest already existed). It is decided *inside* the + registry's row lock, so it is correct under concurrent identical submissions + — the caller (router) maps it to HTTP 201 vs 200 without a racy pre-check. 
+ """ + + release: HookRelease + created: bool diff --git a/server/osa/domain/validation/model/hook_result.py b/server/osa/domain/validation/model/hook_result.py index 7841d6f..e4aaaf8 100644 --- a/server/osa/domain/validation/model/hook_result.py +++ b/server/osa/domain/validation/model/hook_result.py @@ -4,6 +4,7 @@ from pydantic import Field +from osa.domain.shared.model.hook import HookName from osa.domain.shared.model.value import ValueObject @@ -23,9 +24,13 @@ class ProgressEntry(ValueObject): class HookResult(ValueObject): """Result of executing a single hook.""" - hook_name: str + hook_name: HookName status: HookStatus rejection_reason: str | None = None error_message: str | None = None progress: list[ProgressEntry] = Field(default_factory=list) duration_seconds: float + oom_retries: int = 0 + """Number of times the run was retried with doubled memory after an OOM + eviction (#145). 0 for a clean single-attempt run. Surfaced into the + ``hook_runs`` provenance record.""" diff --git a/server/osa/domain/validation/model/hook_run.py b/server/osa/domain/validation/model/hook_run.py new file mode 100644 index 0000000..b0935f1 --- /dev/null +++ b/server/osa/domain/validation/model/hook_run.py @@ -0,0 +1,64 @@ +"""HookRun — pure execution record + per-row provenance anchor (#145). + +One row per (hook execution at completion). Points at the snapshotted +:class:`HookRelease` that ran, and is the FK target stamped onto every feature +row (``features.*.run_id``). Carries only status/timing/logs — *what code ran, +when, and where the logs are*. + +Where the input data came from is **not** restated here: a feature row reaches +its origin via the other arm of the join, ``record_srn → records.source``. The +old execution-context columns (``ingest_run_id`` / ``deposition_id`` / +``batch_index`` and the XOR check) existed only to key an insert-time run-id +lookup that has been removed in favour of carrying ``run_id`` through the hook +output dir (``run.json``). 
+""" + +from __future__ import annotations + +from datetime import datetime +from enum import StrEnum +from typing import NewType +from uuid import UUID + +from osa.domain.shared.model.entity import Entity +from osa.domain.validation.model.hook_release import HookReleaseId +from osa.domain.validation.model.hook_result import HookStatus + +HookRunId = NewType("HookRunId", UUID) + + +class HookRunStatus(StrEnum): + PASSED = "passed" + WARNINGS = "warnings" + FAILED = "failed" + ERROR = "error" + + @classmethod + def from_hook_status(cls, status: HookStatus) -> "HookRunStatus": + """Map a per-hook execution outcome to its append-only run status. + + Total over :class:`HookStatus` (``PASSED`` / ``REJECTED``). The + "no result / errored" case has no ``HookStatus`` to map from — it is + handled explicitly at the single call site that has it (→ ``ERROR``). + """ + if status == HookStatus.PASSED: + return cls.PASSED + return cls.FAILED + + +class HookRun(Entity): + """Append-only execution record; provenance + logs anchor. + + Runs are recorded as a single insert at completion, so ``finished_at`` / + ``duration_s`` / ``oom_retries`` are always known. ``log_ref`` is the only + genuine optional (logs may not have been persisted). + """ + + id: HookRunId + release_id: HookReleaseId + status: HookRunStatus + started_at: datetime + finished_at: datetime + duration_s: float + oom_retries: int + log_ref: str | None = None diff --git a/server/osa/domain/validation/port/hook_registry.py b/server/osa/domain/validation/port/hook_registry.py new file mode 100644 index 0000000..2cf3252 --- /dev/null +++ b/server/osa/domain/validation/port/hook_registry.py @@ -0,0 +1,84 @@ +"""Port for the hook registry (feature #145). + +Persists hook identities, their immutable versioned releases, and the live +pointer. The adapter is responsible for the concurrency-critical bits: +gap-free monotonic version assignment and live-pointer advance under a row +lock on the ``hooks`` row (research R7). 
+""" + +from __future__ import annotations + +from abc import abstractmethod +from typing import Protocol + +from osa.domain.shared.model.hook import HookName, OciConfig, TableFeatureSpec +from osa.domain.shared.port import Port +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookRelease, ReleaseOutcome +from osa.domain.validation.model.hook_run import HookRun + + +class HookRegistry(Port, Protocol): + @abstractmethod + async def upsert_identity(self, name: HookName, feature: TableFeatureSpec) -> Hook: + """Create the hook identity if absent; return the (existing or new) hook. + + If the hook exists with a **different** ``feature`` contract, raise + ``ConflictError`` (the contract is fixed across releases, FR-002/FR-016). + """ + ... + + @abstractmethod + async def create_release( + self, + name: HookName, + runtime: OciConfig, + source_ref: str, + built_by: str | None, + ) -> ReleaseOutcome: + """Mint the next release for an existing hook and advance the live pointer. + + Idempotent on ``(name, digest)``: re-submitting an existing digest returns + the existing release without minting a new version or moving the pointer + (FR-006/R5), with ``ReleaseOutcome.created == False``. Version assignment + + pointer advance happen under a row lock on the ``hooks`` row so concurrent + submitters serialize (FR-009/R7); ``created`` is decided under that same + lock so it is race-free. + """ + ... + + @abstractmethod + async def set_live(self, name: HookName, version: int) -> Hook: + """Repoint the live pointer to an existing release of the hook (rollback).""" + ... + + @abstractmethod + async def get_hook(self, name: HookName) -> Hook | None: ... + + @abstractmethod + async def list_hooks(self) -> list[Hook]: ... + + @abstractmethod + async def list_releases(self, name: HookName) -> list[HookRelease]: + """All releases for a hook, version-descending. Empty if hook absent.""" + ... 
+ + @abstractmethod + async def get_release(self, name: HookName, version: int) -> HookRelease | None: ... + + @abstractmethod + async def get_release_by_id(self, release_id: object) -> HookRelease | None: ... + + @abstractmethod + async def record_run(self, run: HookRun) -> None: + """Persist a completed hook_run row (append-only provenance anchor).""" + ... + + @abstractmethod + async def resolve_live(self, names: list[HookName]) -> dict[HookName, HookRelease]: + """Resolve each hook's current live release in one indexed lookup. + + Called once at run start and snapshotted for the run's duration (R8). + Hooks with no live release are omitted from the result. + """ + ... diff --git a/server/osa/domain/validation/port/hook_runner.py b/server/osa/domain/validation/port/hook_runner.py index 6ee2c89..0a0ae7f 100644 --- a/server/osa/domain/validation/port/hook_runner.py +++ b/server/osa/domain/validation/port/hook_runner.py @@ -5,9 +5,10 @@ from pathlib import Path from typing import Protocol, runtime_checkable -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import HookIdentity from osa.domain.shared.port import Port from osa.domain.validation.model.hook_input import HookRecord +from osa.domain.validation.model.hook_release import HookRelease from osa.domain.validation.model.hook_result import HookResult @@ -33,12 +34,16 @@ class HookRunner(Port, Protocol): @abstractmethod async def run( self, - hook: HookDefinition, + hook: HookIdentity, + release: HookRelease, inputs: HookInputs, work_dir: Path, ) -> HookResult: """Run a hook and return its result. + *hook* supplies the identity (name) and *release* supplies the runtime + (image/digest/config/limits) — feature #145's identity/release split. + The runner creates ``input/`` and ``output/`` subdirectories under *work_dir*. ``input/`` is ephemeral (cleaned after run); ``output/`` persists for later reading. 
""" diff --git a/server/osa/domain/validation/port/storage.py b/server/osa/domain/validation/port/storage.py index 2070a13..e1e2de3 100644 --- a/server/osa/domain/validation/port/storage.py +++ b/server/osa/domain/validation/port/storage.py @@ -22,6 +22,15 @@ def get_files_dir(self, deposition_id: DepositionSRN) -> Path: """Return the directory containing data files for a deposition.""" ... + @abstractmethod + async def write_run_ref(self, work_dir: Path, run_id: str, release_id: str) -> None: + """Write ``{work_dir}/output/run.json`` carrying this run's provenance. + + The feature-insert handler reads it back to stamp ``feature.run_id`` — + no DB run-id lookup (design-revisions §6). #145. + """ + ... + @abstractmethod async def write_checkpoint( self, work_dir: Path, outcomes: dict[HookRecordId, BatchRecordOutcome] diff --git a/server/osa/domain/validation/query/get_release.py b/server/osa/domain/validation/query/get_release.py new file mode 100644 index 0000000..a2c0f93 --- /dev/null +++ b/server/osa/domain/validation/query/get_release.py @@ -0,0 +1,60 @@ +"""GetRelease — inspect a single hook release (#145, US3). + +``GET /hooks/{name}/releases/{version}`` returns the full release, including +runtime config/limits and whether it is the hook's current live release. 
+""" + +from __future__ import annotations + +from datetime import datetime + +from osa.domain.shared.authorization.gate import public +from osa.domain.shared.error import NotFoundError +from osa.domain.shared.model.hook import HookName, OciLimits +from osa.domain.shared.query import Query, QueryHandler, Result +from osa.domain.validation.model.hook_release import HookReleaseId +from osa.domain.validation.service.hook_registry import HookRegistryService + + +class GetRelease(Query): + name: HookName + version: int + + +class ReleaseDetail(Result): + hook_name: HookName + version: int + id: HookReleaseId + image: str + digest: str + config: dict + limits: OciLimits + source_ref: str + built_by: str | None + built_at: datetime + live: bool + + +class GetReleaseHandler(QueryHandler[GetRelease, ReleaseDetail]): + __auth__ = public() + service: HookRegistryService + + async def run(self, cmd: GetRelease) -> ReleaseDetail: + release = await self.service.get_release(cmd.name, cmd.version) + if release is None: + raise NotFoundError(f"Release not found: {cmd.name}@v{cmd.version}") + hook = await self.service.get_hook(cmd.name) + is_live = hook is not None and hook.live_release_id == release.id + return ReleaseDetail( + hook_name=release.hook_name, + version=release.version, + id=release.id, + image=release.runtime.image, + digest=release.runtime.digest, + config=release.runtime.config, + limits=release.runtime.limits, + source_ref=release.source_ref, + built_by=release.built_by, + built_at=release.built_at, + live=is_live, + ) diff --git a/server/osa/domain/validation/query/list_hooks.py b/server/osa/domain/validation/query/list_hooks.py new file mode 100644 index 0000000..bcf1ace --- /dev/null +++ b/server/osa/domain/validation/query/list_hooks.py @@ -0,0 +1,66 @@ +"""ListHooks — the hook catalog (#145, US3). + +``GET /hooks`` lists every hook with its fixed output contract and its current +live release (FR-011). 
A hook with no live release (none ever minted) reports +``live_release: null``. +""" + +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel + +from osa.domain.shared.authorization.gate import public +from osa.domain.shared.model.hook import HookName, TableFeatureSpec +from osa.domain.shared.query import Query, QueryHandler, Result +from osa.domain.validation.service.hook_registry import HookRegistryService + + +class ListHooks(Query): + pass + + +class LiveReleaseSummary(BaseModel): + version: int + digest: str + source_ref: str + built_at: datetime + + +class HookCatalogItem(BaseModel): + name: HookName + feature: TableFeatureSpec + live_release: LiveReleaseSummary | None + + +class HookCatalog(Result): + items: list[HookCatalogItem] + + +class ListHooksHandler(QueryHandler[ListHooks, HookCatalog]): + __auth__ = public() + service: HookRegistryService + + async def run(self, cmd: ListHooks) -> HookCatalog: + hooks = await self.service.list_hooks() + live = await self.service.resolve_live([h.name for h in hooks]) + return HookCatalog( + items=[ + HookCatalogItem( + name=h.name, + feature=h.feature, + live_release=( + LiveReleaseSummary( + version=rel.version, + digest=rel.runtime.digest, + source_ref=rel.source_ref, + built_at=rel.built_at, + ) + if (rel := live.get(h.name)) is not None + else None + ), + ) + for h in hooks + ] + ) diff --git a/server/osa/domain/validation/query/list_releases.py b/server/osa/domain/validation/query/list_releases.py new file mode 100644 index 0000000..273ca35 --- /dev/null +++ b/server/osa/domain/validation/query/list_releases.py @@ -0,0 +1,63 @@ +"""ListReleases — a hook's release history (#145, US3/US4). + +``GET /hooks/{name}/releases`` returns every release for a hook, +version-descending, alongside the currently-live version (FR-010). 
+""" + +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel + +from osa.domain.shared.authorization.gate import public +from osa.domain.shared.error import NotFoundError +from osa.domain.shared.model.hook import HookName +from osa.domain.shared.query import Query, QueryHandler, Result +from osa.domain.validation.service.hook_registry import HookRegistryService + + +class ListReleases(Query): + name: HookName + + +class ReleaseSummary(BaseModel): + version: int + digest: str + image: str + source_ref: str + built_by: str | None + built_at: datetime + + +class ReleaseList(Result): + hook_name: HookName + live_version: int | None + releases: list[ReleaseSummary] + + +class ListReleasesHandler(QueryHandler[ListReleases, ReleaseList]): + __auth__ = public() + service: HookRegistryService + + async def run(self, cmd: ListReleases) -> ReleaseList: + hook = await self.service.get_hook(cmd.name) + if hook is None: + raise NotFoundError(f"Hook not found: {cmd.name}") + releases = await self.service.list_releases(cmd.name) + live_version = next((r.version for r in releases if r.id == hook.live_release_id), None) + return ReleaseList( + hook_name=cmd.name, + live_version=live_version, + releases=[ + ReleaseSummary( + version=r.version, + digest=r.runtime.digest, + image=r.runtime.image, + source_ref=r.source_ref, + built_by=r.built_by, + built_at=r.built_at, + ) + for r in releases + ], + ) diff --git a/server/osa/domain/validation/service/hook.py b/server/osa/domain/validation/service/hook.py index 4718578..8bf3700 100644 --- a/server/osa/domain/validation/service/hook.py +++ b/server/osa/domain/validation/service/hook.py @@ -14,8 +14,9 @@ from pathlib import Path from osa.domain.shared.error import OOMError -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import HookIdentity, HookName from osa.domain.shared.service import Service +from osa.domain.validation.model.hook_release import 
HookRelease from osa.domain.validation.model.batch_outcome import ( BatchRecordOutcome, HookRecordId, @@ -40,7 +41,8 @@ class HookService(Service): async def run_hook( self, - hook: HookDefinition, + hook: HookIdentity, + release: HookRelease, inputs: HookInputs, work_dir: Path, ) -> HookResult: @@ -71,8 +73,9 @@ async def run_hook( duration_seconds=0.0, ) - current_hook = hook + current_release = release total_duration = 0.0 + oom_retries = 0 for attempt in range(1 + MAX_OOM_RETRIES): attempt_inputs = HookInputs( @@ -83,7 +86,7 @@ async def run_hook( ) try: - result = await self.hook_runner.run(current_hook, attempt_inputs, work_dir) + result = await self.hook_runner.run(hook, current_release, attempt_inputs, work_dir) except OOMError: # Read any partial output written before OOM new_outcomes = _read_output_dir(work_dir) @@ -99,13 +102,14 @@ async def run_hook( break if attempt < MAX_OOM_RETRIES: - current_hook = current_hook.with_doubled_memory() + current_release = current_release.with_doubled_memory() + oom_retries += 1 log.info( "OOM retry {attempt}/{max_retries} for hook={hook_name}, memory={memory}, remaining={remaining} records", attempt=attempt + 1, max_retries=MAX_OOM_RETRIES, hook_name=hook.name, - memory=current_hook.runtime.limits.memory, + memory=current_release.runtime.limits.memory, remaining=len(remaining), ) continue @@ -115,7 +119,7 @@ async def run_hook( outcomes[HookRecordId(r.id)] = BatchRecordOutcome( record_id=HookRecordId(r.id), status=OutcomeStatus.ERRORED, - error=f"OOM after {MAX_OOM_RETRIES} retries (last limit: {current_hook.runtime.limits.memory})", + error=f"OOM after {MAX_OOM_RETRIES} retries (last limit: {current_release.runtime.limits.memory})", ) await self.hook_storage.write_batch_outcomes(work_dir, outcomes) raise @@ -138,6 +142,7 @@ async def run_hook( status=HookStatus.REJECTED, rejection_reason=result.rejection_reason, duration_seconds=total_duration, + oom_retries=oom_retries, ) else: # Success (PASSED) @@ -151,22 +156,24 
@@ async def run_hook( hook_name=hook.name, status=HookStatus.PASSED, duration_seconds=total_duration, + oom_retries=oom_retries, ) async def run_hooks_for_batch( self, - hooks: list[HookDefinition], + hook_releases: list[tuple[HookIdentity, HookRelease]], inputs: HookInputs, - work_dirs: dict[str, Path], + work_dirs: dict[HookName, Path], ) -> list[HookResult]: """Run multiple hooks sequentially for a batch of records. - work_dirs maps hook_name → output directory. + *hook_releases* pairs each hook identity with the release resolved for + this run (snapshot, R8). work_dirs maps hook_name → output directory. """ results: list[HookResult] = [] - for hook in hooks: + for hook, release in hook_releases: work_dir = work_dirs[hook.name] - result = await self.run_hook(hook, inputs, work_dir) + result = await self.run_hook(hook, release, inputs, work_dir) results.append(result) return results diff --git a/server/osa/domain/validation/service/hook_registry.py b/server/osa/domain/validation/service/hook_registry.py new file mode 100644 index 0000000..bbe029f --- /dev/null +++ b/server/osa/domain/validation/service/hook_registry.py @@ -0,0 +1,63 @@ +"""HookRegistryService — business logic for the hook registry (feature #145). + +Thin orchestration over the :class:`HookRegistry` port: upsert identities, +mint releases (advancing the live pointer), repoint live for rollback, and +resolve the live release set once at run start (snapshot, R8). The +concurrency-critical version/pointer mechanics live in the adapter. 
+""" + +from __future__ import annotations + +from osa.domain.shared.model.hook import HookName, OciConfig, TableFeatureSpec +from osa.domain.shared.service import Service +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookRelease, ReleaseOutcome +from osa.domain.validation.model.hook_run import HookRun +from osa.domain.validation.port.hook_registry import HookRegistry + + +class HookRegistryService(Service): + registry: HookRegistry + + async def upsert_identity(self, name: HookName, feature: TableFeatureSpec) -> Hook: + """Create the hook identity if absent; reject a differing contract.""" + return await self.registry.upsert_identity(name, feature) + + async def create_release( + self, + name: HookName, + runtime: OciConfig, + source_ref: str, + built_by: str | None = None, + ) -> ReleaseOutcome: + """Mint vN+1 for an existing hook (idempotent on digest); advance live. + + Returns a :class:`ReleaseOutcome` whose ``created`` flag (decided under + the registry's row lock) distinguishes a new version from an idempotent + no-op. 
+ """ + return await self.registry.create_release(name, runtime, source_ref, built_by) + + async def set_live(self, name: HookName, version: int) -> Hook: + """Repoint the live pointer to a prior release (rollback / pin).""" + return await self.registry.set_live(name, version) + + async def get_hook(self, name: HookName) -> Hook | None: + return await self.registry.get_hook(name) + + async def list_hooks(self) -> list[Hook]: + return await self.registry.list_hooks() + + async def list_releases(self, name: HookName) -> list[HookRelease]: + return await self.registry.list_releases(name) + + async def get_release(self, name: HookName, version: int) -> HookRelease | None: + return await self.registry.get_release(name, version) + + async def resolve_live(self, names: list[HookName]) -> dict[HookName, HookRelease]: + """Resolve the live release for each hook once, for snapshotting (R8).""" + return await self.registry.resolve_live(names) + + async def record_run(self, run: HookRun) -> None: + """Persist a completed hook_run (append-only provenance anchor).""" + await self.registry.record_run(run) diff --git a/server/osa/domain/validation/service/validation.py b/server/osa/domain/validation/service/validation.py index a675f5b..24a3e4b 100644 --- a/server/osa/domain/validation/service/validation.py +++ b/server/osa/domain/validation/service/validation.py @@ -3,11 +3,14 @@ import logging import uuid from datetime import datetime, timezone +from uuid import uuid4 + from typing import Any -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.error import NotFoundError +from osa.domain.shared.model.hook import HookIdentity, HookName from osa.domain.shared.model.srn import ( - ConventionSRN, + ConventionSlug, DepositionSRN, Domain, LocalId, @@ -19,11 +22,14 @@ ValidationRun, ) from osa.domain.validation.model.hook_input import HookRecord +from osa.domain.validation.model.hook_release import HookRelease from osa.domain.validation.model.hook_result 
import HookResult, HookStatus +from osa.domain.validation.model.hook_run import HookRun, HookRunId, HookRunStatus from osa.domain.validation.port.hook_runner import HookInputs, HookRunner from osa.domain.validation.port.repository import ValidationRunRepository from osa.domain.validation.port.storage import HookStoragePort from osa.domain.validation.service.hook import HookService +from osa.domain.validation.service.hook_registry import HookRegistryService logger = logging.getLogger(__name__) @@ -34,6 +40,7 @@ class ValidationService(Service): run_repo: ValidationRunRepository hook_runner: HookRunner hook_storage: HookStoragePort + hook_registry: HookRegistryService node_domain: Domain async def create_run( @@ -63,13 +70,14 @@ async def run_hooks( run: ValidationRun, deposition_srn: DepositionSRN, inputs: HookInputs, - hooks: list[HookDefinition], + hook_names: list[HookName], ) -> tuple[ValidationRun, list[HookResult]]: """Execute hooks sequentially with OOM retry. Halt on reject/fail/OOM. - Hook outputs are written to durable cold storage under the deposition directory. - Feature insertion is deferred to record publication time. - Each hook is executed via HookService which handles OOM retry with memory doubling. + Resolves each hook's live release once at run start (snapshot, R8) and + records an append-only ``hook_run`` per hook (deposition context). The + run ids are reconstructed at feature-insert time from the deposition, + so they are recorded here but not returned. """ run.status = RunStatus.RUNNING run.started_at = datetime.now(timezone.utc) @@ -80,16 +88,57 @@ async def run_hooks( hook_storage=self.hook_storage, ) + # Resolve identity + live release per hook (snapshot). 
+ releases = await self.hook_registry.resolve_live(hook_names) + pairs: list[tuple[HookIdentity, HookRelease]] = [] + for name in hook_names: + hook = await self.hook_registry.get_hook(name) + release = releases.get(name) + if hook is None or release is None: + raise NotFoundError(f"Hook {name!r} has no live release") + pairs.append((HookIdentity(name=hook.name, feature=hook.feature), release)) + hook_results: list[HookResult] = [] overall_status: RunStatus = RunStatus.COMPLETED - for hook in hooks: - work_dir = self.hook_storage.get_hook_output_dir(deposition_srn, hook.name) + for hook, release in pairs: + work_dir = self.hook_storage.get_hook_output_dir(deposition_srn, hook.name.root) + started_at = datetime.now(timezone.utc) + run_id = HookRunId(uuid4()) try: - result = await hook_service.run_hook(hook, inputs, work_dir) + result = await hook_service.run_hook(hook, release, inputs, work_dir) except Exception: + finished_at = datetime.now(timezone.utc) + await self.hook_registry.record_run( + HookRun( + id=run_id, + release_id=release.id, + status=HookRunStatus.ERROR, + started_at=started_at, + finished_at=finished_at, + duration_s=(finished_at - started_at).total_seconds(), + # The run raised (no HookResult), so the retry count isn't + # available on this failure path; diagnose via log_ref. + oom_retries=0, + ) + ) overall_status = RunStatus.FAILED break + + finished_at = datetime.now(timezone.utc) + # run.json carries run_id to InsertRecordFeatures — no DB lookup (§6). 
+ await self.hook_storage.write_run_ref(work_dir, str(run_id), str(release.id)) + await self.hook_registry.record_run( + HookRun( + id=run_id, + release_id=release.id, + status=HookRunStatus.from_hook_status(result.status), + started_at=started_at, + finished_at=finished_at, + duration_s=result.duration_seconds, + oom_retries=result.oom_retries, + ) + ) hook_results.append(result) if result.status == HookStatus.REJECTED: @@ -106,9 +155,9 @@ async def run_hooks( async def validate_deposition( self, deposition_srn: DepositionSRN, - convention_srn: ConventionSRN, + convention_id: ConventionSlug, metadata: dict[str, Any], - hooks: list[HookDefinition], + hooks: list[HookName], ) -> tuple[ValidationRun, list[HookResult]]: """Full validation workflow using enriched event data. @@ -133,15 +182,13 @@ async def validate_deposition( await self.run_repo.save(run) return run, [] - run, hook_results = await self.run_hooks( + return await self.run_hooks( run=run, deposition_srn=deposition_srn, inputs=inputs, - hooks=hooks, + hook_names=hooks, ) - return run, hook_results - async def save_run(self, run: ValidationRun) -> None: """Persist a validation run.""" await self.run_repo.save(run) diff --git a/server/osa/domain/validation/util/di/provider.py b/server/osa/domain/validation/util/di/provider.py index 707675d..08ba7d2 100644 --- a/server/osa/domain/validation/util/di/provider.py +++ b/server/osa/domain/validation/util/di/provider.py @@ -2,8 +2,14 @@ from osa.config import Config from osa.domain.shared.model.srn import Domain +from osa.domain.validation.command.create_release import CreateReleaseHandler +from osa.domain.validation.command.set_live import SetLiveHandler +from osa.domain.validation.query.get_release import GetReleaseHandler +from osa.domain.validation.query.list_hooks import ListHooksHandler +from osa.domain.validation.query.list_releases import ListReleasesHandler from osa.domain.validation.service import ValidationService from osa.domain.validation.service.hook 
import HookService +from osa.domain.validation.service.hook_registry import HookRegistryService from osa.util.di.base import Provider from osa.util.di.scope import Scope @@ -12,6 +18,18 @@ class ValidationProvider(Provider): service = provide(ValidationService, scope=Scope.UOW) hook_service = provide(HookService, scope=Scope.UOW) + # Hook registry (feature #145). + hook_registry_service = provide(HookRegistryService, scope=Scope.UOW) + + # Release (US3) + live-pointer (US4) write handlers. + create_release_handler = provide(CreateReleaseHandler, scope=Scope.UOW) + set_live_handler = provide(SetLiveHandler, scope=Scope.UOW) + + # Catalog / history / detail read handlers (US3). + list_hooks_handler = provide(ListHooksHandler, scope=Scope.UOW) + list_releases_handler = provide(ListReleasesHandler, scope=Scope.UOW) + get_release_handler = provide(GetReleaseHandler, scope=Scope.UOW) + @provide(scope=Scope.UOW) def get_node_domain(self, config: Config) -> Domain: return Domain(config.domain) diff --git a/server/osa/infrastructure/data/postgres_table_read_store.py b/server/osa/infrastructure/data/postgres_table_read_store.py index 88b7568..5d79298 100644 --- a/server/osa/infrastructure/data/postgres_table_read_store.py +++ b/server/osa/infrastructure/data/postgres_table_read_store.py @@ -250,12 +250,13 @@ def _coerce_cursor_value(value: Any, sort_expr: sa.ColumnElement[Any]) -> Any: async def _stream_features(self, plan: QueryPlan) -> AsyncIterator[Mapping[str, Any]]: if plan.feature_name is None: # guarded by QueryPlan, narrowed for the type checker raise ValidationError("feature_name is required for a FEATURE plan", field="feature") - ft, _ = await self._resolve_feature_table(plan.schema_id, plan.feature_name) + feature = plan.feature_name.root + ft, _ = await self._resolve_feature_table(plan.schema_id, feature) conditions: list[Any] = [] if plan.filter is not None: conditions.append( - self._compile_feature_filter(plan.filter, ft=ft, feature_name=plan.feature_name) + 
self._compile_feature_filter(plan.filter, ft=ft, feature_name=feature) ) order_keys, cursor_after = self._features_sort(plan, ft) diff --git a/server/osa/infrastructure/data/schema_feature_reader.py b/server/osa/infrastructure/data/schema_feature_reader.py index 5280f65..4f21a0f 100644 --- a/server/osa/infrastructure/data/schema_feature_reader.py +++ b/server/osa/infrastructure/data/schema_feature_reader.py @@ -68,6 +68,7 @@ async def _hook_names(self, schema_id: SchemaId) -> set[str]: result = await self.session.execute(stmt) names: set[str] = set() for (hooks,) in result.all(): - for hook in hooks or []: - names.add(hook["name"]) + # conventions.hooks is a JSON list of bare hook-name strings (#145). + for hook_name in hooks or []: + names.add(hook_name) return names diff --git a/server/osa/infrastructure/k8s/ingester_runner.py b/server/osa/infrastructure/k8s/ingester_runner.py index 721739c..559f3ca 100644 --- a/server/osa/infrastructure/k8s/ingester_runner.py +++ b/server/osa/infrastructure/k8s/ingester_runner.py @@ -16,7 +16,7 @@ TransientError, ) from osa.domain.shared.model.source import IngesterDefinition -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.port.ingester_runner import IngesterInputs, IngesterOutput, IngesterRunner from osa.infrastructure.k8s.errors import classify_api_error from osa.infrastructure.logging import get_logger @@ -133,7 +133,7 @@ async def _run_job( try: # Check for existing Jobs existing = await self._check_existing_job( - namespace, inputs.convention_srn, ingester.digest + namespace, inputs.convention_id, ingester.digest ) if existing == "succeeded": @@ -158,7 +158,7 @@ async def _run_job( work_dir=work_dir, files_dir=files_dir, inputs=inputs, - convention_srn=inputs.convention_srn, + convention_id=inputs.convention_id, ) job_name_to_watch = spec.metadata.name @@ -220,12 +220,12 @@ async def _parse_source_output(self, work_dir: Path, files_dir: 
Path) -> Ingeste async def _check_existing_job( self, namespace: str, - convention_srn: ConventionSRN | None, + convention_id: ConventionSlug | None, digest: str = "", ) -> str | None: label_parts = ["osa.io/role=ingester"] - if convention_srn is not None: - label_parts.append(f"osa.io/convention={label_value(convention_srn)}") + if convention_id is not None: + label_parts.append(f"osa.io/convention={label_value(convention_id.root)}") if digest: label_parts.append(f"osa.io/digest={sanitize_label(digest)}") label_selector = ",".join(label_parts) @@ -251,7 +251,7 @@ def _build_job_spec( work_dir: Path, files_dir: Path, inputs: IngesterInputs | None = None, - convention_srn: ConventionSRN | None = None, + convention_id: ConventionSlug | None = None, ) -> V1Job: from kubernetes_asyncio.client import ( V1Capabilities, @@ -272,7 +272,7 @@ def _build_job_spec( V1VolumeMount, ) - name = job_name("ingester", "ing", str(convention_srn) if convention_srn else "unknown") + name = job_name("ingester", "ing", convention_id.root if convention_id else "unknown") relative_work = self._relative_path(work_dir) input_subpath = f"{relative_work}/input" output_subpath = f"{relative_work}/output" @@ -282,8 +282,8 @@ def _build_job_spec( "osa.io/role": "ingester", "osa.io/digest": sanitize_label(ingester.digest), } - if convention_srn is not None: - labels["osa.io/convention"] = label_value(convention_srn) + if convention_id is not None: + labels["osa.io/convention"] = label_value(convention_id.root) if inputs and inputs.ingest_run_id: labels["osa.io/ingest-run-id"] = inputs.ingest_run_id labels["osa.io/ingest-run-batch"] = str(inputs.batch_index) diff --git a/server/osa/infrastructure/k8s/runner.py b/server/osa/infrastructure/k8s/runner.py index 23e59b7..4787dfa 100644 --- a/server/osa/infrastructure/k8s/runner.py +++ b/server/osa/infrastructure/k8s/runner.py @@ -15,7 +15,8 @@ PermanentError, TransientError, ) -from osa.domain.shared.model.hook import HookDefinition +from 
osa.domain.shared.model.hook import HookIdentity +from osa.domain.validation.model.hook_release import HookRelease from osa.domain.validation.model.hook_result import HookResult, HookStatus from osa.domain.validation.port.hook_runner import HookInputs, HookRunner from osa.infrastructure.k8s.errors import classify_api_error @@ -83,7 +84,8 @@ async def capture_logs(self, run_id: str) -> str: async def run( self, - hook: HookDefinition, + hook: HookIdentity, + release: HookRelease, inputs: HookInputs, work_dir: Path, ) -> HookResult: @@ -92,15 +94,16 @@ async def run( # Write records.jsonl (unified batch contract) records_jsonl = "\n".join(json.dumps(r.model_dump()) for r in inputs.records) + "\n" await self._s3.put_object(f"{input_prefix}/records.jsonl", records_jsonl) - if inputs.config or hook.runtime.config: - config = {**hook.runtime.config, **(inputs.config or {})} + if inputs.config or release.runtime.config: + config = {**release.runtime.config, **(inputs.config or {})} await self._s3.put_object(f"{input_prefix}/config.json", json.dumps(config)) - return await self._run_job(hook, inputs, work_dir) + return await self._run_job(hook, release, inputs, work_dir) async def _run_job( self, - hook: HookDefinition, + hook: HookIdentity, + release: HookRelease, inputs: HookInputs, work_dir: Path, ) -> HookResult: @@ -112,7 +115,7 @@ async def _run_job( job_name_to_watch = None try: - existing = await self._check_existing_job(namespace, hook.name, inputs.run_id) + existing = await self._check_existing_job(namespace, hook.name.root, inputs.run_id) if existing == "succeeded": logger.info( @@ -140,6 +143,7 @@ async def _run_job( files_dir = first_dir.parent spec = self._build_job_spec( hook, + release, work_dir, run_id=inputs.run_id, files_dir=files_dir, @@ -161,7 +165,7 @@ async def _run_job( await self._wait_for_completion( job_name_to_watch, namespace, - timeout_seconds=hook.runtime.limits.timeout_seconds + 30, + timeout_seconds=release.runtime.limits.timeout_seconds + 
30, ) return await self._parse_hook_result(hook, work_dir, start_time) @@ -184,7 +188,7 @@ async def _run_job( await self._cleanup_job(job_name_to_watch, namespace) async def _parse_hook_result( - self, hook: HookDefinition, work_dir: Path, start_time: float + self, hook: HookIdentity, work_dir: Path, start_time: float ) -> HookResult: """Parse output from a completed Job (reads from S3).""" from osa.infrastructure.runner_utils import parse_progress_from_s3 @@ -243,7 +247,8 @@ async def _check_existing_job( def _build_job_spec( self, - hook: HookDefinition, + hook: HookIdentity, + release: HookRelease, work_dir: Path, *, run_id: str, @@ -271,7 +276,7 @@ def _build_job_spec( V1VolumeMount, ) - name = job_name("hook", hook.name, run_id) + name = job_name("hook", hook.name.root, run_id) relative_work = self._relative_path(work_dir) input_subpath = f"{relative_work}/input" output_subpath = f"{relative_work}/output" @@ -280,7 +285,7 @@ def _build_job_spec( batch_index = run_id.split("_b", 1)[1] if "_b" in run_id else "0" labels = { "osa.io/role": "hook", - "osa.io/hook": hook.name, + "osa.io/hook": hook.name.root, "osa.io/ingest-run-id": ingest_run_id, "osa.io/ingest-run-batch": batch_index, } @@ -316,17 +321,17 @@ def _build_job_spec( container = V1Container( name="hook", - image=f"{hook.runtime.image}@{hook.runtime.digest}", + image=f"{release.runtime.image}@{release.runtime.digest}", env=[ V1EnvVar(name="OSA_IN", value="/osa/in"), V1EnvVar(name="OSA_OUT", value="/osa/out"), V1EnvVar(name="OSA_FILES", value="/osa/files"), - V1EnvVar(name="OSA_HOOK_NAME", value=hook.name), + V1EnvVar(name="OSA_HOOK_NAME", value=hook.name.root), ], resources=V1ResourceRequirements( limits={ - "memory": to_k8s_quantity(hook.runtime.limits.memory), - "cpu": hook.runtime.limits.cpu, + "memory": to_k8s_quantity(release.runtime.limits.memory), + "cpu": release.runtime.limits.cpu, }, ), security_context=V1SecurityContext( @@ -365,7 +370,7 @@ def _build_job_spec( 
metadata=V1ObjectMeta(name=name, namespace=self._config.namespace, labels=labels), spec=V1JobSpec( backoff_limit=0, - active_deadline_seconds=SCHEDULING_TIMEOUT + hook.runtime.limits.timeout_seconds, + active_deadline_seconds=SCHEDULING_TIMEOUT + release.runtime.limits.timeout_seconds, ttl_seconds_after_finished=self._config.job_ttl_seconds, template=V1PodTemplateSpec( metadata=V1ObjectMeta(labels=labels), diff --git a/server/osa/infrastructure/oci/runner.py b/server/osa/infrastructure/oci/runner.py index e54bd8b..d670396 100644 --- a/server/osa/infrastructure/oci/runner.py +++ b/server/osa/infrastructure/oci/runner.py @@ -11,7 +11,8 @@ import aiodocker from osa.domain.shared.error import OOMError, PermanentError, TransientError -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import HookIdentity +from osa.domain.validation.model.hook_release import HookRelease from osa.domain.validation.model.hook_result import HookResult, HookStatus from osa.domain.validation.port.hook_runner import HookInputs, HookRunner from osa.infrastructure.logging import get_logger @@ -50,11 +51,12 @@ async def capture_logs(self, run_id: str) -> str: async def run( self, - hook: HookDefinition, + hook: HookIdentity, + release: HookRelease, inputs: HookInputs, work_dir: Path, ) -> HookResult: - timeout = hook.runtime.limits.timeout_seconds + timeout = release.runtime.limits.timeout_seconds # Create sibling input/ and output/ dirs under work_dir staging_dir = work_dir / "input" @@ -67,8 +69,8 @@ async def run( for record in inputs.records: f.write(json.dumps(record.model_dump()) + "\n") - if inputs.config or hook.runtime.config: - config = {**hook.runtime.config, **(inputs.config or {})} + if inputs.config or release.runtime.config: + config = {**release.runtime.config, **(inputs.config or {})} (staging_dir / "config.json").write_text(json.dumps(config)) # Create files directory structure: $OSA_FILES/{id}/ per record @@ -80,13 +82,16 @@ async def 
run( try: async def _resolve_and_run(): - image_ref = await self._resolve_image(hook.runtime.image, hook.runtime.digest) + image_ref = await self._resolve_image( + release.runtime.image, release.runtime.digest + ) return await self._run_container( image_ref, staging_dir, inputs.files_dirs, container_output, hook, + release, files_base, ) @@ -120,7 +125,8 @@ async def _run_container( staging_dir: Path, files_dirs: dict[str, Path], output_dir: Path, - hook: HookDefinition, + hook: HookIdentity, + release: HookRelease, files_base: Path, ) -> dict: container = None @@ -153,9 +159,9 @@ async def _run_container( "User": "65534:65534", "HostConfig": { "Binds": binds, - "Memory": parse_memory(hook.runtime.limits.memory), - "MemorySwap": parse_memory(hook.runtime.limits.memory), - "NanoCpus": int(float(hook.runtime.limits.cpu) * 1e9), + "Memory": parse_memory(release.runtime.limits.memory), + "MemorySwap": parse_memory(release.runtime.limits.memory), + "NanoCpus": int(float(release.runtime.limits.cpu) * 1e9), "NetworkMode": "none", "ReadonlyRootfs": True, "CapDrop": ["ALL"], @@ -185,12 +191,12 @@ async def _run_container( log.error( "OOM: hook={hook_name} limit={memory}", hook_name=hook.name, - memory=hook.runtime.limits.memory, + memory=release.runtime.limits.memory, ) if tail_text: for line in tail_text.splitlines(): print(f" OOM [{hook.name}] {line}", file=sys.stderr, flush=True) - raise OOMError(f"Hook killed by OOM (limit: {hook.runtime.limits.memory})") + raise OOMError(f"Hook killed by OOM (limit: {release.runtime.limits.memory})") # Parse progress file progress = parse_progress_file(output_dir) diff --git a/server/osa/infrastructure/persistence/adapter/ingest_storage.py b/server/osa/infrastructure/persistence/adapter/ingest_storage.py index 44bf535..58ac6c0 100644 --- a/server/osa/infrastructure/persistence/adapter/ingest_storage.py +++ b/server/osa/infrastructure/persistence/adapter/ingest_storage.py @@ -76,3 +76,11 @@ def hook_work_dir(self, ingest_run_id: str, 
batch_index: int, hook_name: str) -> d = self._layout.ingest_batch_hook_dir(ingest_run_id, batch_index, hook_name) d.mkdir(parents=True, exist_ok=True) return d + + async def write_run_ref(self, work_dir: Path, run_id: str, release_id: str) -> None: + """Write run.json alongside a hook's features (per-row provenance, #145).""" + output_dir = Path(work_dir) / "output" + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / "run.json").write_text( + json.dumps({"run_id": run_id, "release_id": release_id}) + ) diff --git a/server/osa/infrastructure/persistence/adapter/storage.py b/server/osa/infrastructure/persistence/adapter/storage.py index 98a359b..4c1c03f 100644 --- a/server/osa/infrastructure/persistence/adapter/storage.py +++ b/server/osa/infrastructure/persistence/adapter/storage.py @@ -12,7 +12,8 @@ from osa.domain.deposition.model.value import DepositionFile from osa.domain.deposition.port.storage import FileStoragePort from osa.domain.shared.error import InfrastructureError -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.provenance import RunRef +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from osa.domain.validation.model.batch_outcome import ( BatchRecordOutcome, HookRecordId, @@ -85,6 +86,21 @@ async def hook_features_exist(self, hook_output_dir: str, feature_name: str) -> features_file = Path(hook_output_dir) / "hooks" / feature_name / "output" / "features.json" return features_file.exists() + async def write_run_ref(self, work_dir: Path, run_id: str, release_id: str) -> None: + """Write run.json alongside a hook's features (per-row provenance, #145).""" + output_dir = Path(work_dir) / "output" + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / "run.json").write_text( + json.dumps({"run_id": run_id, "release_id": release_id}) + ) + + async def read_run_ref(self, output_dir: str, hook_name: str) -> RunRef | None: + run_file = Path(output_dir) / "hooks" / 
hook_name / "output" / "run.json" + if not run_file.exists(): + return None + data = json.loads(run_file.read_text()) + return RunRef(run_id=data["run_id"], release_id=data["release_id"]) + async def save_file( self, deposition_id: DepositionSRN, @@ -160,16 +176,16 @@ async def delete_files_for_deposition( if dep_dir.exists(): shutil.rmtree(dep_dir) - def _conv_id(self, convention_srn: ConventionSRN) -> str: - return f"{convention_srn.domain.root}_{convention_srn.id.root}" + def _conv_id(self, convention_id: ConventionSlug) -> str: + return convention_id.root - def get_source_staging_dir(self, convention_srn: ConventionSRN, run_id: str) -> Path: - staging = self.base_path / "sources" / self._conv_id(convention_srn) / "staging" / run_id + def get_source_staging_dir(self, convention_id: ConventionSlug, run_id: str) -> Path: + staging = self.base_path / "sources" / self._conv_id(convention_id) / "staging" / run_id staging.mkdir(parents=True, exist_ok=True) return staging - def get_source_output_dir(self, convention_srn: ConventionSRN, run_id: str) -> Path: - output = self.base_path / "sources" / self._conv_id(convention_srn) / "runs" / run_id + def get_source_output_dir(self, convention_id: ConventionSlug, run_id: str) -> Path: + output = self.base_path / "sources" / self._conv_id(convention_id) / "runs" / run_id output.mkdir(parents=True, exist_ok=True) return output diff --git a/server/osa/infrastructure/persistence/di.py b/server/osa/infrastructure/persistence/di.py index 1c8eacd..6345877 100644 --- a/server/osa/infrastructure/persistence/di.py +++ b/server/osa/infrastructure/persistence/di.py @@ -28,6 +28,7 @@ from osa.domain.shared.port.event_repository import EventRepository from osa.domain.feature.port.feature_store import FeatureStore from osa.domain.validation.port.repository import ValidationRunRepository +from osa.domain.validation.port.hook_registry import HookRegistry from osa.domain.data.port.data_read_store import ( DataCatalogReadStore, 
DataTableReadStore, @@ -46,6 +47,9 @@ from osa.infrastructure.persistence.repository.convention import ( PostgresConventionRepository, ) +from osa.infrastructure.persistence.repository.hook_registry import ( + PostgresHookRegistry, +) from osa.infrastructure.persistence.repository.deposition import ( PostgresDepositionRepository, ) @@ -124,6 +128,10 @@ def get_metadata_store(self, engine: AsyncEngine, session: AsyncSession) -> Meta PostgresConventionRepository, scope=Scope.UOW, provides=ConventionRepository ) + # Hook registry (validation domain — feature #145). Owns hooks, releases, + # the live pointer, and hook_runs (record + provenance reads). + hook_registry_repo = provide(PostgresHookRegistry, scope=Scope.UOW, provides=HookRegistry) + # Cross-domain readers schema_reader = provide(SchemaReaderAdapter, scope=Scope.UOW, provides=SchemaReader) ontology_reader = provide(OntologyReaderAdapter, scope=Scope.UOW, provides=OntologyReader) diff --git a/server/osa/infrastructure/persistence/feature_store.py b/server/osa/infrastructure/persistence/feature_store.py index 9aa7e0f..b820cf9 100644 --- a/server/osa/infrastructure/persistence/feature_store.py +++ b/server/osa/infrastructure/persistence/feature_store.py @@ -43,8 +43,8 @@ def __init__(self, engine: AsyncEngine, session: AsyncSession) -> None: self._engine = engine self._session = session - async def create_table(self, hook_name: str, columns: list[ColumnDef]) -> None: - _validate_pg_identifier(hook_name) + async def create_table(self, feature: str, columns: list[ColumnDef]) -> None: + _validate_pg_identifier(feature) async with self._engine.begin() as conn: # Ensure the features schema exists @@ -53,22 +53,22 @@ async def create_table(self, hook_name: str, columns: list[ColumnDef]) -> None: # Check for existing table in catalog — duplicate is a hard error existing = await conn.execute( select(feature_tables_table.c.hook_name).where( - feature_tables_table.c.hook_name == hook_name + 
feature_tables_table.c.hook_name == feature ) ) if existing.first() is not None: - raise ConflictError(f"Feature table already exists: {hook_name}") + raise ConflictError(f"Feature table already exists: {feature}") # Build dynamic table schema = FeatureSchema(columns=columns) - table = build_feature_table(hook_name, schema) + table = build_feature_table(feature, schema) # Create table (FK to records.srn is declared inline on the column) await conn.run_sync(table.metadata.create_all, checkfirst=False) await conn.execute( feature_tables_table.insert().values( - hook_name=hook_name, - pg_table=feature_pg_table(hook_name), + hook_name=feature, + pg_table=feature_pg_table(feature), feature_schema=schema.model_dump(), schema_version=1, created_at=datetime.now(UTC), @@ -77,19 +77,21 @@ async def create_table(self, hook_name: str, columns: list[ColumnDef]) -> None: async def insert_features( self, - hook_name: str, + feature: str, record_srn: str, rows: list[dict[str, Any]], + run_id: str, ) -> int: if not rows: return 0 - _validate_pg_identifier(hook_name) + _validate_pg_identifier(feature) now = datetime.now(UTC) enriched_rows = [ { "record_srn": record_srn, + "run_id": run_id, "created_at": now, **{k: json.dumps(v) if isinstance(v, (list, dict)) else v for k, v in row.items()}, } @@ -100,7 +102,7 @@ async def insert_features( chunk_size = 1000 total = 0 pg_schema = feature_pg_schema() - pg_table = feature_pg_table(hook_name) + pg_table = feature_pg_table(feature) async with self._engine.begin() as conn: # Reflect the actual table to get correct column types for casts metadata = sa.MetaData(schema=pg_schema) diff --git a/server/osa/infrastructure/persistence/feature_table.py b/server/osa/infrastructure/persistence/feature_table.py index eff08c8..ad3ebd3 100644 --- a/server/osa/infrastructure/persistence/feature_table.py +++ b/server/osa/infrastructure/persistence/feature_table.py @@ -3,18 +3,19 @@ from __future__ import annotations import sqlalchemy as sa +from 
sqlalchemy.dialects.postgresql import UUID as PG_UUID from osa.domain.shared.model.hook import ColumnDef from osa.domain.shared.model.value import ValueObject from osa.infrastructure.persistence.api_naming import feature_pg_schema, feature_pg_table from osa.infrastructure.persistence.column_mapper import map_column -from osa.infrastructure.persistence.tables import records_table +from osa.infrastructure.persistence.tables import hook_runs_table, records_table # Back-compat re-export for callers that import the constant directly. # Prefer ``feature_pg_schema()`` in new code. FEATURES_SCHEMA = feature_pg_schema() -AUTO_COLUMN_NAMES = frozenset({"id", "record_srn", "created_at"}) +AUTO_COLUMN_NAMES = frozenset({"id", "record_srn", "run_id", "created_at"}) class FeatureSchema(ValueObject): @@ -55,6 +56,14 @@ def build_feature_table(api_feature_name: str, schema: FeatureSchema) -> sa.Tabl nullable=False, index=True, ), + # Per-row provenance (feature #145): which hook_run produced this row. + sa.Column( + "run_id", + PG_UUID(as_uuid=True), + sa.ForeignKey(hook_runs_table.c.id), + nullable=False, + index=True, + ), sa.Column( "created_at", sa.DateTime(timezone=True), diff --git a/server/osa/infrastructure/persistence/mappers/deposition.py b/server/osa/infrastructure/persistence/mappers/deposition.py index f47d271..455ab12 100644 --- a/server/osa/infrastructure/persistence/mappers/deposition.py +++ b/server/osa/infrastructure/persistence/mappers/deposition.py @@ -4,7 +4,7 @@ from osa.domain.auth.model.value import UserId from osa.domain.deposition.model.aggregate import Deposition from osa.domain.deposition.model.value import DepositionFile, DepositionStatus -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN, RecordSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN, RecordSRN def row_to_deposition(row: dict[str, Any]) -> Deposition: @@ -16,7 +16,7 @@ def row_to_deposition(row: dict[str, Any]) -> Deposition: return Deposition( 
srn=DepositionSRN.parse(row["srn"]), - convention_srn=ConventionSRN.parse(row["convention_srn"]), + convention_id=ConventionSlug.parse(row["convention_id"]), status=DepositionStatus(row["status"]), metadata=row.get("metadata", {}), files=files, @@ -31,7 +31,7 @@ def deposition_to_dict(dep: Deposition) -> dict[str, Any]: """Convert Deposition aggregate to database dict.""" return { "srn": str(dep.srn), - "convention_srn": str(dep.convention_srn), + "convention_id": str(dep.convention_id), "status": dep.status, "metadata": dep.metadata, "files": [f.model_dump(mode="json") for f in dep.files], diff --git a/server/osa/infrastructure/persistence/mappers/record.py b/server/osa/infrastructure/persistence/mappers/record.py index a916d75..5dbb24a 100644 --- a/server/osa/infrastructure/persistence/mappers/record.py +++ b/server/osa/infrastructure/persistence/mappers/record.py @@ -14,7 +14,7 @@ from osa.domain.record.model.aggregate import Record from osa.domain.shared.model.source import RecordSource -from osa.domain.shared.model.srn import ConventionSRN, LocalId, RecordSRN, SchemaId, Semver +from osa.domain.shared.model.srn import ConventionSlug, LocalId, RecordSRN, SchemaId, Semver _source_adapter = TypeAdapter(RecordSource) @@ -30,7 +30,7 @@ def row_to_record(row: dict[str, Any]) -> Record: return Record( srn=RecordSRN.parse(row["srn"]), source=source, - convention_srn=ConventionSRN.parse(row["convention_srn"]), + convention_id=ConventionSlug.parse(row["convention_id"]), schema_id=SchemaId( id=LocalId(row["schema_id"]), version=Semver.from_string(row["schema_version"]), @@ -44,7 +44,7 @@ def record_to_dict(record: Record) -> dict[str, Any]: """Convert Record aggregate to database dict.""" return { "srn": str(record.srn), - "convention_srn": str(record.convention_srn), + "convention_id": str(record.convention_id), "schema_id": record.schema_id.id.root, "schema_version": record.schema_id.version.root, "source": _source_adapter.dump_python(record.source, mode="json"), diff 
--git a/server/osa/infrastructure/persistence/repository/convention.py b/server/osa/infrastructure/persistence/repository/convention.py index c2cd050..69419af 100644 --- a/server/osa/infrastructure/persistence/repository/convention.py +++ b/server/osa/infrastructure/persistence/repository/convention.py @@ -1,26 +1,26 @@ from typing import Any, List -from sqlalchemy import insert, select +from sqlalchemy import select +from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.ext.asyncio import AsyncSession from osa.domain.deposition.model.convention import Convention from osa.domain.deposition.model.value import FileRequirements from osa.domain.deposition.port.convention_repository import ConventionRepository -from osa.domain.shared.model.hook import HookDefinition from osa.domain.shared.model.source import IngesterDefinition -from osa.domain.shared.model.srn import ConventionSRN, LocalId, SchemaId, Semver +from osa.domain.shared.model.srn import ConventionSlug, LocalId, SchemaId, Semver from osa.infrastructure.persistence.tables import conventions_table def _convention_to_row(convention: Convention) -> dict[str, Any]: return { - "srn": str(convention.srn), + "id": convention.id.root, "title": convention.title, "description": convention.description, "schema_id": convention.schema_id.id.root, "schema_version": convention.schema_id.version.root, "file_requirements": convention.file_requirements.model_dump(), - "hooks": [h.model_dump() for h in convention.hooks], + "hooks": [name.root for name in convention.hooks], # hook-name registry refs "source": convention.ingester.model_dump() if convention.ingester else None, "created_at": convention.created_at, } @@ -29,7 +29,7 @@ def _convention_to_row(convention: Convention) -> dict[str, Any]: def _row_to_convention(row: dict[str, Any]) -> Convention: source_data = row.get("source") return Convention( - srn=ConventionSRN.parse(row["srn"]), + id=ConventionSlug.parse(row["id"]), title=row["title"], 
description=row.get("description"), schema_id=SchemaId( @@ -37,7 +37,7 @@ def _row_to_convention(row: dict[str, Any]) -> Convention: version=Semver.from_string(row["schema_version"]), ), file_requirements=FileRequirements.model_validate(row["file_requirements"]), - hooks=[HookDefinition.model_validate(h) for h in (row.get("hooks") or [])], + hooks=list(row.get("hooks") or []), ingester=IngesterDefinition.model_validate(source_data) if source_data else None, created_at=row["created_at"], ) @@ -48,12 +48,28 @@ def __init__(self, session: AsyncSession) -> None: self.session = session async def save(self, convention: Convention) -> None: + # Conventions are mutable, slug-keyed (design-revisions §3): deploy is a + # declarative upsert — re-declaring updates the convention in place, + # preserving its original created_at. row = _convention_to_row(convention) - await self.session.execute(insert(conventions_table).values(**row)) + stmt = pg_insert(conventions_table).values(**row) + stmt = stmt.on_conflict_do_update( + index_elements=[conventions_table.c.id], + set_={ + "title": stmt.excluded.title, + "description": stmt.excluded.description, + "schema_id": stmt.excluded.schema_id, + "schema_version": stmt.excluded.schema_version, + "file_requirements": stmt.excluded.file_requirements, + "hooks": stmt.excluded.hooks, + "source": stmt.excluded.source, + }, + ) + await self.session.execute(stmt) await self.session.flush() - async def get(self, srn: ConventionSRN) -> Convention | None: - stmt = select(conventions_table).where(conventions_table.c.srn == str(srn)) + async def get(self, id: ConventionSlug) -> Convention | None: + stmt = select(conventions_table).where(conventions_table.c.id == id.root) result = await self.session.execute(stmt) row = result.mappings().first() return _row_to_convention(dict(row)) if row else None @@ -70,8 +86,8 @@ async def list( result = await self.session.execute(stmt) return [_row_to_convention(dict(r)) for r in result.mappings().all()] - async 
def exists(self, srn: ConventionSRN) -> bool: - stmt = select(conventions_table.c.srn).where(conventions_table.c.srn == str(srn)) + async def exists(self, id: ConventionSlug) -> bool: + stmt = select(conventions_table.c.id).where(conventions_table.c.id == id.root) result = await self.session.execute(stmt) return result.first() is not None diff --git a/server/osa/infrastructure/persistence/repository/hook_registry.py b/server/osa/infrastructure/persistence/repository/hook_registry.py new file mode 100644 index 0000000..5f55fe4 --- /dev/null +++ b/server/osa/infrastructure/persistence/repository/hook_registry.py @@ -0,0 +1,252 @@ +"""Postgres adapter for the hook registry (feature #145). + +Concurrency-critical operations (version assignment, live-pointer advance) run +under a ``SELECT ... FOR UPDATE`` row lock on the ``hooks`` row so concurrent +release submissions to one hook produce gap-free monotonic versions and never +lose a pointer update (research R7). +""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any +from uuid import UUID, uuid4 + +from sqlalchemy import and_, func, insert, select, update +from sqlalchemy.ext.asyncio import AsyncSession + +from osa.domain.shared.error import ConflictError, NotFoundError +from osa.domain.shared.model.hook import ( + HookName, + OciConfig, + OciLimits, + TableFeatureSpec, +) +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId, ReleaseOutcome +from osa.domain.validation.model.hook_run import HookRun +from osa.domain.validation.port.hook_registry import HookRegistry +from osa.infrastructure.persistence.tables import ( + hook_releases_table, + hook_runs_table, + hooks_table, +) + + +class PostgresHookRegistry(HookRegistry): + def __init__(self, session: AsyncSession) -> None: + self.session = session + + @staticmethod + def _to_hook(row: dict[str, Any]) -> Hook: + live = row["live_release_id"] + 
return Hook( + name=row["name"], + feature=TableFeatureSpec.model_validate(row["feature_spec"]), + live_release_id=HookReleaseId(live) if live is not None else None, + created_at=row["created_at"], + ) + + @staticmethod + def _to_release(row: dict[str, Any]) -> HookRelease: + return HookRelease( + id=HookReleaseId(row["id"]), + hook_name=row["hook_name"], + version=row["version"], + runtime=OciConfig( + image=row["image"], + digest=row["digest"], + config=row["config"] or {}, + limits=OciLimits.model_validate(row["limits"]), + ), + source_ref=row["source_ref"], + built_by=row["built_by"], + built_at=row["built_at"], + ) + + async def upsert_identity(self, name: HookName, feature: TableFeatureSpec) -> Hook: + existing = await self._get_hook_row(name) + if existing is not None: + current = TableFeatureSpec.model_validate(existing["feature_spec"]) + if current != feature: + raise ConflictError( + f"Hook {name!r} already exists with a different feature contract; " + "the output contract is fixed across releases" + ) + return self._to_hook(existing) + + await self.session.execute( + insert(hooks_table).values( + name=name.root, + feature_spec=feature.model_dump(), + live_release_id=None, + created_at=datetime.now(UTC), + ) + ) + await self.session.flush() + row = await self._get_hook_row(name) + assert row is not None + return self._to_hook(row) + + async def create_release( + self, + name: HookName, + runtime: OciConfig, + source_ref: str, + built_by: str | None, + ) -> ReleaseOutcome: + # Row-lock the hook so concurrent releases serialize (R7). Also asserts + # the hook exists. + locked = await self.session.execute( + select(hooks_table).where(hooks_table.c.name == name.root).with_for_update() + ) + hook_row = locked.mappings().first() + if hook_row is None: + raise NotFoundError(f"Hook not found: {name}") + + # Idempotency on (hook_name, digest): return the existing release, no + # new version, pointer unchanged (R5). 
Decided under the row lock, so + # `created` is race-free under concurrent identical submissions. + dup = await self.session.execute( + select(hook_releases_table).where( + and_( + hook_releases_table.c.hook_name == name.root, + hook_releases_table.c.digest == runtime.digest, + ) + ) + ) + dup_row = dup.mappings().first() + if dup_row is not None: + return ReleaseOutcome(release=self._to_release(dict(dup_row)), created=False) + + max_version = await self.session.scalar( + select(func.coalesce(func.max(hook_releases_table.c.version), 0)).where( + hook_releases_table.c.hook_name == name.root + ) + ) + next_version = int(max_version or 0) + 1 + release_id = uuid4() + + await self.session.execute( + insert(hook_releases_table).values( + id=release_id, + hook_name=name.root, + version=next_version, + image=runtime.image, + digest=runtime.digest, + config=runtime.config, + limits=runtime.limits.model_dump(), + source_ref=source_ref, + built_by=built_by, + built_at=datetime.now(UTC), + ) + ) + # Advance the live pointer. 
+ await self.session.execute( + update(hooks_table) + .where(hooks_table.c.name == name.root) + .values(live_release_id=release_id) + ) + await self.session.flush() + + created = await self.get_release(name, next_version) + assert created is not None + return ReleaseOutcome(release=created, created=True) + + async def set_live(self, name: HookName, version: int) -> Hook: + locked = await self.session.execute( + select(hooks_table).where(hooks_table.c.name == name.root).with_for_update() + ) + if locked.mappings().first() is None: + raise NotFoundError(f"Hook not found: {name}") + + target = await self.get_release(name, version) + if target is None: + raise NotFoundError(f"Release not found: {name}@v{version}") + + await self.session.execute( + update(hooks_table) + .where(hooks_table.c.name == name.root) + .values(live_release_id=target.id) + ) + await self.session.flush() + row = await self._get_hook_row(name) + assert row is not None + return self._to_hook(row) + + async def get_hook(self, name: HookName) -> Hook | None: + row = await self._get_hook_row(name) + return self._to_hook(row) if row is not None else None + + async def list_hooks(self) -> list[Hook]: + result = await self.session.execute(select(hooks_table).order_by(hooks_table.c.name)) + return [self._to_hook(dict(r)) for r in result.mappings().all()] + + async def list_releases(self, name: HookName) -> list[HookRelease]: + result = await self.session.execute( + select(hook_releases_table) + .where(hook_releases_table.c.hook_name == name.root) + .order_by(hook_releases_table.c.version.desc()) + ) + return [self._to_release(dict(r)) for r in result.mappings().all()] + + async def get_release(self, name: HookName, version: int) -> HookRelease | None: + result = await self.session.execute( + select(hook_releases_table).where( + and_( + hook_releases_table.c.hook_name == name.root, + hook_releases_table.c.version == version, + ) + ) + ) + row = result.mappings().first() + return self._to_release(dict(row)) 
if row else None + + async def get_release_by_id(self, release_id: object) -> HookRelease | None: + rid = release_id if isinstance(release_id, UUID) else UUID(str(release_id)) + result = await self.session.execute( + select(hook_releases_table).where(hook_releases_table.c.id == rid) + ) + row = result.mappings().first() + return self._to_release(dict(row)) if row else None + + async def record_run(self, run: HookRun) -> None: + await self.session.execute( + insert(hook_runs_table).values( + id=run.id, + release_id=run.release_id, + status=run.status.value, + started_at=run.started_at, + finished_at=run.finished_at, + duration_s=run.duration_s, + oom_retries=run.oom_retries, + log_ref=run.log_ref, + ) + ) + await self.session.flush() + + async def resolve_live(self, names: list[HookName]) -> dict[HookName, HookRelease]: + if not names: + return {} + stmt = ( + select(hook_releases_table) + .select_from( + hooks_table.join( + hook_releases_table, + hooks_table.c.live_release_id == hook_releases_table.c.id, + ) + ) + .where(hooks_table.c.name.in_([n.root for n in names])) + ) + result = await self.session.execute(stmt) + return { + HookName(row["hook_name"]): self._to_release(dict(row)) + for row in result.mappings().all() + } + + async def _get_hook_row(self, name: HookName) -> dict[str, Any] | None: + result = await self.session.execute( + select(hooks_table).where(hooks_table.c.name == name.root) + ) + row = result.mappings().first() + return dict(row) if row else None diff --git a/server/osa/infrastructure/persistence/repository/ingest.py b/server/osa/infrastructure/persistence/repository/ingest.py index 5d330ac..476595c 100644 --- a/server/osa/infrastructure/persistence/repository/ingest.py +++ b/server/osa/infrastructure/persistence/repository/ingest.py @@ -23,7 +23,7 @@ async def save(self, ingest_run: IngestRun) -> None: """Insert or update an ingest run.""" values = { "id": ingest_run.id, - "convention_srn": ingest_run.convention_srn, + "convention_id": 
ingest_run.convention_id, "status": ingest_run.status.value, "ingestion_finished": ingest_run.ingestion_finished, "batches_ingested": ingest_run.batches_ingested, @@ -54,10 +54,10 @@ async def get(self, id: str) -> IngestRun | None: return None return _row_to_ingest_run(dict(row)) - async def get_running_for_convention(self, convention_srn: str) -> IngestRun | None: + async def get_running_for_convention(self, convention_id: str) -> IngestRun | None: stmt = ( select(ingest_runs_table) - .where(ingest_runs_table.c.convention_srn == convention_srn) + .where(ingest_runs_table.c.convention_id == convention_id) .where( ingest_runs_table.c.status.in_( [IngestStatus.PENDING.value, IngestStatus.RUNNING.value] @@ -135,7 +135,7 @@ async def increment_completed(self, id: str, published_count: int) -> IngestRun: def _row_to_ingest_run(row: dict) -> IngestRun: return IngestRun( id=row["id"], - convention_srn=row["convention_srn"], + convention_id=row["convention_id"], status=IngestStatus(row["status"]), ingestion_finished=row["ingestion_finished"], batches_ingested=row["batches_ingested"], diff --git a/server/osa/infrastructure/persistence/tables.py b/server/osa/infrastructure/persistence/tables.py index b8b8d22..2a7dbf8 100644 --- a/server/osa/infrastructure/persistence/tables.py +++ b/server/osa/infrastructure/persistence/tables.py @@ -4,6 +4,7 @@ Boolean, Column, DateTime, + Float, ForeignKey, Index, Integer, @@ -15,6 +16,7 @@ text, ) from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.dialects.postgresql import UUID as PGUUID from sqlalchemy.types import JSON # Metadata object for all tables @@ -27,7 +29,7 @@ "depositions", metadata, Column("srn", String, primary_key=True), - Column("convention_srn", String, nullable=False), # Convention submitted against + Column("convention_id", String, nullable=False), # Convention submitted against Column("status", String(32), nullable=False), # DepositionStatus as string Column("metadata", JSON, nullable=False), 
Column("files", JSON, nullable=False), @@ -65,7 +67,7 @@ "records", metadata, Column("srn", String, primary_key=True), - Column("convention_srn", Text, nullable=False), + Column("convention_id", Text, nullable=False), Column("schema_id", Text, nullable=False), Column("schema_version", Text, nullable=False), Column("source", JSONB, nullable=False), @@ -73,12 +75,16 @@ Column("published_at", DateTime(timezone=True), nullable=False), ) -Index("idx_records_convention_srn", records_table.c.convention_srn) +Index("idx_records_convention_id", records_table.c.convention_id) Index("idx_records_schema_id", records_table.c.schema_id) +# Expression must be the raw ``->>`` text accessor (NOT .as_string(), which adds a +# redundant CAST) so it matches the bulk-publish ON CONFLICT ((source->>'type'), +# (source->>'id')) — Postgres matches ON CONFLICT to a unique index by exact +# expression. Index( "uq_records_source", - records_table.c.source["type"].as_string(), - records_table.c.source["id"].as_string(), + records_table.c.source["type"].astext, + records_table.c.source["id"].astext, unique=True, ) Index("idx_records_published_at", records_table.c.published_at) @@ -274,13 +280,14 @@ conventions_table = Table( "conventions", metadata, - Column("srn", String, primary_key=True), # Convention SRN stays as-is (published artifact) + # Caller-supplied ConventionSlug ("@") — feature #145. 
+ Column("id", String, primary_key=True), Column("title", String(255), nullable=False), Column("description", Text, nullable=True), Column("schema_id", String, nullable=False), Column("schema_version", String, nullable=False), Column("file_requirements", JSON, nullable=False), # FileRequirements as dict - Column("hooks", JSON, nullable=False, default=[]), # List of HookDefinition dicts + Column("hooks", JSON, nullable=False, default=[]), # List of hook names (str) — registry refs Column("source", JSON, nullable=True), # IngesterDefinition as dict Column("created_at", DateTime(timezone=True), nullable=False), ) @@ -346,7 +353,7 @@ "ingest_runs", metadata, Column("id", String, primary_key=True), - Column("convention_srn", String, nullable=False), + Column("convention_id", String, nullable=False), Column("status", String(32), nullable=False, server_default=text("'pending'")), Column("ingestion_finished", Boolean, nullable=False, server_default=text("false")), Column("batches_ingested", Integer, nullable=False, server_default=text("0")), @@ -359,7 +366,7 @@ Column("completed_at", DateTime(timezone=True), nullable=True), ) -Index("idx_ingest_runs_convention", ingest_runs_table.c.convention_srn) +Index("idx_ingest_runs_convention", ingest_runs_table.c.convention_id) Index("idx_ingest_runs_status", ingest_runs_table.c.status) @@ -385,3 +392,78 @@ device_authorizations_table.c.status, device_authorizations_table.c.expires_at, ) + + +# ============================================================================ +# HOOK REGISTRY (Validation — feature #145) +# ============================================================================ +# A hook's stable identity + fixed output contract + live-release pointer. 
+# The circular hooks.live_release_id ↔ hook_releases.hook_name dependency is +# broken with a DEFERRABLE, use_alter FK: SQLAlchemy/Alembic emit it as a +# separate ALTER after both tables exist, and the release is inserted before the +# pointer is set within one transaction. +hooks_table = Table( + "hooks", + metadata, + Column("name", String(40), primary_key=True), # HookName, globally unique + Column("feature_spec", JSONB, nullable=False), # serialized TableFeatureSpec + Column( + "live_release_id", + PGUUID(as_uuid=True), + ForeignKey( + "hook_releases.id", + name="fk_hooks_live_release_id", + use_alter=True, + deferrable=True, + initially="DEFERRED", + ), + nullable=True, + ), + Column("created_at", DateTime(timezone=True), nullable=False), +) + + +# Immutable, integer-versioned hook artifact: what image runs, built from where. +hook_releases_table = Table( + "hook_releases", + metadata, + Column("id", PGUUID(as_uuid=True), primary_key=True), # HookReleaseId (UUIDv7) + Column("hook_name", String(40), ForeignKey("hooks.name"), nullable=False), + Column("version", Integer, nullable=False), # monotonic per hook, gap-free + Column("image", Text, nullable=False), + Column("digest", Text, nullable=False), + Column("config", JSONB, nullable=False, server_default=text("'{}'")), + Column("limits", JSONB, nullable=False), + Column("source_ref", Text, nullable=False), # git SHA / build id (reproducibility) + Column("built_by", Text, nullable=True), + Column("built_at", DateTime(timezone=True), nullable=False), + UniqueConstraint("hook_name", "version", name="uq_hook_releases_hook_version"), + UniqueConstraint("hook_name", "digest", name="uq_hook_releases_hook_digest"), +) + +Index( + "idx_hook_releases_hook_version", + hook_releases_table.c.hook_name, + hook_releases_table.c.version.desc(), +) + + +# Append-only PURE execution record + per-row provenance anchor (design-revisions +# §6). 
No execution-context columns: a feature row reaches its data origin via the +# other arm of the join (record_srn → records.source); this is only "what code ran, +# when, and where the logs are". Recorded as a single insert at completion, so +# finished_at / duration_s / oom_retries are always known. +hook_runs_table = Table( + "hook_runs", + metadata, + Column("id", PGUUID(as_uuid=True), primary_key=True), # HookRunId; stamped on feature rows + Column("release_id", PGUUID(as_uuid=True), ForeignKey("hook_releases.id"), nullable=False), + Column("status", String(16), nullable=False), # HookRunStatus + Column("started_at", DateTime(timezone=True), nullable=False), + Column("finished_at", DateTime(timezone=True), nullable=False), + Column("duration_s", Float, nullable=False), + Column("oom_retries", Integer, nullable=False, server_default=text("0")), + Column("log_ref", Text, nullable=True), +) + +Index("idx_hook_runs_release", hook_runs_table.c.release_id) # recall: rows from a release diff --git a/server/osa/infrastructure/s3/ingest_storage.py b/server/osa/infrastructure/s3/ingest_storage.py index 12820a4..973bbd4 100644 --- a/server/osa/infrastructure/s3/ingest_storage.py +++ b/server/osa/infrastructure/s3/ingest_storage.py @@ -87,3 +87,9 @@ def batch_files_dir(self, ingest_run_id: str, batch_index: int) -> Path: def hook_work_dir(self, ingest_run_id: str, batch_index: int, hook_name: str) -> Path: return self._layout.ingest_batch_hook_dir(ingest_run_id, batch_index, hook_name) + + async def write_run_ref(self, work_dir: Path, run_id: str, release_id: str) -> None: + """Write run.json alongside a hook's features (per-row provenance, #145).""" + prefix = relative_path(work_dir, self._data_mount_path) + key = f"{prefix}/output/run.json" + await self._s3.put_object(key, json.dumps({"run_id": run_id, "release_id": release_id})) diff --git a/server/osa/infrastructure/s3/storage.py b/server/osa/infrastructure/s3/storage.py index 2f7016a..74ab765 100644 --- 
a/server/osa/infrastructure/s3/storage.py +++ b/server/osa/infrastructure/s3/storage.py @@ -15,7 +15,8 @@ from osa.domain.deposition.model.value import DepositionFile from osa.domain.deposition.port.storage import FileStoragePort from osa.domain.shared.error import InfrastructureError, NotFoundError -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.provenance import RunRef +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from osa.domain.validation.model.batch_outcome import ( BatchRecordOutcome, HookRecordId, @@ -47,8 +48,8 @@ def __init__(self, s3: S3Client, data_mount_path: str) -> None: def _safe_id(self, srn: DepositionSRN) -> str: return f"{srn.domain.root}_{srn.id.root}" - def _conv_id(self, convention_srn: ConventionSRN) -> str: - return f"{convention_srn.domain.root}_{convention_srn.id.root}" + def _conv_id(self, convention_id: ConventionSlug) -> str: + return convention_id.root def _dep_prefix(self, deposition_id: DepositionSRN) -> str: return f"depositions/{self._safe_id(deposition_id)}" @@ -122,24 +123,20 @@ async def delete_files_for_deposition( # ── Ingester storage ────────────────────────────────────────────── - def get_source_staging_dir(self, convention_srn: ConventionSRN, run_id: str) -> Path: + def get_source_staging_dir(self, convention_id: ConventionSlug, run_id: str) -> Path: """Return path for PVC subpath computation (no I/O).""" return ( Path(self._data_mount_path) / "sources" - / self._conv_id(convention_srn) + / self._conv_id(convention_id) / "staging" / run_id ) - def get_source_output_dir(self, convention_srn: ConventionSRN, run_id: str) -> Path: + def get_source_output_dir(self, convention_id: ConventionSlug, run_id: str) -> Path: """Return path for PVC subpath computation (no I/O).""" return ( - Path(self._data_mount_path) - / "sources" - / self._conv_id(convention_srn) - / "runs" - / run_id + Path(self._data_mount_path) / "sources" / self._conv_id(convention_id) / 
"runs" / run_id ) async def move_source_files_to_deposition( @@ -252,6 +249,22 @@ async def hook_features_exist(self, hook_output_dir: str, feature_name: str) -> key = f"{prefix}/hooks/{feature_name}/output/features.json" return await self._s3.head_object(key) + async def write_run_ref(self, work_dir: Path, run_id: str, release_id: str) -> None: + """Write run.json alongside a hook's features (per-row provenance, #145).""" + prefix = relative_path(work_dir, self._data_mount_path) + key = f"{prefix}/output/run.json" + await self._s3.put_object(key, json.dumps({"run_id": run_id, "release_id": release_id})) + + async def read_run_ref(self, output_dir: str, hook_name: str) -> RunRef | None: + prefix = relative_path(Path(output_dir), self._data_mount_path) + key = f"{prefix}/hooks/{hook_name}/output/run.json" + try: + data_bytes = await self._s3.get_object(key) + except Exception: + return None + data = json.loads(data_bytes) + return RunRef(run_id=data["run_id"], release_id=data["release_id"]) + async def read_batch_outcomes( self, output_dir: str, hook_name: str ) -> dict[HookRecordId, BatchRecordOutcome]: diff --git a/server/osa/util/di/fastapi.py b/server/osa/util/di/fastapi.py index 41f0e03..1f2ff46 100644 --- a/server/osa/util/di/fastapi.py +++ b/server/osa/util/di/fastapi.py @@ -2,7 +2,7 @@ import logging from typing import Any -from uuid import UUID +from uuid import NAMESPACE_URL, UUID, uuid5 import jwt from sqlalchemy import select @@ -24,6 +24,21 @@ logger = logging.getLogger(__name__) +def _parse_scopes(payload: dict[str, Any], scope_claim: str) -> frozenset[str]: + """Parse OAuth scopes from an M2M token (#145, US5). + + Tolerant of the two common encodings: a single space-delimited string + (``"scope": "conventions:write hooks:write"``) or an array + (``"scp": ["conventions:write"]``). 
+ """ + raw = payload.get(scope_claim) + if isinstance(raw, str): + return frozenset(raw.split()) + if isinstance(raw, (list, tuple)): + return frozenset(str(s) for s in raw) + return frozenset() + + async def resolve_identity( request: Request, token_service: TokenService, @@ -61,6 +76,22 @@ async def resolve_identity( logger.warning("auth: rejected invalid token (%s)", type(e).__name__) return Anonymous() + # M2M path (#145, US5): a token from the configured second issuer is + # authorized by scopes, not DB roles. Its `sub` is a client identifier, not + # an internal user UUID, so derive a stable synthetic UserId from issuer+sub + # for provenance (`built_by`) and keep roles empty. + extra_issuer = token_service.extra_issuer + if extra_issuer is not None and payload.get("iss") == extra_issuer.issuer: + client = str(payload.get("sub", "")) + scopes = _parse_scopes(payload, extra_issuer.scope_claim) + logger.debug("auth: M2M credential (client=%s, scopes=%s)", client, sorted(scopes)) + return Principal( + user_id=UserId(uuid5(NAMESPACE_URL, f"{extra_issuer.issuer}#{client}")), + provider_identity=ProviderIdentity(provider="m2m", external_id=client), + roles=frozenset(), + scopes=scopes, + ) + user_id = UserId(UUID(payload["sub"])) # Lightweight role lookup using a short-lived session diff --git a/server/pyproject.toml b/server/pyproject.toml index 5c8c3f8..f6bccea 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "greenlet>=3.2.4", "asyncpg>=0.31.0", "psycopg2-binary>=2.9.11", - "pyjwt>=2.11.0", + "pyjwt[crypto]>=2.11.0", "openpyxl>=3.1.5", "python-multipart>=0.0.22", "slowapi>=0.1.9", diff --git a/server/tests/contract/test_hooks_contract.py b/server/tests/contract/test_hooks_contract.py new file mode 100644 index 0000000..e85bb3b --- /dev/null +++ b/server/tests/contract/test_hooks_contract.py @@ -0,0 +1,55 @@ +"""DB-free contract tests for the hook registry routes (#145, US3–US5). 
+ +Runs in CI's no-Postgres contract job. Asserts routing + auth gating without +touching the database: unauthenticated writes are rejected at handler +construction (the Principal provider raises ``missing_token`` → 401) before any +DI/DB work, and the new hook paths are registered on the app. DB-backed +behaviour (version monotonicity, idempotency, provenance) lives in the unit + +integration suites. +""" + +import os + +import pytest +from httpx import ASGITransport, AsyncClient + +os.environ.setdefault("OSA_BASE_URL", "http://localhost:8000") +os.environ.setdefault("OSA_AUTH__JWT__SECRET", "test-secret-for-contract-tests-minimum-32-chars") + + +def _app(): + from osa.application.api.rest.app import create_app + + return create_app() + + +@pytest.fixture +def client() -> AsyncClient: + return AsyncClient(transport=ASGITransport(app=_app()), base_url="http://test") + + +@pytest.mark.asyncio +async def test_create_release_requires_auth(client: AsyncClient): + async with client: + resp = await client.post( + "/api/v1/hooks/pocket_detect/releases", + json={"image": "reg/p:abc", "digest": "sha256:x", "source_ref": "git"}, + ) + assert resp.status_code == 401 + assert resp.json()["code"] == "missing_token" + + +@pytest.mark.asyncio +async def test_set_live_requires_auth(client: AsyncClient): + async with client: + resp = await client.put("/api/v1/hooks/pocket_detect/live", json={"version": 1}) + assert resp.status_code == 401 + + +def test_hook_routes_registered(): + app = _app() + paths = {getattr(r, "path", None) for r in app.routes} + assert "/api/v1/hooks" in paths + assert "/api/v1/hooks/{name}/releases" in paths + assert "/api/v1/hooks/{name}/releases/{version}" in paths + assert "/api/v1/hooks/{name}/live" in paths diff --git a/server/tests/integration/conftest.py b/server/tests/integration/conftest.py index 2ee9a02..4cd6aa2 100644 --- a/server/tests/integration/conftest.py +++ b/server/tests/integration/conftest.py @@ -4,6 +4,7 @@ import os from datetime import 
UTC, datetime from typing import Any +from uuid import uuid4 import pytest import pytest_asyncio @@ -14,6 +15,13 @@ create_async_engine, ) +from osa.domain.shared.model.hook import ( + ColumnDef, + OciConfig, + TableFeatureSpec, +) +from osa.domain.validation.model.hook_run import HookRun, HookRunId, HookRunStatus +from osa.infrastructure.persistence.repository.hook_registry import PostgresHookRegistry from osa.infrastructure.persistence.seed import ensure_system_user @@ -28,7 +36,7 @@ async def seed_record( engine: AsyncEngine, *, srn: str, - convention_srn: str = "urn:osa:localhost:conv:test@1.0.0", + convention_id: str = "urn:osa:localhost:conv:test@1.0.0", schema_id: str = "test", schema_version: str = "1.0.0", source: dict[str, Any] | None = None, @@ -45,7 +53,7 @@ async def seed_record( await conn.execute( text( """ - INSERT INTO records (srn, convention_srn, schema_id, schema_version, + INSERT INTO records (srn, convention_id, schema_id, schema_version, source, metadata, published_at) VALUES (:srn, :conv, :schema_id, :schema_version, CAST(:source AS JSONB), CAST(:meta AS JSONB), :published_at) @@ -53,7 +61,7 @@ async def seed_record( ), { "srn": srn, - "conv": convention_srn, + "conv": convention_id, "schema_id": schema_id, "schema_version": schema_version, "source": json.dumps(src), @@ -63,6 +71,53 @@ async def seed_record( ) +async def seed_hook_run( + engine: AsyncEngine, + *, + feature_name: str, + columns: list[ColumnDef] | None = None, +) -> str: + """Seed the per-row provenance chain for a feature table and return a real + ``hook_runs.id`` (as a string) usable as a feature row's ``run_id``. + + Feature #145 made every ``features.*`` row carry a NOT NULL ``run_id`` that + is a FK to ``hook_runs.id``. To insert feature rows in a test the chain + ``hook identity → release → run`` must exist first. Built via the registry + adapter so the FK is satisfied with real ids. 
+ """ + if columns is None: + columns = [ + ColumnDef(name="score", json_type="number", required=True), + ColumnDef(name="label", json_type="string", required=False), + ] + feature = TableFeatureSpec(cardinality="many", columns=columns) + runtime = OciConfig(image="ghcr.io/example/hook:latest", digest=f"sha256:{uuid4().hex}") + + factory = async_sessionmaker(engine, expire_on_commit=False) + async with factory() as session: + from osa.domain.shared.model.hook import HookName + + registry = PostgresHookRegistry(session) + await registry.upsert_identity(HookName(feature_name), feature) + outcome = await registry.create_release( + HookName(feature_name), runtime, source_ref="abc1234", built_by=None + ) + now = datetime.now(UTC) + run = HookRun( + id=HookRunId(uuid4()), + release_id=outcome.release.id, + status=HookRunStatus.PASSED, + started_at=now, + finished_at=now, + duration_s=0.1, + oom_retries=0, + log_ref=None, + ) + await registry.record_run(run) + await session.commit() + return str(run.id) + + @pytest_asyncio.fixture async def pg_engine(): """Per-test async engine pointing at osa_test.""" @@ -90,7 +145,8 @@ async def pg_session(pg_engine: AsyncEngine): text( "TRUNCATE TABLE depositions, conventions, schemas, ontologies, " "ontology_terms, events, deliveries, records, validation_runs, " - "feature_tables, metadata_tables, users, identities, refresh_tokens, " + "feature_tables, metadata_tables, hooks, hook_releases, hook_runs, " + "users, identities, refresh_tokens, " "role_assignments CASCADE" ) ) diff --git a/server/tests/integration/persistence/test_convention_repo.py b/server/tests/integration/persistence/test_convention_repo.py index 0e5e818..aae123d 100644 --- a/server/tests/integration/persistence/test_convention_repo.py +++ b/server/tests/integration/persistence/test_convention_repo.py @@ -1,4 +1,11 @@ -"""Integration tests for ConventionRepository against real PostgreSQL.""" +"""Integration tests for ConventionRepository against real PostgreSQL. 
+ +Feature #145: conventions are unversioned, slug-keyed (``ConventionSlug``) and +mutable. ``save`` is an idempotent UPSERT by slug (re-saving updates in place, +preserving ``created_at``). Hooks are referenced by **name** (``HookName``) — +the versioned release each name resolves to lives in the validation hook +registry, not inline on the convention. +""" from datetime import UTC, datetime @@ -7,20 +14,14 @@ from osa.domain.deposition.model.convention import Convention from osa.domain.deposition.model.value import FileRequirements -from osa.domain.shared.model.hook import ( - ColumnDef, - HookDefinition, - OciConfig, - OciLimits, - TableFeatureSpec, -) +from osa.domain.shared.model.hook import HookName from osa.domain.shared.model.source import ( IngesterDefinition, IngesterLimits, IngesterScheduleConfig, InitialRunConfig, ) -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, SchemaId from osa.infrastructure.persistence.repository.convention import ( PostgresConventionRepository, ) @@ -28,14 +29,14 @@ def _make_convention( *, - srn: str = "urn:osa:localhost:conv:test-convention-001@1.0.0", + slug: str = "test-convention", title: str = "Test Convention", schema_id: str = "test-schema-001@1.0.0", - hooks: list[HookDefinition] | None = None, + hooks: list[HookName] | None = None, ingester: IngesterDefinition | None = None, ) -> Convention: return Convention( - srn=ConventionSRN.parse(srn), + id=ConventionSlug.parse(slug), title=title, description="A test convention for integration tests", schema_id=SchemaId.parse(schema_id), @@ -51,23 +52,8 @@ def _make_convention( ) -def _make_hook() -> HookDefinition: - return HookDefinition( - name="quality_check", - runtime=OciConfig( - image="ghcr.io/example/validator:latest", - digest="sha256:abc123", - config={"threshold": 0.95}, - limits=OciLimits(timeout_seconds=600, memory="4g", cpu="2.0"), - ), - feature=TableFeatureSpec( - cardinality="many", - 
columns=[ - ColumnDef(name="score", json_type="number", required=True), - ColumnDef(name="labels", json_type="array", required=False), - ], - ), - ) +def _make_hook() -> HookName: + return HookName("quality_check") def _make_ingester() -> IngesterDefinition: @@ -94,18 +80,16 @@ async def test_save_and_get(self, pg_session: AsyncSession): await repo.save(conv) await pg_session.commit() - got = await repo.get(conv.srn) + got = await repo.get(conv.id) assert got is not None - assert str(got.srn) == str(conv.srn) + assert str(got.id) == str(conv.id) assert got.title == conv.title assert got.description == conv.description assert str(got.schema_id) == str(conv.schema_id) assert got.file_requirements == conv.file_requirements assert len(got.hooks) == 1 - assert got.hooks[0].runtime.image == hook.runtime.image - assert got.hooks[0].runtime.digest == hook.runtime.digest - assert got.hooks[0].name == "quality_check" - assert got.hooks[0].feature.columns[0].name == "score" + # Hooks are name references now (registry resolves the live release). 
+ assert got.hooks[0].root == "quality_check" assert got.ingester is not None assert got.ingester.image == ingester.image assert got.ingester.schedule is not None @@ -115,14 +99,39 @@ async def test_save_and_get(self, pg_session: AsyncSession): async def test_get_nonexistent_returns_none(self, pg_session: AsyncSession): repo = PostgresConventionRepository(pg_session) - got = await repo.get(ConventionSRN.parse("urn:osa:localhost:conv:does-not-exist@1.0.0")) + got = await repo.get(ConventionSlug.parse("does-not-exist")) assert got is None + async def test_save_is_idempotent_upsert_preserving_created_at(self, pg_session: AsyncSession): + """Re-saving a slug updates in place and preserves the original created_at + (conventions are mutable + unversioned in #145 — no insert conflict).""" + repo = PostgresConventionRepository(pg_session) + conv = _make_convention(slug="upsert-me", title="Original") + original_created_at = conv.created_at + + await repo.save(conv) + await pg_session.commit() + + # Re-declare with a different title + later created_at; upsert keeps the + # original created_at but applies the new title. + updated = _make_convention(slug="upsert-me", title="Updated") + await repo.save(updated) + await pg_session.commit() + + got = await repo.get(ConventionSlug.parse("upsert-me")) + assert got is not None + assert got.title == "Updated" + assert got.created_at == original_created_at + + # Still exactly one row for this slug. 
+ result = await repo.list() + assert sum(1 for c in result if c.id.root == "upsert-me") == 1 + async def test_list_returns_ordered_by_created_at_desc(self, pg_session: AsyncSession): repo = PostgresConventionRepository(pg_session) - conv_a = _make_convention(srn="urn:osa:localhost:conv:conv-aaa@1.0.0", title="First") - conv_b = _make_convention(srn="urn:osa:localhost:conv:conv-bbb@1.0.0", title="Second") + conv_a = _make_convention(slug="conv-aaa", title="First") + conv_b = _make_convention(slug="conv-bbb", title="Second") await repo.save(conv_a) await pg_session.flush() @@ -140,7 +149,7 @@ async def test_list_with_limit_and_offset(self, pg_session: AsyncSession): for i in range(5): conv = _make_convention( - srn=f"urn:osa:localhost:conv:conv-{i:03d}@1.0.0", + slug=f"conv-{i:03d}", title=f"Conv {i}", ) await repo.save(conv) @@ -157,11 +166,11 @@ async def test_exists_true(self, pg_session: AsyncSession): await repo.save(conv) await pg_session.commit() - assert await repo.exists(conv.srn) is True + assert await repo.exists(conv.id) is True async def test_exists_false(self, pg_session: AsyncSession): repo = PostgresConventionRepository(pg_session) - assert await repo.exists(ConventionSRN.parse("urn:osa:localhost:conv:nope@1.0.0")) is False + assert await repo.exists(ConventionSlug.parse("nope-nope")) is False async def test_convention_without_ingester(self, pg_session: AsyncSession): """Ingester is optional — should be None on retrieval when not set.""" @@ -170,7 +179,7 @@ async def test_convention_without_ingester(self, pg_session: AsyncSession): await repo.save(conv) await pg_session.commit() - got = await repo.get(conv.srn) + got = await repo.get(conv.id) assert got is not None assert got.ingester is None assert got.hooks == [] diff --git a/server/tests/integration/persistence/test_deposition_repo.py b/server/tests/integration/persistence/test_deposition_repo.py index e504384..a8d49a6 100644 --- a/server/tests/integration/persistence/test_deposition_repo.py +++ 
b/server/tests/integration/persistence/test_deposition_repo.py @@ -10,7 +10,7 @@ from osa.domain.auth.model.value import SYSTEM_USER_ID, UserId from osa.domain.deposition.model.aggregate import Deposition from osa.domain.deposition.model.value import DepositionFile, DepositionStatus -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from osa.infrastructure.persistence.repository.deposition import ( PostgresDepositionRepository, ) @@ -27,7 +27,7 @@ def _make_deposition( now = datetime.now(UTC) return Deposition( srn=DepositionSRN.parse(dep_id), - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:test-conv@1.0.0"), + convention_id=ConventionSlug.parse("test-conv"), status=status, metadata=metadata or {"title": "Test Deposition"}, files=[], @@ -50,7 +50,7 @@ async def test_save_and_get(self, pg_session: AsyncSession): got = await repo.get(dep.srn) assert got is not None assert str(got.srn) == str(dep.srn) - assert str(got.convention_srn) == str(dep.convention_srn) + assert str(got.convention_id) == str(dep.convention_id) assert got.status == DepositionStatus.DRAFT assert got.metadata == {"title": "Test Deposition"} assert got.owner_id == SYSTEM_USER_ID diff --git a/server/tests/integration/persistence/test_feature_store.py b/server/tests/integration/persistence/test_feature_store.py index a05b28e..13ae81e 100644 --- a/server/tests/integration/persistence/test_feature_store.py +++ b/server/tests/integration/persistence/test_feature_store.py @@ -6,33 +6,26 @@ from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, - OciConfig, TableFeatureSpec, ) from osa.infrastructure.persistence.feature_store import PostgresFeatureStore from osa.infrastructure.persistence.feature_table import FEATURES_SCHEMA -def _make_hook( - name: str = "quality_check", +def _make_feature( columns: list[ColumnDef] | None = None, -) -> HookDefinition: +) -> TableFeatureSpec: + """A 
hook's output contract (#145: ``HookIdentity`` is name + feature only; + runtime moved to ``HookDeploy``/``HookRelease``). The store only needs the + feature columns, so build the ``TableFeatureSpec`` directly.""" if columns is None: columns = [ ColumnDef(name="score", json_type="number", required=True), ColumnDef(name="label", json_type="string", required=False), ] - return HookDefinition( - name=name, - runtime=OciConfig( - image="ghcr.io/example/validator:latest", - digest="sha256:abc123", - ), - feature=TableFeatureSpec( - cardinality="many", - columns=columns, - ), + return TableFeatureSpec( + cardinality="many", + columns=columns, ) @@ -42,9 +35,9 @@ async def test_create_table_creates_schema_and_table( self, pg_engine: AsyncEngine, pg_session: AsyncSession ): store = PostgresFeatureStore(pg_engine, pg_session) - hook = _make_hook(name="integration_test_hook") + feature = _make_feature() - await store.create_table("integration_test_hook", hook.feature.columns) + await store.create_table("integration_test_hook", feature.columns) # Verify the table exists in the features schema async with pg_engine.begin() as conn: @@ -82,19 +75,19 @@ async def test_create_table_duplicate_raises_conflict( from osa.domain.shared.error import ConflictError store = PostgresFeatureStore(pg_engine, pg_session) - hook = _make_hook(name="duplicate_hook") + feature = _make_feature() - await store.create_table("duplicate_hook", hook.feature.columns) + await store.create_table("duplicate_hook", feature.columns) with pytest.raises(ConflictError, match="already exists"): - await store.create_table("duplicate_hook", hook.feature.columns) + await store.create_table("duplicate_hook", feature.columns) async def test_create_table_registers_in_catalog( self, pg_engine: AsyncEngine, pg_session: AsyncSession ): store = PostgresFeatureStore(pg_engine, pg_session) - hook = _make_hook(name="catalog_hook") + feature = _make_feature() - await store.create_table("catalog_hook", hook.feature.columns) + 
await store.create_table("catalog_hook", feature.columns) # Check catalog async with pg_engine.begin() as conn: @@ -111,21 +104,23 @@ async def test_create_table_registers_in_catalog( @pytest.mark.asyncio class TestFeatureStoreInsert: async def test_insert_features(self, pg_engine: AsyncEngine, pg_session: AsyncSession): - from tests.integration.conftest import seed_record + from tests.integration.conftest import seed_hook_run, seed_record store = PostgresFeatureStore(pg_engine, pg_session) - hook = _make_hook(name="insert_hook") - await store.create_table("insert_hook", hook.feature.columns) + feature = _make_feature() + await store.create_table("insert_hook", feature.columns) record_srn = "urn:osa:localhost:rec:rec-001@1" await seed_record(pg_engine, srn=record_srn) + # Every feature row carries a NOT NULL run_id FK to hook_runs.id (#145). + run_id = await seed_hook_run(pg_engine, feature_name="insert_hook") rows = [ {"score": 0.95, "label": "good"}, {"score": 0.42, "label": "poor"}, {"score": 0.78, "label": None}, ] - count = await store.insert_features("insert_hook", record_srn, rows) + count = await store.insert_features("insert_hook", record_srn, rows, run_id) assert count == 3 # Verify data is in the table @@ -142,7 +137,11 @@ async def test_insert_empty_rows_returns_zero( self, pg_engine: AsyncEngine, pg_session: AsyncSession ): store = PostgresFeatureStore(pg_engine, pg_session) - count = await store.insert_features("whatever", "urn:osa:localhost:rec:x@1", []) + # Empty rows short-circuits before any insert, so the run_id is never + # used (no FK check) — a placeholder is fine here. 
+ count = await store.insert_features( + "whatever", "urn:osa:localhost:rec:x@1", [], "00000000-0000-0000-0000-000000000000" + ) assert count == 0 @@ -157,9 +156,9 @@ async def test_jsonb_column_for_array_and_object( ColumnDef(name="metadata", json_type="object", required=False), ColumnDef(name="count", json_type="integer", required=True), ] - hook = _make_hook(name="jsonb_hook", columns=columns) + feature = _make_feature(columns=columns) store = PostgresFeatureStore(pg_engine, pg_session) - await store.create_table("jsonb_hook", hook.feature.columns) + await store.create_table("jsonb_hook", feature.columns) # Verify JSONB columns via information_schema async with pg_engine.begin() as conn: @@ -176,10 +175,11 @@ async def test_jsonb_column_for_array_and_object( assert col_types["metadata"] == "jsonb" assert col_types["count"] == "bigint" - from tests.integration.conftest import seed_record + from tests.integration.conftest import seed_hook_run, seed_record record_srn = "urn:osa:localhost:rec:rec-jsonb@1" await seed_record(pg_engine, srn=record_srn) + run_id = await seed_hook_run(pg_engine, feature_name="jsonb_hook", columns=columns) # Insert data with JSONB values rows = [ @@ -189,5 +189,5 @@ async def test_jsonb_column_for_array_and_object( "count": 42, } ] - count = await store.insert_features("jsonb_hook", record_srn, rows) + count = await store.insert_features("jsonb_hook", record_srn, rows, run_id) assert count == 1 diff --git a/server/tests/integration/test_bulk_publish_dual_write.py b/server/tests/integration/test_bulk_publish_dual_write.py index ab11f35..b0ce34e 100644 --- a/server/tests/integration/test_bulk_publish_dual_write.py +++ b/server/tests/integration/test_bulk_publish_dual_write.py @@ -7,8 +7,13 @@ - Both rows land on a successful publish. - A malformed metadata value rolls back the whole transaction — no partial state where ``records`` has a row but the typed table doesn't. 
-- ``ConventionService.create_convention`` creates the typed table inline +- ``ConventionService.deploy`` creates the typed table inline (no event-handler race window). + +Feature #145: conventions are slug-keyed and unversioned; ``create_convention`` +was replaced by the bundled ``deploy`` (schema + hooks + convention in one +transaction). The convention is addressed by its bare ``ConventionSlug`` (the +``conventions.id`` column), no longer an opaque SRN. """ from __future__ import annotations @@ -28,9 +33,11 @@ from osa.domain.semantics.model.value import Cardinality, FieldDefinition, FieldType from osa.domain.semantics.service.schema import SchemaService from osa.domain.shared.model.source import DepositionSource -from osa.domain.shared.model.srn import ConventionSRN, Domain, SchemaIdentifier +from osa.domain.shared.model.srn import ConventionSlug, Domain, SchemaIdentifier +from osa.domain.validation.service.hook_registry import HookRegistryService from osa.infrastructure.persistence.metadata_store import PostgresMetadataStore from osa.infrastructure.persistence.repository.convention import PostgresConventionRepository +from osa.infrastructure.persistence.repository.hook_registry import PostgresHookRegistry from osa.infrastructure.persistence.repository.ontology import PostgresOntologyRepository from osa.infrastructure.persistence.repository.record import PostgresRecordRepository from osa.infrastructure.persistence.repository.schema import PostgresSemanticsSchemaRepository @@ -69,15 +76,20 @@ async def _register_convention( convention_repo=PostgresConventionRepository(pg_session), schema_service=schema_service, metadata_service=metadata_service, + hook_registry=HookRegistryService(registry=PostgresHookRegistry(pg_session)), outbox=AsyncMock(), node_domain=Domain("localhost"), ) - await convention_service.create_convention( - id=SchemaIdentifier(slug), + # Bundled deploy: schema + typed metadata table + convention, one txn. 
+ # Use the slug for both the convention and its schema so the typed table is + # named "{slug}_v1" (asserted below). + await convention_service.deploy( + slug=ConventionSlug.parse(slug), title="Dual Write Sample", - version="1.0.0", - schema=_fields(), file_requirements=FileRequirements(accepted_types=[], max_count=0, max_file_size=0), + schema_slug=SchemaIdentifier(slug), + schema_version="1.0.0", + schema_fields=_fields(), ) await pg_session.commit() return convention_service @@ -85,7 +97,7 @@ async def _register_convention( @pytest.mark.asyncio class TestConventionCreatesTypedTableInline: - async def test_typed_table_exists_immediately_after_create_convention( + async def test_typed_table_exists_immediately_after_deploy( self, pg_engine: AsyncEngine, pg_session: AsyncSession ): """No event-handler race window — the table exists in the same txn.""" @@ -110,13 +122,6 @@ async def test_bulk_publish_writes_both_tables( ): await _register_convention(pg_engine, pg_session, slug="bulk-dual") - # Fetch the convention SRN to attach drafts to. - async with pg_engine.begin() as conn: - conv_srn_str = ( - await conn.execute(text("SELECT srn FROM conventions LIMIT 1")) - ).scalar() - assert conv_srn_str is not None - record_service = RecordService( record_repo=PostgresRecordRepository(pg_session), convention_repo=PostgresConventionRepository(pg_session), @@ -128,11 +133,12 @@ async def test_bulk_publish_writes_both_tables( feature_reader=AsyncMock(), ) + # Drafts reference the convention by its bare slug (#145). 
drafts = [ RecordDraft( source=DepositionSource(id=f"dep-{uuid4()}"), metadata={"species": "Homo sapiens", "resolution": 2.0 + i * 0.1}, - convention_srn=ConventionSRN.parse(conv_srn_str), + convention_id=ConventionSlug.parse("bulk-dual"), ) for i in range(3) ] @@ -161,13 +167,6 @@ async def test_malformed_metadata_rolls_back_everything( no orphan row left in ``records``.""" await _register_convention(pg_engine, pg_session, slug="rollback-sample") - async with pg_engine.begin() as conn: - conv_srn_str = ( - await conn.execute( - text("SELECT srn FROM conventions WHERE schema_id = 'rollback-sample'") - ) - ).scalar() - record_service = RecordService( record_repo=PostgresRecordRepository(pg_session), convention_repo=PostgresConventionRepository(pg_session), @@ -184,7 +183,7 @@ async def test_malformed_metadata_rolls_back_everything( RecordDraft( source=DepositionSource(id=f"dep-{uuid4()}"), metadata={"species": "A", "resolution": "not-a-number"}, - convention_srn=ConventionSRN.parse(conv_srn_str), + convention_id=ConventionSlug.parse("rollback-sample"), ) ] diff --git a/server/tests/integration/test_data_features_postgres.py b/server/tests/integration/test_data_features_postgres.py index cc01784..594169f 100644 --- a/server/tests/integration/test_data_features_postgres.py +++ b/server/tests/integration/test_data_features_postgres.py @@ -38,7 +38,7 @@ ) from osa.infrastructure.persistence.tables import conventions_table -from tests.integration.conftest import seed_record +from tests.integration.conftest import seed_hook_run, seed_record SCHEMA = SchemaId.parse("compound@1.0.0") SCHEMA_B = SchemaId.parse("protein@1.0.0") @@ -79,33 +79,39 @@ async def _register_hook( session: AsyncSession, hook_name: str = HOOK, schema: SchemaId = SCHEMA, -) -> None: - """Link the schema → hook via a convention row, then create its feature table. 
- - The read store only reads ``hooks[*].name`` from the convention, so a - name-only hooks payload is sufficient to scope the feature to the schema. - A pre-existing feature table is reused, mirroring ``CreateFeatureTables`` - (which swallows the ConflictError when two conventions share a hook name). +) -> str: + """Link the schema → hook via a convention row, seed the provenance chain, + and create the hook's feature table. Returns a real ``hook_runs.id`` to + stamp on inserted feature rows (#145). + + The read store only reads hook **names** from the convention's ``hooks`` + column (now a JSON list of name strings), so a name-only payload is + sufficient to scope the feature to the schema. A pre-existing feature table + is reused, mirroring ``CreateFeatureTables`` (which swallows the + ConflictError when two conventions share a hook name). """ await session.execute( conventions_table.insert().values( - srn=f"urn:osa:localhost:conv:{schema.id.root}-{hook_name}@1.0.0", + id=f"{schema.id.root}-{hook_name}", title=f"{schema.id.root} conv", description=None, schema_id=schema.id.root, schema_version=schema.version.root, file_requirements={}, - hooks=[{"name": hook_name}], + hooks=[hook_name], source=None, created_at=datetime.now(UTC), ) ) await session.commit() + # Seed hooks/releases/runs so feature rows have a valid run_id FK target. 
+ run_id = await seed_hook_run(engine, feature_name=hook_name, columns=_feature_columns()) feature_store = PostgresFeatureStore(engine, session) try: await feature_store.create_table(hook_name, _feature_columns()) except ConflictError: pass + return run_id async def _publish( @@ -146,10 +152,13 @@ async def test_streams_feature_rows_with_data_columns( store = await _setup_schema(pg_engine, pg_session) srn = await _publish(pg_engine, store, "rec1") await pg_session.commit() - await _register_hook(pg_engine, pg_session) + run_id = await _register_hook(pg_engine, pg_session) feature_store = PostgresFeatureStore(pg_engine, pg_session) await feature_store.insert_features( - HOOK, str(srn), [{"score": 0.9, "label": "high"}, {"score": 0.1, "label": "low"}] + HOOK, + str(srn), + [{"score": 0.9, "label": "high"}, {"score": 0.1, "label": "low"}], + run_id, ) rs = PostgresTableReadStore(pg_session) @@ -169,10 +178,13 @@ async def test_feature_filter_narrows_results( store = await _setup_schema(pg_engine, pg_session) srn = await _publish(pg_engine, store, "rec1") await pg_session.commit() - await _register_hook(pg_engine, pg_session) + run_id = await _register_hook(pg_engine, pg_session) feature_store = PostgresFeatureStore(pg_engine, pg_session) await feature_store.insert_features( - HOOK, str(srn), [{"score": 0.9, "label": "high"}, {"score": 0.1, "label": "low"}] + HOOK, + str(srn), + [{"score": 0.9, "label": "high"}, {"score": 0.1, "label": "low"}], + run_id, ) rs = PostgresTableReadStore(pg_session) @@ -217,9 +229,11 @@ async def test_created_at_sort_cursor_round_trips( store = await _setup_schema(pg_engine, pg_session) srn = await _publish(pg_engine, store, "rec1") await pg_session.commit() - await _register_hook(pg_engine, pg_session) + run_id = await _register_hook(pg_engine, pg_session) feature_store = PostgresFeatureStore(pg_engine, pg_session) - await feature_store.insert_features(HOOK, str(srn), [{"score": s} for s in (0.1, 0.2, 0.3)]) + await 
feature_store.insert_features( + HOOK, str(srn), [{"score": s} for s in (0.1, 0.2, 0.3)], run_id + ) rs = PostgresTableReadStore(pg_session) sort = [SortSpec(column="created_at", direction=SortDirection.ASC)] @@ -260,11 +274,11 @@ async def _seed_shared_hook( srn_a = await _publish(pg_engine, store_a, "reca") srn_b = await _publish(pg_engine, store_b, "recb", schema=SCHEMA_B) await pg_session.commit() - await _register_hook(pg_engine, pg_session) - await _register_hook(pg_engine, pg_session, schema=SCHEMA_B) + run_a = await _register_hook(pg_engine, pg_session) + run_b = await _register_hook(pg_engine, pg_session, schema=SCHEMA_B) feature_store = PostgresFeatureStore(pg_engine, pg_session) - await feature_store.insert_features(HOOK, str(srn_a), [{"score": 0.9, "label": "a"}]) - await feature_store.insert_features(HOOK, str(srn_b), [{"score": 0.2, "label": "b"}]) + await feature_store.insert_features(HOOK, str(srn_a), [{"score": 0.9, "label": "a"}], run_a) + await feature_store.insert_features(HOOK, str(srn_b), [{"score": 0.2, "label": "b"}], run_b) return srn_a, srn_b async def test_stream_excludes_rows_of_other_schemas( @@ -297,9 +311,11 @@ async def test_manifest_includes_feature_resource( store = await _setup_schema(pg_engine, pg_session) srn = await _publish(pg_engine, store, "rec1") await pg_session.commit() - await _register_hook(pg_engine, pg_session) + run_id = await _register_hook(pg_engine, pg_session) feature_store = PostgresFeatureStore(pg_engine, pg_session) - await feature_store.insert_features(HOOK, str(srn), [{"score": 0.9, "label": "high"}]) + await feature_store.insert_features( + HOOK, str(srn), [{"score": 0.9, "label": "high"}], run_id + ) rs = PostgresCatalogReadStore(pg_session, Domain("localhost")) manifest = await rs.get_schema_manifest(SCHEMA) diff --git a/server/tests/integration/test_data_routes_e2e_postgres.py b/server/tests/integration/test_data_routes_e2e_postgres.py index d2e14b3..4add5c9 100644 --- 
a/server/tests/integration/test_data_routes_e2e_postgres.py +++ b/server/tests/integration/test_data_routes_e2e_postgres.py @@ -30,7 +30,7 @@ ) from osa.infrastructure.persistence.tables import conventions_table -from tests.integration.conftest import seed_record +from tests.integration.conftest import seed_hook_run, seed_record # create_app() reads Config() at import/call time; localhost domain needs a base URL. os.environ.setdefault("OSA_BASE_URL", "http://localhost:8000") @@ -85,33 +85,38 @@ async def _seed(engine: AsyncEngine, session: AsyncSession, n: int) -> None: async def _seed_feature( engine: AsyncEngine, session: AsyncSession, record_srn: RecordSRN, n: int ) -> None: - """Register a hook on the schema and populate ``features.chem_features``.""" + """Register a hook on the schema and populate ``features.chem_features``. + + Feature #145: the convention's ``hooks`` column is a JSON list of plain hook + **name strings** (registry refs), and every feature row carries a NOT NULL + ``run_id`` FK to ``hook_runs.id`` — so the provenance chain is seeded first. 
+ """ + columns = [ + ColumnDef(name="score", json_type="number", required=True), + ColumnDef(name="label", json_type="string", required=False), + ] await session.execute( conventions_table.insert().values( - srn=f"urn:osa:localhost:conv:{HOOK}@1.0.0", + id=f"{HOOK}-conv", title="compound conv", description=None, schema_id=SCHEMA.id.root, schema_version=SCHEMA.version.root, file_requirements={}, - hooks=[{"name": HOOK}], + hooks=[HOOK], source=None, created_at=datetime.now(UTC), ) ) await session.commit() + run_id = await seed_hook_run(engine, feature_name=HOOK, columns=columns) feature_store = PostgresFeatureStore(engine, session) - await feature_store.create_table( - HOOK, - [ - ColumnDef(name="score", json_type="number", required=True), - ColumnDef(name="label", json_type="string", required=False), - ], - ) + await feature_store.create_table(HOOK, columns) await feature_store.insert_features( HOOK, str(record_srn), [{"score": float(i), "label": f"l{i}"} for i in range(n)], + run_id, ) diff --git a/server/tests/integration/test_data_streaming_guarantees_postgres.py b/server/tests/integration/test_data_streaming_guarantees_postgres.py index 5240562..52fdd3a 100644 --- a/server/tests/integration/test_data_streaming_guarantees_postgres.py +++ b/server/tests/integration/test_data_streaming_guarantees_postgres.py @@ -78,7 +78,7 @@ async def _bulk_seed(engine: AsyncEngine, n: int) -> None: text( """ INSERT INTO records - (srn, convention_srn, schema_id, schema_version, source, metadata, published_at) + (srn, convention_id, schema_id, schema_version, source, metadata, published_at) SELECT 'urn:osa:localhost:rec:bulk' || g || '@1', 'urn:osa:localhost:conv:test@1.0.0', 'compound', '1.0.0', jsonb_build_object('type', 'deposition', 'id', 'bulk' || g), diff --git a/server/tests/unit/api/__init__.py b/server/tests/unit/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/tests/unit/api/test_error_mapping_hooks.py 
b/server/tests/unit/api/test_error_mapping_hooks.py new file mode 100644 index 0000000..718d0e8 --- /dev/null +++ b/server/tests/unit/api/test_error_mapping_hooks.py @@ -0,0 +1,44 @@ +"""Unit tests for centralized HTTP mapping of hook-registry domain errors (#145, T061). + +The hook deploy / release / live / catalog endpoints raise the shared domain +errors; these assert ``map_osa_error`` turns each into the contract's status +code (see specs/145-feat-hook-versioning/contracts/hook-releases.md). +""" + +from __future__ import annotations + +from osa.application.api.v1.errors import map_osa_error +from osa.domain.shared.error import ( + ConflictError, + NotFoundError, + ReservedNameError, + ValidationError, +) + + +def test_feature_contract_mismatch_is_409() -> None: + # Hook exists with a different fixed feature contract (FR-002/FR-016). + exc = map_osa_error(ConflictError("different feature contract")) + assert exc.status_code == 409 + + +def test_unknown_hook_or_release_is_404() -> None: + exc = map_osa_error(NotFoundError("Hook not found: pocket_detect")) + assert exc.status_code == 404 + + +def test_malformed_release_is_422() -> None: + exc = map_osa_error(ValidationError("missing source_ref", field="source_ref")) + assert exc.status_code == 422 + assert exc.detail["field"] == "source_ref" + + +def test_reserved_hook_name_is_400() -> None: + exc = map_osa_error(ReservedNameError("records", "hook")) + assert exc.status_code == 400 + + +def test_detail_carries_code_and_message() -> None: + exc = map_osa_error(ConflictError("clash", code="hook_contract_mismatch")) + assert exc.detail["code"] == "hook_contract_mismatch" + assert exc.detail["message"] == "clash" diff --git a/server/tests/unit/application/test_app_factory.py b/server/tests/unit/application/test_app_factory.py index f0233d4..e88b6eb 100644 --- a/server/tests/unit/application/test_app_factory.py +++ b/server/tests/unit/application/test_app_factory.py @@ -18,7 +18,7 @@ from osa.application.api.rest.app 
import create_app from osa.application.di import create_container from osa.domain.shared.event import Event, EventHandler, EventId -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import HookIdentity from osa.domain.shared.model.source import IngesterDefinition from osa.domain.shared.model.subscription_registry import SubscriptionRegistry from osa.domain.shared.port.ingester_runner import IngesterInputs, IngesterOutput, IngesterRunner @@ -46,7 +46,7 @@ class StubHookRunner: """Stub HookRunner for testing provider overrides.""" - async def run(self, hook: HookDefinition, inputs: HookInputs, work_dir: Path) -> HookResult: + async def run(self, hook: HookIdentity, inputs: HookInputs, work_dir: Path) -> HookResult: return HookResult(hook_name=hook.name, status=HookStatus.PASSED, duration_seconds=0.0) diff --git a/server/tests/unit/domain/auth/test_handler_configs.py b/server/tests/unit/domain/auth/test_handler_configs.py index 5be7715..05b85da 100644 --- a/server/tests/unit/domain/auth/test_handler_configs.py +++ b/server/tests/unit/domain/auth/test_handler_configs.py @@ -41,7 +41,7 @@ def _make_principal( class TestCreateDepositionHandlerAuth: @pytest.mark.asyncio async def test_create_deposition_allows_depositor(self) -> None: - from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN + from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN depositor = _make_principal(frozenset({Role.DEPOSITOR})) service = AsyncMock() @@ -53,19 +53,19 @@ async def test_create_deposition_allows_depositor(self) -> None: deposition_service=service, ) - conv_srn = ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") - result = await handler.run(CreateDeposition(convention_srn=conv_srn)) + conv_slug = ConventionSlug("test") + result = await handler.run(CreateDeposition(convention_id=conv_slug)) assert result.srn is not None @pytest.mark.asyncio async def test_create_deposition_rejects_unauthenticated(self) -> None: 
- from osa.domain.shared.model.srn import ConventionSRN + from osa.domain.shared.model.srn import ConventionSlug handler = CreateDepositionHandler.__new__(CreateDepositionHandler) with pytest.raises(AuthorizationError) as exc_info: - conv_srn = ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") - await handler.run(CreateDeposition(convention_srn=conv_srn)) + conv_slug = ConventionSlug("test") + await handler.run(CreateDeposition(convention_id=conv_slug)) assert exc_info.value.code == "missing_token" diff --git a/server/tests/unit/domain/auth/test_scope_gate.py b/server/tests/unit/domain/auth/test_scope_gate.py new file mode 100644 index 0000000..0e5158f --- /dev/null +++ b/server/tests/unit/domain/auth/test_scope_gate.py @@ -0,0 +1,84 @@ +"""Unit tests for the RequiresScope gate + Principal scopes (#145, US5, T052).""" + +from __future__ import annotations + +import pytest + +from osa.domain.auth.model.identity import Anonymous +from osa.domain.auth.model.principal import Principal +from osa.domain.auth.model.role import Role +from osa.domain.auth.model.value import ProviderIdentity, UserId +from osa.domain.shared.authorization.gate import RequiresScope, requires_scope +from osa.domain.shared.command import Command, CommandHandler, Result +from osa.domain.shared.error import AuthorizationError + + +class _Cmd(Command): + value: str = "x" + + +class _Res(Result): + ok: bool = True + + +class _ScopedHandler(CommandHandler[_Cmd, _Res]): + __auth__ = requires_scope("hooks:write") + principal: Principal + + async def run(self, cmd: _Cmd) -> _Res: + return _Res() + + +def _principal( + *, roles: frozenset[Role] = frozenset(), scopes: frozenset[str] = frozenset() +) -> Principal: + return Principal( + user_id=UserId.generate(), + provider_identity=ProviderIdentity(provider="m2m", external_id="client-1"), + roles=roles, + scopes=scopes, + ) + + +def test_requires_scope_factory() -> None: + gate = requires_scope("hooks:write") + assert isinstance(gate, RequiresScope) 
+ assert gate.scope == "hooks:write" + + +def test_principal_has_scope() -> None: + p = _principal(scopes=frozenset({"hooks:write"})) + assert p.has_scope("hooks:write") is True + assert p.has_scope("conventions:write") is False + + +class TestRequiresScopeGate: + @pytest.mark.asyncio + async def test_allows_matching_scope(self) -> None: + handler = _ScopedHandler(principal=_principal(scopes=frozenset({"hooks:write"}))) + result = await handler.run(_Cmd()) + assert result.ok is True + + @pytest.mark.asyncio + async def test_allows_admin_without_scope(self) -> None: + handler = _ScopedHandler(principal=_principal(roles=frozenset({Role.ADMIN}))) + result = await handler.run(_Cmd()) + assert result.ok is True + + @pytest.mark.asyncio + async def test_denies_wrong_scope_non_admin(self) -> None: + handler = _ScopedHandler( + principal=_principal( + roles=frozenset({Role.DEPOSITOR}), scopes=frozenset({"other:write"}) + ) + ) + with pytest.raises(AuthorizationError) as exc: + await handler.run(_Cmd()) + assert exc.value.code == "access_denied" + + @pytest.mark.asyncio + async def test_missing_principal_is_unauthenticated(self) -> None: + handler = _ScopedHandler(principal=Anonymous()) # type: ignore[arg-type] + with pytest.raises(AuthorizationError) as exc: + await handler.run(_Cmd()) + assert exc.value.code == "missing_token" diff --git a/server/tests/unit/domain/auth/test_token_iss_routing.py b/server/tests/unit/domain/auth/test_token_iss_routing.py new file mode 100644 index 0000000..4dceed4 --- /dev/null +++ b/server/tests/unit/domain/auth/test_token_iss_routing.py @@ -0,0 +1,142 @@ +"""Unit tests for TokenService `iss`-routed validation (#145, US5, T051). + +The primary HS256 path is byte-identical when no second issuer is configured +(SC-007). A configured second issuer verifies its own **EdDSA (Ed25519)** tokens +— the only algorithm accepted on that path — and failures on one path never leak +into the other. 
+""" + +from __future__ import annotations + +import time + +import jwt +import pytest +from cryptography.hazmat.primitives import serialization +from cryptography.hazmat.primitives.asymmetric import rsa +from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey + +from osa.config import ExtraIssuerConfig, JwtConfig +from osa.domain.auth.model.value import ProviderIdentity, UserId +from osa.domain.auth.service.token import TokenService + +ISSUER = "https://deploy.example.org" +AUDIENCE = "osa-deploy" + + +def _ed25519_keypair() -> tuple[bytes, str]: + key = Ed25519PrivateKey.generate() + private_pem = key.private_bytes( + serialization.Encoding.PEM, + serialization.PrivateFormat.PKCS8, + serialization.NoEncryption(), + ) + public_pem = ( + key.public_key() + .public_bytes( + serialization.Encoding.PEM, + serialization.PublicFormat.SubjectPublicKeyInfo, + ) + .decode() + ) + return private_pem, public_pem + + +@pytest.fixture +def keypair() -> tuple[bytes, str]: + return _ed25519_keypair() + + +def _jwt_config() -> JwtConfig: + return JwtConfig(secret="a" * 40, algorithm="HS256") + + +def _extra(public_pem: str, *, audience: str = AUDIENCE) -> ExtraIssuerConfig: + return ExtraIssuerConfig(issuer=ISSUER, public_key=public_pem, audience=audience) + + +def _m2m_token( + private_pem: bytes, + *, + algorithm: str = "EdDSA", + scope: str = "hooks:write", + iss: str = ISSUER, +) -> str: + return jwt.encode( + { + "iss": iss, + "aud": AUDIENCE, + "sub": "deploy-bot", + "scope": scope, + "exp": int(time.time()) + 3600, + }, + private_pem, + algorithm=algorithm, + ) + + +class TestPrimaryPathUnchanged: + def test_user_token_validates_without_extra_issuer(self) -> None: + service = TokenService(_config=_jwt_config()) + token = service.create_access_token( + UserId.generate(), ProviderIdentity(provider="local", external_id="alice") + ) + payload = service.validate_access_token(token) + assert payload["provider"] == "local" + assert "iss" not in payload # 
primary tokens carry no issuer claim + + def test_user_token_validates_with_extra_issuer_configured(self, keypair) -> None: + _, public_pem = keypair + service = TokenService(_config=_jwt_config(), _extra_issuer=_extra(public_pem)) + token = service.create_access_token( + UserId.generate(), ProviderIdentity(provider="local", external_id="alice") + ) + # Primary token still verifies via HS256 even with the second issuer set. + assert service.validate_access_token(token)["provider"] == "local" + + +class TestExtraIssuerPath: + def test_m2m_token_validates_and_carries_scope(self, keypair) -> None: + private_pem, public_pem = keypair + service = TokenService(_config=_jwt_config(), _extra_issuer=_extra(public_pem)) + payload = service.validate_access_token(_m2m_token(private_pem)) + assert payload["iss"] == ISSUER + assert payload["scope"] == "hooks:write" + + def test_m2m_token_rejected_when_no_extra_issuer(self, keypair) -> None: + private_pem, _ = keypair + service = TokenService(_config=_jwt_config()) # no second issuer + # Falls through to HS256 verification, which cannot verify an Ed25519 token. + with pytest.raises(jwt.InvalidTokenError): + service.validate_access_token(_m2m_token(private_pem)) + + def test_wrong_key_is_rejected(self, keypair) -> None: + _, public_pem = keypair + service = TokenService(_config=_jwt_config(), _extra_issuer=_extra(public_pem)) + # Token claims the trusted issuer but is signed by a different Ed25519 key. 
+ other_private, _ = _ed25519_keypair() + with pytest.raises(jwt.InvalidTokenError): + service.validate_access_token(_m2m_token(other_private)) + + def test_wrong_audience_is_rejected(self, keypair) -> None: + private_pem, public_pem = keypair + service = TokenService( + _config=_jwt_config(), _extra_issuer=_extra(public_pem, audience="other-aud") + ) + with pytest.raises(jwt.InvalidTokenError): + service.validate_access_token(_m2m_token(private_pem)) + + def test_non_eddsa_algorithm_is_rejected(self) -> None: + """An RS256-signed token for the trusted issuer is rejected — the verify + path pins EdDSA, so the token header's `alg` cannot downgrade it.""" + rsa_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) + rsa_private = rsa_key.private_bytes( + serialization.Encoding.PEM, + serialization.PrivateFormat.PKCS8, + serialization.NoEncryption(), + ) + # The configured public key is Ed25519; the attacker token is RS256. + _, ed_public = _ed25519_keypair() + service = TokenService(_config=_jwt_config(), _extra_issuer=_extra(ed_public)) + with pytest.raises(jwt.InvalidTokenError): + service.validate_access_token(_m2m_token(rsa_private, algorithm="RS256")) diff --git a/server/tests/unit/domain/data/test_data_catalog_service.py b/server/tests/unit/domain/data/test_data_catalog_service.py index 65dcfc8..edb09cf 100644 --- a/server/tests/unit/domain/data/test_data_catalog_service.py +++ b/server/tests/unit/domain/data/test_data_catalog_service.py @@ -22,7 +22,7 @@ from osa.domain.data.service.data_catalog import DataCatalogService from osa.domain.semantics.model.value import FieldType from osa.domain.shared.error import NotFoundError -from osa.domain.shared.model.ids import RecordId +from osa.domain.shared.model.ids import FeatureName, RecordId from osa.domain.shared.model.srn import SchemaId RECORDS_COLUMNS = [ @@ -95,7 +95,7 @@ async def test_resolve_table_records() -> None: @pytest.mark.asyncio async def test_resolve_table_feature() -> None: resolved = 
await _service().resolve_table( - "compound@1.0.0", TableKind.FEATURE, feature_name="chem_features" + "compound@1.0.0", TableKind.FEATURE, feature_name=FeatureName("chem_features") ) assert resolved.columns == FEATURE_COLUMNS @@ -103,7 +103,9 @@ async def test_resolve_table_feature() -> None: @pytest.mark.asyncio async def test_resolve_table_unknown_feature_404s() -> None: with pytest.raises(NotFoundError) as exc: - await _service().resolve_table("compound@1.0.0", TableKind.FEATURE, feature_name="nope") + await _service().resolve_table( + "compound@1.0.0", TableKind.FEATURE, feature_name=FeatureName("nope") + ) assert exc.value.code == "table_not_found" @@ -112,4 +114,6 @@ async def test_resolve_table_kind_must_match() -> None: # A resource named "records" exists, but with kind RECORDS — asking for a # FEATURE of that name must not match it. with pytest.raises(NotFoundError): - await _service().resolve_table("compound@1.0.0", TableKind.FEATURE, feature_name="records") + await _service().resolve_table( + "compound@1.0.0", TableKind.FEATURE, feature_name=FeatureName("records") + ) diff --git a/server/tests/unit/domain/data/test_table_read_handlers.py b/server/tests/unit/domain/data/test_table_read_handlers.py index 9a9da74..9252ab3 100644 --- a/server/tests/unit/domain/data/test_table_read_handlers.py +++ b/server/tests/unit/domain/data/test_table_read_handlers.py @@ -119,6 +119,9 @@ async def test_feature_handler_resolves_feature_and_builds_feature_plan() -> Non catalog_service=catalog, query_service=query, config=FakeConfig() ) result = await handler.run(ReadFeatureTable(schema="compound@1.0.0", feature="chem_features")) - assert catalog.resolved_with == ("compound@1.0.0", TableKind.FEATURE, "chem_features") + schema, table_kind, feature_name = catalog.resolved_with + assert (schema, table_kind) == ("compound@1.0.0", TableKind.FEATURE) + # feature_name is now a HookName (frozen RootModel), not a bare str. 
+ assert feature_name.root == "chem_features" assert result.plan.table_kind == TableKind.FEATURE - assert result.plan.feature_name == "chem_features" + assert result.plan.feature_name.root == "chem_features" diff --git a/server/tests/unit/domain/deposition/test_convention.py b/server/tests/unit/domain/deposition/test_convention.py index 29bb09b..91a8283 100644 --- a/server/tests/unit/domain/deposition/test_convention.py +++ b/server/tests/unit/domain/deposition/test_convention.py @@ -4,11 +4,11 @@ from osa.domain.deposition.model.convention import Convention from osa.domain.deposition.model.value import FileRequirements -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, SchemaId -def _make_conv_srn(id: str = "test-conv", version: str = "1.0.0") -> ConventionSRN: - return ConventionSRN.parse(f"urn:osa:localhost:conv:{id}@{version}") +def _make_conv_slug(slug: str = "test-conv") -> ConventionSlug: + return ConventionSlug(slug) def _make_schema_id(id: str = "test-schema", version: str = "1.0.0") -> SchemaId: @@ -27,7 +27,7 @@ def _make_file_reqs() -> FileRequirements: class TestConventionCreation: def test_create_with_required_fields(self): conv = Convention( - srn=_make_conv_srn(), + id=_make_conv_slug(), title="scRNA-seq Submission", schema_id=_make_schema_id(), file_requirements=_make_file_reqs(), @@ -39,7 +39,7 @@ def test_create_with_required_fields(self): def test_create_with_description(self): conv = Convention( - srn=_make_conv_srn(), + id=_make_conv_slug(), title="Test", description="A test convention", schema_id=_make_schema_id(), @@ -50,7 +50,7 @@ def test_create_with_description(self): def test_create_with_empty_hooks(self): conv = Convention( - srn=_make_conv_srn(), + id=_make_conv_slug(), title="Test", schema_id=_make_schema_id(), file_requirements=_make_file_reqs(), @@ -60,13 +60,14 @@ def test_create_with_empty_hooks(self): assert conv.hooks == [] -class TestConventionImmutability: 
- def test_srn_is_versioned(self): +class TestConventionIdentity: + def test_id_is_bare_slug(self): + # #145: conventions are unversioned; identity is a bare slug, not a URN. conv = Convention( - srn=_make_conv_srn("my-conv", "2.0.0"), + id=_make_conv_slug("my-conv"), title="Test", schema_id=_make_schema_id(), file_requirements=_make_file_reqs(), created_at=datetime.now(UTC), ) - assert str(conv.srn) == "urn:osa:localhost:conv:my-conv@2.0.0" + assert conv.id.root == "my-conv" diff --git a/server/tests/unit/domain/deposition/test_convention_registered.py b/server/tests/unit/domain/deposition/test_convention_registered.py index 71352e0..630d610 100644 --- a/server/tests/unit/domain/deposition/test_convention_registered.py +++ b/server/tests/unit/domain/deposition/test_convention_registered.py @@ -1,7 +1,7 @@ """Unit tests for enriched ConventionRegistered event. Tests for User Story 2: Convention Initialization Chain. -Verifies ConventionRegistered carries hooks: list[HookDefinition]. +Verifies ConventionRegistered carries hooks: list[HookIdentity]. 
""" from uuid import uuid4 @@ -10,28 +10,24 @@ from osa.domain.shared.event import EventId from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, - OciConfig, + HookIdentity, + HookName, TableFeatureSpec, ) -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, SchemaId -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test-conv") def _make_schema_id() -> SchemaId: return SchemaId.parse("test@1.0.0") -def _make_hook_definition(name: str = "pocket_detect") -> HookDefinition: - return HookDefinition( - name=name, - runtime=OciConfig( - image="ghcr.io/example/hook", - digest="sha256:abc123", - ), +def _make_hook_definition(name: str = "pocket_detect") -> HookIdentity: + return HookIdentity( + name=HookName(name), feature=TableFeatureSpec( cardinality="many", columns=[ColumnDef(name="score", json_type="number", required=True)], @@ -41,25 +37,25 @@ def _make_hook_definition(name: str = "pocket_detect") -> HookDefinition: class TestConventionRegisteredWithHooks: def test_event_carries_hooks(self): - """ConventionRegistered carries hooks: list[HookDefinition].""" + """ConventionRegistered carries hooks: list[HookIdentity].""" hooks = [_make_hook_definition("hook_a"), _make_hook_definition("hook_b")] event = ConventionRegistered( id=EventId(uuid4()), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), schema_id=_make_schema_id(), schema_fields=[], hooks=hooks, ) assert len(event.hooks) == 2 - assert event.hooks[0].name == "hook_a" - assert event.hooks[1].name == "hook_b" + assert event.hooks[0].name.root == "hook_a" + assert event.hooks[1].name.root == "hook_b" def test_event_defaults_to_empty_hooks(self): """ConventionRegistered defaults to empty hooks list.""" event = ConventionRegistered( id=EventId(uuid4()), - convention_srn=_make_conv_srn(), + 
convention_id=_make_conv_slug(), schema_id=_make_schema_id(), ) @@ -70,7 +66,7 @@ def test_serialization_with_hooks(self): hooks = [_make_hook_definition()] event = ConventionRegistered( id=EventId(uuid4()), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), schema_id=_make_schema_id(), schema_fields=[], hooks=hooks, @@ -80,6 +76,5 @@ def test_serialization_with_hooks(self): restored = ConventionRegistered.model_validate(data) assert len(restored.hooks) == 1 - assert restored.hooks[0].name == "pocket_detect" - assert restored.hooks[0].runtime.image == "ghcr.io/example/hook" + assert restored.hooks[0].name.root == "pocket_detect" assert len(restored.hooks[0].feature.columns) == 1 diff --git a/server/tests/unit/domain/deposition/test_convention_service.py b/server/tests/unit/domain/deposition/test_convention_service.py index 59ea3fc..e356893 100644 --- a/server/tests/unit/domain/deposition/test_convention_service.py +++ b/server/tests/unit/domain/deposition/test_convention_service.py @@ -1,4 +1,4 @@ -"""Unit tests for ConventionService.""" +"""Unit tests for ConventionService (#145 bundled ``deploy``).""" from datetime import UTC, datetime from unittest.mock import AsyncMock @@ -6,21 +6,28 @@ import pytest from osa.domain.deposition.model.convention import Convention +from osa.domain.deposition.model.deploy import HookDeploy from osa.domain.deposition.model.value import FileRequirements from osa.domain.deposition.service.convention import ConventionService from osa.domain.semantics.model.value import Cardinality, FieldDefinition, FieldType from osa.domain.shared.error import NotFoundError from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + HookIdentity, + HookName, OciConfig, TableFeatureSpec, ) -from osa.domain.shared.model.srn import ConventionSRN, Domain, SchemaId, SchemaIdentifier +from osa.domain.shared.model.srn import ( + ConventionSlug, + Domain, + SchemaId, + SchemaIdentifier, +) -def _make_conv_srn(id: str = 
"test-conv", version: str = "1.0.0") -> ConventionSRN: - return ConventionSRN.parse(f"urn:osa:localhost:conv:{id}@{version}") +def _make_conv_slug(slug: str = "test-conv") -> ConventionSlug: + return ConventionSlug(slug) def _make_schema_id(id: str = "test-schema", version: str = "1.0.0") -> SchemaId: @@ -47,19 +54,17 @@ def _make_file_reqs() -> FileRequirements: ) -def _make_hook_def(name: str = "pocket_detect") -> HookDefinition: - return HookDefinition( - name=name, - runtime=OciConfig( - image="ghcr.io/example/hook", - digest="sha256:abc123", - ), - feature=TableFeatureSpec( - cardinality="one", - columns=[ - ColumnDef(name="score", json_type="number", required=True), - ], +def _make_hook_deploy(name: str = "pocket_detect") -> HookDeploy: + return HookDeploy( + identity=HookIdentity( + name=HookName(name), + feature=TableFeatureSpec( + cardinality="one", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ), ), + runtime=OciConfig(image="ghcr.io/example/hook", digest="sha256:abc123"), + source_ref="git+https://example.com/hook@abc", ) @@ -67,6 +72,7 @@ def _make_service( conv_repo: AsyncMock | None = None, schema_service: AsyncMock | None = None, outbox: AsyncMock | None = None, + hook_registry: AsyncMock | None = None, ) -> ConventionService: mock_schema_service = schema_service or AsyncMock() if not schema_service: @@ -74,89 +80,100 @@ def _make_service( mock_schema.id = _make_schema_id() mock_schema.fields = [] mock_schema_service.create_schema.return_value = mock_schema + # No existing schema → deploy proceeds to create_schema. 
+ mock_schema_service.get_schema.side_effect = NotFoundError("schema not found") return ConventionService( convention_repo=conv_repo or AsyncMock(), schema_service=mock_schema_service, metadata_service=AsyncMock(), + hook_registry=hook_registry or AsyncMock(), outbox=outbox or AsyncMock(), node_domain=Domain("localhost"), ) -class TestConventionServiceCreate: +async def _deploy(service: ConventionService, **overrides) -> Convention: + kwargs = dict( + slug=_make_conv_slug(), + title="Test Convention", + file_requirements=_make_file_reqs(), + schema_slug=SchemaIdentifier("test-schema"), + schema_version="1.0.0", + schema_fields=_make_field_defs(), + hooks=None, + ) + kwargs.update(overrides) + return await service.deploy(**kwargs) + + +class TestConventionServiceDeploy: @pytest.mark.asyncio - async def test_create_convention_creates_schema(self): + async def test_deploy_creates_schema(self): conv_repo = AsyncMock() schema_service = AsyncMock() mock_schema = AsyncMock() mock_schema.id = _make_schema_id() mock_schema.fields = [] schema_service.create_schema.return_value = mock_schema + schema_service.get_schema.side_effect = NotFoundError("not found") service = _make_service(conv_repo, schema_service) - result = await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="Test Convention", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ) + result = await _deploy(service, title="Test Convention") assert result.title == "Test Convention" conv_repo.save.assert_called_once() schema_service.create_schema.assert_called_once() @pytest.mark.asyncio - async def test_create_convention_generates_srn(self): + async def test_deploy_returns_slug_identity(self): service = _make_service() - result = await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="Test", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ) - assert 
str(result.srn).startswith("urn:osa:localhost:conv:") + result = await _deploy(service, slug=_make_conv_slug("my-conv")) + assert result.id.root == "my-conv" @pytest.mark.asyncio - async def test_create_convention_with_hooks_emits_hooks_in_event(self): + async def test_deploy_with_hooks_emits_hooks_in_event(self): outbox = AsyncMock() - service = _make_service(outbox=outbox) - hooks = [_make_hook_def()] - result = await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="With Hooks", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - hooks=hooks, - ) - assert result.hooks == hooks - # Verify the emitted event carries hook definitions directly + hook_registry = AsyncMock() + service = _make_service(outbox=outbox, hook_registry=hook_registry) + hooks = [_make_hook_deploy()] + result = await _deploy(service, hooks=hooks) + # Convention references the hook by name. + assert result.hooks == [HookName("pocket_detect")] + # The registry was asked to upsert the identity + mint the release. + hook_registry.upsert_identity.assert_called_once() + hook_registry.create_release.assert_called_once() + # The emitted event carries hook identities directly. 
emitted = outbox.append.call_args[0][0] assert len(emitted.hooks) == 1 - assert emitted.hooks[0].name == "pocket_detect" + assert emitted.hooks[0].name.root == "pocket_detect" @pytest.mark.asyncio - async def test_create_convention_without_hooks_emits_empty_hooks(self): + async def test_deploy_without_hooks_emits_empty_hooks(self): outbox = AsyncMock() service = _make_service(outbox=outbox) - await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="No Hooks", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ) + await _deploy(service, title="No Hooks", hooks=None) emitted = outbox.append.call_args[0][0] assert emitted.hooks == [] + @pytest.mark.asyncio + async def test_redeploy_same_slug_is_idempotent_upsert(self): + # #145: conventions are mutable + unversioned — re-deploying the same + # slug upserts in place and must NOT raise a conflict. + conv_repo = AsyncMock() + service = _make_service(conv_repo=conv_repo) + + first = await _deploy(service, slug=_make_conv_slug("proteins")) + second = await _deploy(service, slug=_make_conv_slug("proteins")) + + assert first.id == second.id + assert conv_repo.save.call_count == 2 + class TestConventionServiceGet: @pytest.mark.asyncio async def test_get_existing(self): conv = Convention( - srn=_make_conv_srn(), + id=_make_conv_slug(), title="Test", schema_id=_make_schema_id(), file_requirements=_make_file_reqs(), @@ -166,7 +183,7 @@ async def test_get_existing(self): conv_repo.get.return_value = conv service = _make_service(conv_repo=conv_repo) - result = await service.get_convention(conv.srn) + result = await service.get_convention(conv.id) assert result == conv @pytest.mark.asyncio @@ -176,14 +193,14 @@ async def test_get_nonexistent_raises(self): service = _make_service(conv_repo=conv_repo) with pytest.raises(NotFoundError): - await service.get_convention(_make_conv_srn()) + await service.get_convention(_make_conv_slug()) class TestConventionServiceList: 
@pytest.mark.asyncio async def test_list_conventions(self): conv = Convention( - srn=_make_conv_srn(), + id=_make_conv_slug(), title="Test", schema_id=_make_schema_id(), file_requirements=_make_file_reqs(), diff --git a/server/tests/unit/domain/deposition/test_convention_service_v2.py b/server/tests/unit/domain/deposition/test_convention_service_v2.py index 1e8557e..7a4bd0d 100644 --- a/server/tests/unit/domain/deposition/test_convention_service_v2.py +++ b/server/tests/unit/domain/deposition/test_convention_service_v2.py @@ -1,21 +1,33 @@ -"""Unit tests for ConventionService with inline schema creation and source fields.""" +"""Unit tests for ConventionService ``deploy`` — inline schema + ingester + event.""" from unittest.mock import AsyncMock import pytest from osa.domain.deposition.event.convention_registered import ConventionRegistered +from osa.domain.deposition.model.deploy import HookDeploy from osa.domain.deposition.model.value import FileRequirements from osa.domain.deposition.service.convention import ConventionService from osa.domain.semantics.model.value import Cardinality, FieldDefinition, FieldType +from osa.domain.shared.error import NotFoundError from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + HookIdentity, + HookName, OciConfig, TableFeatureSpec, ) from osa.domain.shared.model.source import IngesterDefinition -from osa.domain.shared.model.srn import Domain, SchemaId, SchemaIdentifier +from osa.domain.shared.model.srn import ( + ConventionSlug, + Domain, + SchemaId, + SchemaIdentifier, +) + + +def _make_conv_slug(slug: str = "test-conv") -> ConventionSlug: + return ConventionSlug(slug) def _make_field_defs() -> list[FieldDefinition]: @@ -44,19 +56,17 @@ def _make_file_reqs() -> FileRequirements: ) -def _make_hook_def(name: str = "detect_pockets") -> HookDefinition: - return HookDefinition( - name=name, - runtime=OciConfig( - image="ghcr.io/example/pocketeer", - digest="sha256:abc123", - ), - feature=TableFeatureSpec( - 
cardinality="many", - columns=[ - ColumnDef(name="score", json_type="number", required=True), - ], +def _make_hook_deploy(name: str = "detect_pockets") -> HookDeploy: + return HookDeploy( + identity=HookIdentity( + name=HookName(name), + feature=TableFeatureSpec( + cardinality="many", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ), ), + runtime=OciConfig(image="ghcr.io/example/pocketeer", digest="sha256:abc123"), + source_ref="git+https://example.com/pocketeer@abc", ) @@ -72,40 +82,54 @@ def _make_service( conv_repo: AsyncMock | None = None, schema_service: AsyncMock | None = None, outbox: AsyncMock | None = None, + hook_registry: AsyncMock | None = None, ) -> ConventionService: """Create a ConventionService with mock deps.""" mock_schema_service = schema_service or AsyncMock() - # Default: create_schema returns a Schema-like obj with .srn if not schema_service: mock_schema = AsyncMock() mock_schema.id = SchemaId.parse("testschema12345678@1.0.0") + mock_schema.fields = [] mock_schema_service.create_schema.return_value = mock_schema + # No existing schema → deploy proceeds to create_schema. 
+ mock_schema_service.get_schema.side_effect = NotFoundError("schema not found") return ConventionService( convention_repo=conv_repo or AsyncMock(), schema_service=mock_schema_service, metadata_service=AsyncMock(), + hook_registry=hook_registry or AsyncMock(), outbox=outbox or AsyncMock(), node_domain=Domain("localhost"), ) -class TestCreateConventionWithInlineSchema: +async def _deploy(service: ConventionService, **overrides): + kwargs = dict( + slug=_make_conv_slug(), + title="PDB Structures", + file_requirements=_make_file_reqs(), + schema_slug=SchemaIdentifier("test-schema"), + schema_version="1.0.0", + schema_fields=_make_field_defs(), + hooks=None, + ) + kwargs.update(overrides) + return await service.deploy(**kwargs) + + +class TestDeployWithInlineSchema: @pytest.mark.asyncio async def test_creates_schema_from_field_definitions(self): schema_service = AsyncMock() mock_schema = AsyncMock() mock_schema.id = SchemaId.parse("testschema12345678@1.0.0") + mock_schema.fields = [] schema_service.create_schema.return_value = mock_schema + schema_service.get_schema.side_effect = NotFoundError("not found") service = _make_service(schema_service=schema_service) - await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="PDB Structures", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ) + await _deploy(service, title="PDB Structures") # SchemaService.create_schema should have been called with field defs schema_service.create_schema.assert_called_once() call_kwargs = schema_service.create_schema.call_args @@ -119,30 +143,19 @@ async def test_convention_references_created_schema_id(self): schema_id = SchemaId.parse("created123456789@1.0.0") mock_schema = AsyncMock() mock_schema.id = schema_id + mock_schema.fields = [] schema_service.create_schema.return_value = mock_schema + schema_service.get_schema.side_effect = NotFoundError("not found") service = _make_service(schema_service=schema_service) - result = await 
service.create_convention( - id=SchemaIdentifier("test-schema"), - title="Test", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ) + result = await _deploy(service, title="Test") assert result.schema_id == schema_id @pytest.mark.asyncio async def test_convention_saves_ingester_definition(self): service = _make_service() ingester = _make_ingester_def() - result = await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="With Ingester", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ingester=ingester, - ) + result = await _deploy(service, title="With Ingester", ingester=ingester) assert result.ingester is not None assert result.ingester.image == "osa-sources/rcsb-pdb:latest" assert result.ingester.digest == "sha256:abc123" @@ -151,63 +164,37 @@ async def test_convention_saves_ingester_definition(self): @pytest.mark.asyncio async def test_convention_ingester_defaults_to_none(self): service = _make_service() - result = await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="No Ingester", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ) + result = await _deploy(service, title="No Ingester") assert result.ingester is None @pytest.mark.asyncio async def test_convention_with_hooks_emits_hooks_in_event(self): outbox = AsyncMock() service = _make_service(outbox=outbox) - hooks = [_make_hook_def()] - await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="With Hooks", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - hooks=hooks, - ) + hooks = [_make_hook_deploy()] + await _deploy(service, title="With Hooks", hooks=hooks) emitted = outbox.append.call_args[0][0] assert len(emitted.hooks) == 1 - assert emitted.hooks[0].name == "detect_pockets" + assert emitted.hooks[0].name.root == "detect_pockets" class TestConventionRegisteredEvent: 
@pytest.mark.asyncio - async def test_create_convention_emits_convention_registered(self): + async def test_deploy_emits_convention_registered(self): outbox = AsyncMock() service = _make_service(outbox=outbox) - result = await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="With Source", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ingester=_make_ingester_def(), - ) + result = await _deploy(service, title="With Source", ingester=_make_ingester_def()) outbox.append.assert_called_once() emitted = outbox.append.call_args[0][0] assert isinstance(emitted, ConventionRegistered) - assert emitted.convention_srn == result.srn + assert emitted.convention_id == result.id @pytest.mark.asyncio - async def test_create_convention_without_source_still_emits_event(self): + async def test_deploy_without_source_still_emits_event(self): outbox = AsyncMock() service = _make_service(outbox=outbox) - result = await service.create_convention( - id=SchemaIdentifier("test-schema"), - title="No Source", - version="1.0.0", - schema=_make_field_defs(), - file_requirements=_make_file_reqs(), - ) + result = await _deploy(service, title="No Source") outbox.append.assert_called_once() emitted = outbox.append.call_args[0][0] assert isinstance(emitted, ConventionRegistered) - assert emitted.convention_srn == result.srn + assert emitted.convention_id == result.id diff --git a/server/tests/unit/domain/deposition/test_deposition.py b/server/tests/unit/domain/deposition/test_deposition.py index a089f40..8006119 100644 --- a/server/tests/unit/domain/deposition/test_deposition.py +++ b/server/tests/unit/domain/deposition/test_deposition.py @@ -9,21 +9,21 @@ from osa.domain.deposition.model.aggregate import Deposition from osa.domain.deposition.model.value import DepositionFile, DepositionStatus from osa.domain.shared.error import InvalidStateError -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from 
osa.domain.shared.model.srn import ConventionSlug, DepositionSRN def _make_dep_srn(id: str = "test-dep") -> DepositionSRN: return DepositionSRN.parse(f"urn:osa:localhost:dep:{id}") -def _make_conv_srn(id: str = "test-conv", version: str = "1.0.0") -> ConventionSRN: - return ConventionSRN.parse(f"urn:osa:localhost:conv:{id}@{version}") +def _make_conv_slug(slug: str = "test-conv") -> ConventionSlug: + return ConventionSlug(slug) def _make_deposition(**overrides) -> Deposition: defaults = dict( srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), owner_id=UserId(uuid4()), created_at=datetime.now(UTC), updated_at=datetime.now(UTC), @@ -42,9 +42,9 @@ def _make_file(name: str = "data.csv", size: int = 1000) -> DepositionFile: class TestDepositionCreation: - def test_create_with_convention_srn(self): + def test_create_with_convention_id(self): dep = _make_deposition() - assert dep.convention_srn == _make_conv_srn() + assert dep.convention_id == _make_conv_slug() assert dep.status == DepositionStatus.DRAFT def test_create_with_empty_metadata(self): diff --git a/server/tests/unit/domain/deposition/test_deposition_service.py b/server/tests/unit/domain/deposition/test_deposition_service.py index 53aeba3..f57f621 100644 --- a/server/tests/unit/domain/deposition/test_deposition_service.py +++ b/server/tests/unit/domain/deposition/test_deposition_service.py @@ -21,15 +21,15 @@ from osa.domain.deposition.event.submitted import DepositionSubmittedEvent from osa.domain.deposition.service.deposition import DepositionService from osa.domain.shared.error import NotFoundError, ValidationError -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN, Domain, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN, Domain, SchemaId def _make_dep_srn(id: str = "test-dep") -> DepositionSRN: return DepositionSRN.parse(f"urn:osa:localhost:dep:{id}") -def _make_conv_srn(id: str = "test-conv", version: str = "1.0.0") 
-> ConventionSRN: - return ConventionSRN.parse(f"urn:osa:localhost:conv:{id}@{version}") +def _make_conv_slug(slug: str = "test-conv") -> ConventionSlug: + return ConventionSlug(slug) def _make_schema_id(id: str = "test-schema", version: str = "1.0.0") -> SchemaId: @@ -49,7 +49,7 @@ def _make_file_reqs(**overrides) -> FileRequirements: def _make_convention(**overrides) -> Convention: defaults = dict( - srn=_make_conv_srn(), + id=_make_conv_slug(), title="Test Convention", schema_id=_make_schema_id(), file_requirements=_make_file_reqs(), @@ -62,7 +62,7 @@ def _make_convention(**overrides) -> Convention: def _make_deposition(**overrides) -> Deposition: defaults = dict( srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), owner_id=UserId(uuid4()), created_at=datetime.now(UTC), updated_at=datetime.now(UTC), @@ -95,10 +95,10 @@ async def test_create_with_valid_convention(self): service = _make_service(conv_repo=conv_repo) result = await service.create( - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), owner_id=owner, ) - assert result.convention_srn == _make_conv_srn() + assert result.convention_id == _make_conv_slug() assert result.owner_id == owner assert result.status == DepositionStatus.DRAFT @@ -110,7 +110,7 @@ async def test_create_rejects_nonexistent_convention(self): service = _make_service(conv_repo=conv_repo) with pytest.raises(NotFoundError, match="Convention not found"): await service.create( - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), owner_id=UserId(uuid4()), ) @@ -121,7 +121,7 @@ async def test_create_generates_srn(self): service = _make_service(conv_repo=conv_repo) result = await service.create( - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), owner_id=UserId(uuid4()), ) assert str(result.srn).startswith("urn:osa:localhost:dep:") @@ -134,27 +134,27 @@ async def test_create_saves_to_repo(self): service = _make_service(dep_repo=dep_repo, 
conv_repo=conv_repo) await service.create( - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), owner_id=UserId(uuid4()), ) dep_repo.save.assert_called_once() @pytest.mark.asyncio async def test_create_emits_deposition_created_event(self): - conv_srn = _make_conv_srn() + conv_slug = _make_conv_slug() conv_repo = AsyncMock() - conv_repo.get.return_value = _make_convention(srn=conv_srn) + conv_repo.get.return_value = _make_convention(id=conv_slug) outbox = AsyncMock() owner = UserId(uuid4()) service = _make_service(conv_repo=conv_repo, outbox=outbox) - result = await service.create(convention_srn=conv_srn, owner_id=owner) + result = await service.create(convention_id=conv_slug, owner_id=owner) outbox.append.assert_called_once() event = outbox.append.call_args[0][0] assert isinstance(event, DepositionCreatedEvent) assert event.deposition_id == result.srn - assert event.convention_srn == conv_srn + assert event.convention_id == conv_slug assert event.owner_id == owner diff --git a/server/tests/unit/domain/deposition/test_deposition_submitted_enriched.py b/server/tests/unit/domain/deposition/test_deposition_submitted_enriched.py index 2ec5ee8..08e12c2 100644 --- a/server/tests/unit/domain/deposition/test_deposition_submitted_enriched.py +++ b/server/tests/unit/domain/deposition/test_deposition_submitted_enriched.py @@ -1,66 +1,45 @@ """Unit tests for enriched DepositionSubmittedEvent. -Verifies the event carries convention_srn and hooks. +Verifies the event carries convention_id and hooks (hook **names**, #145 — the +validation handler resolves each name's live release at run start). 
""" from uuid import uuid4 from osa.domain.deposition.event.submitted import DepositionSubmittedEvent from osa.domain.shared.event import EventId -from osa.domain.shared.model.hook import ( - ColumnDef, - HookDefinition, - OciConfig, - TableFeatureSpec, -) -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.hook import HookName +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN def _make_dep_srn() -> DepositionSRN: return DepositionSRN.parse("urn:osa:localhost:dep:test-dep") -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") - - -def _make_hook_definition() -> HookDefinition: - return HookDefinition( - name="pocketeer", - runtime=OciConfig( - image="osa-hooks/pocketeer:latest", - digest="sha256:abc123", - config={"threshold": 0.5}, - ), - feature=TableFeatureSpec( - cardinality="many", - columns=[ColumnDef(name="score", json_type="number", required=True)], - ), - ) +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test-conv") class TestDepositionSubmittedEnriched: - def test_carries_convention_srn(self): - """Event has convention_srn field.""" + def test_carries_convention_id(self): + """Event has convention_id field.""" event = DepositionSubmittedEvent( id=EventId(uuid4()), deposition_id=_make_dep_srn(), metadata={"title": "Test"}, - convention_srn=_make_conv_srn(), - hooks=[_make_hook_definition()], + convention_id=_make_conv_slug(), + hooks=[HookName("pocketeer")], ) - assert event.convention_srn == _make_conv_srn() + assert event.convention_id == _make_conv_slug() def test_carries_hooks(self): - """Event has hooks field with HookDefinition list.""" - hook = _make_hook_definition() + """Event has hooks field with HookName list.""" event = DepositionSubmittedEvent( id=EventId(uuid4()), deposition_id=_make_dep_srn(), metadata={"title": "Test"}, - convention_srn=_make_conv_srn(), - hooks=[hook], + 
convention_id=_make_conv_slug(), + hooks=[HookName("pocketeer")], ) assert len(event.hooks) == 1 - assert event.hooks[0].name == "pocketeer" - assert event.hooks[0].runtime.digest == "sha256:abc123" + assert event.hooks[0].root == "pocketeer" diff --git a/server/tests/unit/domain/deposition/test_event_chain.py b/server/tests/unit/domain/deposition/test_event_chain.py index 31ac8a1..7efe874 100644 --- a/server/tests/unit/domain/deposition/test_event_chain.py +++ b/server/tests/unit/domain/deposition/test_event_chain.py @@ -15,15 +15,10 @@ from osa.domain.record.event.record_published import RecordPublished from osa.domain.record.handler.convert_deposition_to_record import ConvertDepositionToRecord from osa.domain.shared.event import EventId -from osa.domain.shared.model.hook import ( - ColumnDef, - HookDefinition, - OciConfig, - TableFeatureSpec, -) +from osa.domain.shared.model.hook import FeatureName, HookName from osa.domain.shared.model.source import DepositionSource from osa.domain.shared.model.srn import ( - ConventionSRN, + ConventionSlug, DepositionSRN, RecordSRN, SchemaId, @@ -42,8 +37,8 @@ def _make_dep_srn() -> DepositionSRN: return DepositionSRN.parse("urn:osa:localhost:dep:test-dep") -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test-conv") def _make_record_srn() -> RecordSRN: @@ -53,7 +48,7 @@ def _make_record_srn() -> RecordSRN: def _make_deposition() -> Deposition: return Deposition( srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=DepositionStatus.IN_VALIDATION, owner_id=UserId(uuid4()), created_at=datetime.now(UTC), @@ -61,29 +56,21 @@ def _make_deposition() -> Deposition: ) -def _make_hook_definition(name: str = "pocket_detect") -> HookDefinition: - return HookDefinition( - name=name, - runtime=OciConfig( - image="ghcr.io/example/hook", - digest="sha256:abc123", - ), - 
feature=TableFeatureSpec( - cardinality="many", - columns=[ColumnDef(name="score", json_type="number", required=True)], - ), - ) +def _make_hook_name(name: str = "pocket_detect") -> HookName: + # #145: the submitted event carries hook **names** (the feature-table slots); + # the validation handler resolves each name's live release at run start. + return HookName(name) def _make_submitted_event( dep_srn: DepositionSRN | None = None, - hooks: list[HookDefinition] | None = None, + hooks: list[HookName] | None = None, ) -> DepositionSubmittedEvent: return DepositionSubmittedEvent( id=EventId(uuid4()), deposition_id=dep_srn or _make_dep_srn(), metadata={"title": "Test"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), hooks=hooks or [], ) @@ -132,13 +119,15 @@ async def test_handler_delegates_with_event_data(self): ) dep = _make_deposition() - hooks = [_make_hook_definition()] + # #145: the submitted event carries hook NAMES (HookName), matching + # DepositionSubmittedEvent.hooks: list[HookName]. 
+ hooks = [_make_hook_name()] event = _make_submitted_event(dep_srn=dep.srn, hooks=hooks) await handler.handle(event) validation_service.validate_deposition.assert_called_once_with( deposition_srn=dep.srn, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), metadata={"title": "Test"}, hooks=hooks, ) @@ -193,7 +182,7 @@ async def test_auto_approve_on_completed(self): id=EventId(uuid4()), validation_run_srn=ValidationRunSRN.parse("urn:osa:localhost:val:run1"), deposition_srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=RunStatus.COMPLETED, hook_results=[], metadata={"title": "Test"}, @@ -211,7 +200,7 @@ async def test_skips_approve_on_failed(self): id=EventId(uuid4()), validation_run_srn=ValidationRunSRN.parse("urn:osa:localhost:val:run1"), deposition_srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=RunStatus.FAILED, hook_results=[], metadata={"title": "Test"}, @@ -234,7 +223,7 @@ async def test_publishes_record(self): id=EventId(uuid4()), deposition_srn=_make_dep_srn(), metadata={"title": "Test"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), expected_features=["pocket_detect"], ) await handler.handle(event) @@ -244,8 +233,8 @@ async def test_publishes_record(self): assert isinstance(draft, RecordDraft) assert isinstance(draft.source, DepositionSource) assert draft.source.id == str(_make_dep_srn()) - assert draft.convention_srn == _make_conv_srn() - assert draft.expected_features == ["pocket_detect"] + assert draft.convention_id == _make_conv_slug() + assert [f.root for f in draft.expected_features] == ["pocket_detect"] assert draft.metadata == {"title": "Test"} @@ -267,7 +256,7 @@ async def test_delegates_to_feature_service(self): record_srn=_make_record_srn(), source=DepositionSource(id=str(_make_dep_srn())), metadata={"title": "Test"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), 
schema_id=_make_schema_id(), expected_features=["pocket_detect"], ) @@ -279,5 +268,5 @@ async def test_delegates_to_feature_service(self): feature_service.insert_features_for_record.assert_called_once_with( hook_output_dir="/fake/output/dir", record_srn=str(_make_record_srn()), - expected_features=["pocket_detect"], + expected_features=[FeatureName("pocket_detect")], ) diff --git a/server/tests/unit/domain/deposition/test_hook_reserved_name.py b/server/tests/unit/domain/deposition/test_hook_reserved_name.py index b20650c..f0ea9a5 100644 --- a/server/tests/unit/domain/deposition/test_hook_reserved_name.py +++ b/server/tests/unit/domain/deposition/test_hook_reserved_name.py @@ -1,20 +1,19 @@ -"""T065 — HookDefinition rejects reserved hook names (records, datasets).""" +"""T065 — HookIdentity rejects reserved hook names (records, datasets).""" import pytest from osa.domain.shared.error import ReservedNameError from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, - OciConfig, + HookIdentity, + HookName, TableFeatureSpec, ) -def _hook(name: str) -> HookDefinition: - return HookDefinition( - name=name, - runtime=OciConfig(image="ghcr.io/example/hook", digest="sha256:abc123"), +def _hook(name: str) -> HookIdentity: + return HookIdentity( + name=HookName(name), feature=TableFeatureSpec( cardinality="one", columns=[ColumnDef(name="score", json_type="number", required=True)], @@ -33,4 +32,4 @@ def test_hook_rejects_reserved_name(reserved: str) -> None: def test_hook_allows_non_reserved_name() -> None: hook = _hook("chemical_features") - assert hook.name == "chemical_features" + assert hook.name.root == "chemical_features" diff --git a/server/tests/unit/domain/deposition/test_return_to_draft.py b/server/tests/unit/domain/deposition/test_return_to_draft.py index ef14e5e..d6f2996 100644 --- a/server/tests/unit/domain/deposition/test_return_to_draft.py +++ b/server/tests/unit/domain/deposition/test_return_to_draft.py @@ -8,7 +8,7 @@ from 
osa.domain.deposition.handler.return_to_draft import ReturnToDraft from osa.domain.shared.error import NotFoundError from osa.domain.shared.event import EventId -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from osa.domain.validation.event.validation_failed import ValidationFailed from osa.domain.validation.model import RunStatus @@ -17,8 +17,8 @@ def _make_dep_srn(id: str = "test-dep") -> DepositionSRN: return DepositionSRN.parse(f"urn:osa:localhost:dep:{id}") -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test-conv") class TestReturnToDraft: @@ -30,7 +30,7 @@ async def test_delegates_to_service(self): event = ValidationFailed( id=EventId(uuid4()), deposition_srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=RunStatus.FAILED, reasons=["Missing required field"], ) @@ -47,7 +47,7 @@ async def test_handles_missing_deposition(self): event = ValidationFailed( id=EventId(uuid4()), deposition_srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=RunStatus.FAILED, reasons=["error"], ) diff --git a/server/tests/unit/domain/deposition/test_return_to_draft_service.py b/server/tests/unit/domain/deposition/test_return_to_draft_service.py index 84f7133..4c45919 100644 --- a/server/tests/unit/domain/deposition/test_return_to_draft_service.py +++ b/server/tests/unit/domain/deposition/test_return_to_draft_service.py @@ -10,7 +10,7 @@ from osa.domain.deposition.model.aggregate import Deposition from osa.domain.deposition.model.value import DepositionStatus from osa.domain.shared.event import EventId -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from 
osa.domain.validation.event.validation_failed import ValidationFailed from osa.domain.validation.model import RunStatus @@ -19,14 +19,14 @@ def _make_dep_srn() -> DepositionSRN: return DepositionSRN.parse("urn:osa:localhost:dep:test-dep") -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test-conv") def _make_deposition(status: DepositionStatus = DepositionStatus.IN_VALIDATION) -> Deposition: return Deposition( srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=status, owner_id=UserId(uuid4()), created_at=datetime.now(UTC), @@ -91,7 +91,7 @@ async def test_handler_delegates_to_service(self): event = ValidationFailed( id=EventId(uuid4()), deposition_srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=RunStatus.FAILED, reasons=["Missing required field"], ) @@ -112,7 +112,7 @@ async def test_handler_catches_not_found(self): event = ValidationFailed( id=EventId(uuid4()), deposition_srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), status=RunStatus.FAILED, reasons=["error"], ) diff --git a/server/tests/unit/domain/feature/test_create_feature_tables.py b/server/tests/unit/domain/feature/test_create_feature_tables.py index 56466ea..01473fe 100644 --- a/server/tests/unit/domain/feature/test_create_feature_tables.py +++ b/server/tests/unit/domain/feature/test_create_feature_tables.py @@ -11,28 +11,23 @@ from osa.domain.shared.event import EventId from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, - OciConfig, + HookIdentity, TableFeatureSpec, ) -from osa.domain.shared.model.srn import ConventionSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, SchemaId -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() 
-> ConventionSlug: + return ConventionSlug("test") def _make_schema_id() -> SchemaId: return SchemaId.parse("test@1.0.0") -def _make_hook_definition(name: str = "pocket_detect") -> HookDefinition: - return HookDefinition( +def _make_hook_definition(name: str = "pocket_detect") -> HookIdentity: + return HookIdentity( name=name, - runtime=OciConfig( - image="ghcr.io/example/hook", - digest="sha256:abc123", - ), feature=TableFeatureSpec( cardinality="many", columns=[ColumnDef(name="score", json_type="number", required=True)], @@ -40,10 +35,10 @@ def _make_hook_definition(name: str = "pocket_detect") -> HookDefinition: ) -def _make_event(hooks: list[HookDefinition] | None = None) -> ConventionRegistered: +def _make_event(hooks: list[HookIdentity] | None = None) -> ConventionRegistered: return ConventionRegistered( id=EventId(uuid4()), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), schema_id=_make_schema_id(), schema_fields=[], hooks=hooks or [], diff --git a/server/tests/unit/domain/feature/test_feature_service_decoupled.py b/server/tests/unit/domain/feature/test_feature_service_decoupled.py index b8984d9..b84b06c 100644 --- a/server/tests/unit/domain/feature/test_feature_service_decoupled.py +++ b/server/tests/unit/domain/feature/test_feature_service_decoupled.py @@ -12,7 +12,8 @@ from osa.domain.feature.service.feature import FeatureService from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + FeatureName, + HookIdentity, OciConfig, TableFeatureSpec, ) @@ -23,8 +24,8 @@ def _make_dep_srn() -> DepositionSRN: return DepositionSRN.parse("urn:osa:localhost:dep:test-dep") -def _make_hook_definition() -> HookDefinition: - return HookDefinition( +def _make_hook_definition() -> HookIdentity: + return HookIdentity( name="pocketeer", runtime=OciConfig( image="osa-hooks/pocketeer:latest", @@ -77,7 +78,7 @@ async def test_insert_features_for_record_uses_event_data(self): await service.insert_features_for_record( 
hook_output_dir="/fake/output/dir", record_srn="urn:osa:localhost:rec:test@1", - expected_features=["pocketeer"], + expected_features=[FeatureName("pocketeer")], ) feature_storage.hook_features_exist.assert_called_once_with("/fake/output/dir", "pocketeer") diff --git a/server/tests/unit/domain/feature/test_insert_record_features.py b/server/tests/unit/domain/feature/test_insert_record_features.py index 8de7db2..47592cd 100644 --- a/server/tests/unit/domain/feature/test_insert_record_features.py +++ b/server/tests/unit/domain/feature/test_insert_record_features.py @@ -9,9 +9,11 @@ from osa.domain.feature.service.feature import FeatureService from osa.domain.record.event.record_published import RecordPublished from osa.domain.shared.event import EventId +from osa.domain.shared.model.hook import FeatureName +from osa.domain.shared.model.provenance import RunRef from osa.domain.shared.model.source import DepositionSource, IngestSource from osa.domain.shared.model.srn import ( - ConventionSRN, + ConventionSlug, RecordSRN, SchemaId, ) @@ -21,8 +23,12 @@ def _make_record_srn() -> RecordSRN: return RecordSRN.parse("urn:osa:localhost:rec:test-rec@1") -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test") + + +def _make_run_ref() -> RunRef: + return RunRef(run_id="run-abc", release_id="rel-xyz") def _make_schema_id() -> SchemaId: @@ -37,7 +43,7 @@ def _make_event( record_srn=_make_record_srn(), source=DepositionSource(id="urn:osa:localhost:dep:test-dep"), metadata={"title": "Test"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), schema_id=_make_schema_id(), expected_features=expected_features or [], ) @@ -81,7 +87,7 @@ async def test_delegates_to_feature_service(self): feature_service.insert_features_for_record.assert_called_once_with( hook_output_dir="/fake/output/dir", record_srn=str(event.record_srn), - 
expected_features=["pocket_detect"], + expected_features=[FeatureName("pocket_detect")], ) @@ -91,6 +97,7 @@ async def test_inserts_features_from_cold_storage(self): """Reads features.json from cold storage and inserts with record_srn.""" feature_storage = AsyncMock() feature_storage.hook_features_exist.return_value = True + feature_storage.read_run_ref.return_value = _make_run_ref() feature_storage.read_hook_features.return_value = [{"score": 0.95}, {"score": 0.82}] feature_store = AsyncMock() @@ -104,13 +111,15 @@ async def test_inserts_features_from_cold_storage(self): await service.insert_features_for_record( hook_output_dir="/fake/output/dir", record_srn=str(_make_record_srn()), - expected_features=["pocket_detect"], + expected_features=[FeatureName("pocket_detect")], ) + # run_id (from the hook output dir's run.json) is stamped on every row (#145). feature_store.insert_features.assert_called_once_with( "pocket_detect", str(_make_record_srn()), [{"score": 0.95}, {"score": 0.82}], + "run-abc", ) @pytest.mark.asyncio @@ -129,7 +138,7 @@ async def test_skips_features_without_features_file(self): await service.insert_features_for_record( hook_output_dir="/fake/output/dir", record_srn=str(_make_record_srn()), - expected_features=["pocket_detect"], + expected_features=[FeatureName("pocket_detect")], ) feature_storage.read_hook_features.assert_not_called() @@ -152,9 +161,32 @@ async def test_skips_empty_feature_list(self): await service.insert_features_for_record( hook_output_dir="/fake/output/dir", record_srn=str(_make_record_srn()), - expected_features=["pocket_detect"], + expected_features=[FeatureName("pocket_detect")], + ) + + feature_store.insert_features.assert_not_called() + + @pytest.mark.asyncio + async def test_skips_features_without_run_json(self): + """A hook with features but no run.json is skipped (no provenance, #145).""" + feature_storage = AsyncMock() + feature_storage.hook_features_exist.return_value = True + 
feature_storage.read_run_ref.return_value = None + + feature_store = AsyncMock() + + service = _make_feature_service( + feature_store=feature_store, + feature_storage=feature_storage, ) + await service.insert_features_for_record( + hook_output_dir="/fake/output/dir", + record_srn=str(_make_record_srn()), + expected_features=[FeatureName("pocket_detect")], + ) + + feature_storage.read_hook_features.assert_not_called() feature_store.insert_features.assert_not_called() @pytest.mark.asyncio @@ -162,6 +194,7 @@ async def test_handles_multiple_features(self): """Processes all expected features.""" feature_storage = AsyncMock() feature_storage.hook_features_exist.return_value = True + feature_storage.read_run_ref.return_value = _make_run_ref() feature_storage.read_hook_features.side_effect = [ [{"score": 0.9}], [{"score": 0.8}], @@ -178,7 +211,7 @@ async def test_handles_multiple_features(self): await service.insert_features_for_record( hook_output_dir="/fake/output/dir", record_srn=str(_make_record_srn()), - expected_features=["hook_a", "hook_b"], + expected_features=[FeatureName("hook_a"), FeatureName("hook_b")], ) assert feature_store.insert_features.call_count == 2 @@ -224,7 +257,7 @@ async def test_ingest_source_uses_source_fields(self): upstream_source="pdb", ), metadata={"title": "Ingested"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), schema_id=_make_schema_id(), expected_features=["pocket_detect"], ) @@ -234,5 +267,5 @@ async def test_ingest_source_uses_source_fields(self): feature_service.insert_features_for_record.assert_called_once_with( hook_output_dir="/fake/ingest/dir", record_srn=str(_make_record_srn()), - expected_features=["pocket_detect"], + expected_features=[FeatureName("pocket_detect")], ) diff --git a/server/tests/unit/domain/ingest/test_ingest_run.py b/server/tests/unit/domain/ingest/test_ingest_run.py index 34ab0dd..df21027 100644 --- a/server/tests/unit/domain/ingest/test_ingest_run.py +++ 
b/server/tests/unit/domain/ingest/test_ingest_run.py @@ -11,7 +11,7 @@ def _make_run(**overrides) -> IngestRun: defaults = { "id": "test-run-id", - "convention_srn": "urn:osa:localhost:conv:test-conv@1.0.0", + "convention_id": "urn:osa:localhost:conv:test-conv@1.0.0", "status": IngestStatus.PENDING, "started_at": datetime.now(UTC), } diff --git a/server/tests/unit/domain/ingest/test_ingest_service.py b/server/tests/unit/domain/ingest/test_ingest_service.py index 7e1c8df..07516a1 100644 --- a/server/tests/unit/domain/ingest/test_ingest_service.py +++ b/server/tests/unit/domain/ingest/test_ingest_service.py @@ -13,7 +13,7 @@ def _make_convention(*, has_ingester: bool = True): conv = MagicMock() - conv.srn = "urn:osa:localhost:conv:test-conv@1.0.0" + conv.srn = "test-conv" conv.ingester = ( IngesterDefinition( image="ghcr.io/example/ingester:v1", @@ -56,17 +56,17 @@ class TestStartIngest: async def test_creates_pending_ingest(self) -> None: service = _make_service() run = await service.start_ingest( - convention_srn="urn:osa:localhost:conv:test-conv@1.0.0", + convention_id="test-conv", ) assert run.status == IngestStatus.PENDING - assert run.convention_srn == "urn:osa:localhost:conv:test-conv@1.0.0" + assert run.convention_id == "test-conv" assert run.batch_size == 1000 @pytest.mark.asyncio async def test_saves_and_emits_events(self) -> None: service = _make_service() run = await service.start_ingest( - convention_srn="urn:osa:localhost:conv:test-conv@1.0.0", + convention_id="test-conv", ) service.ingest_repo.save.assert_called_once() assert service.outbox.append.call_count == 2 @@ -75,19 +75,19 @@ async def test_saves_and_emits_events(self) -> None: first_event = service.outbox.append.call_args_list[0][0][0] assert first_event.__class__.__name__ == "IngestRunStarted" assert first_event.ingest_run_id == run.id - assert first_event.convention_srn == run.convention_srn + assert first_event.convention_id == run.convention_id # Second event: NextBatchRequested (triggers 
first batch) second_event = service.outbox.append.call_args_list[1][0][0] assert second_event.__class__.__name__ == "NextBatchRequested" assert second_event.ingest_run_id == run.id - assert second_event.convention_srn == run.convention_srn + assert second_event.convention_id == run.convention_id @pytest.mark.asyncio async def test_custom_batch_size(self) -> None: service = _make_service() run = await service.start_ingest( - convention_srn="urn:osa:localhost:conv:test-conv@1.0.0", + convention_id="test-conv", batch_size=500, ) assert run.batch_size == 500 @@ -97,7 +97,7 @@ async def test_rejects_convention_not_found(self) -> None: service = _make_service(convention_not_found=True) with pytest.raises(NotFoundError): await service.start_ingest( - convention_srn="urn:osa:localhost:conv:nonexistent@1.0.0", + convention_id="nonexistent", ) @pytest.mark.asyncio @@ -105,7 +105,7 @@ async def test_rejects_no_ingester_configured(self) -> None: service = _make_service(convention=_make_convention(has_ingester=False)) with pytest.raises(NotFoundError, match="No ingester configured"): await service.start_ingest( - convention_srn="urn:osa:localhost:conv:test-conv@1.0.0", + convention_id="test-conv", ) @pytest.mark.asyncio @@ -114,5 +114,5 @@ async def test_rejects_ingest_already_running(self) -> None: service = _make_service(running_ingest=existing) with pytest.raises(ConflictError, match="already running"): await service.start_ingest( - convention_srn="urn:osa:localhost:conv:test-conv@1.0.0", + convention_id="test-conv", ) diff --git a/server/tests/unit/domain/ingest/test_run_hooks.py b/server/tests/unit/domain/ingest/test_run_hooks.py index fbea714..fb139f8 100644 --- a/server/tests/unit/domain/ingest/test_run_hooks.py +++ b/server/tests/unit/domain/ingest/test_run_hooks.py @@ -1,5 +1,7 @@ """Tests for RunHooks — OOM exhaustion should still emit HookBatchCompleted.""" +from datetime import UTC, datetime +from pathlib import Path from unittest.mock import AsyncMock from uuid 
import uuid4 @@ -10,18 +12,37 @@ from osa.domain.ingest.model.ingest_run import IngestRun, IngestRunId, IngestStatus from osa.domain.shared.error import OOMError, PermanentError from osa.domain.shared.event import EventId -from osa.domain.shared.model.hook import HookDefinition, OciConfig, OciLimits, TableFeatureSpec - - -def _make_hook(name: str = "pockets") -> HookDefinition: - return HookDefinition( - name=name, +from osa.domain.shared.model.hook import ( + HookName, + OciConfig, + OciLimits, + TableFeatureSpec, +) +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId + + +def _make_release(name: str = "pockets", memory: str = "1g") -> HookRelease: + return HookRelease( + id=HookReleaseId(uuid4()), + hook_name=HookName(name), + version=1, runtime=OciConfig( image="ghcr.io/test/pockets:v1", digest="sha256:abc123", - limits=OciLimits(memory="1g"), + limits=OciLimits(memory=memory), ), + source_ref="git:abc123", + built_at=datetime.now(UTC), + ) + + +def _make_hook(name: str = "pockets") -> Hook: + return Hook( + name=HookName(name), feature=TableFeatureSpec(cardinality="one", columns=[]), + live_release_id=HookReleaseId(uuid4()), + created_at=datetime.now(UTC), ) @@ -38,19 +59,30 @@ def _make_event( def _make_convention(): + # Conventions now reference hooks by name (#145), not embedded identities. 
conv = AsyncMock() - conv.hooks = [_make_hook()] + conv.hooks = [HookName("pockets")] return conv +def _make_hook_registry() -> AsyncMock: + """Fake HookRegistryService resolving the single 'pockets' hook + release.""" + registry = AsyncMock() + release = _make_release() + registry.resolve_live.return_value = {HookName("pockets"): release} + registry.get_hook.return_value = _make_hook() + return registry + + def _make_handler(*, hook_service_side_effect=None) -> RunHooks: ingest_repo = AsyncMock() ingest_repo.get.return_value = IngestRun( id=IngestRunId("run-1"), - convention_srn="urn:osa:localhost:conv:test@1.0.0", + # Conventions are identified by a bare slug now (#145). + convention_id="test-conv", status=IngestStatus.RUNNING, batch_size=100, - started_at=__import__("datetime").datetime.now(__import__("datetime").UTC), + started_at=datetime.now(UTC), ) convention_service = AsyncMock() @@ -60,8 +92,8 @@ def _make_handler(*, hook_service_side_effect=None) -> RunHooks: ingest_storage.read_records.return_value = [ {"source_id": "rec-1", "metadata": {}, "files": []}, ] - ingest_storage.batch_files_dir.return_value = __import__("pathlib").Path("/tmp/files") - ingest_storage.hook_work_dir.return_value = __import__("pathlib").Path("/tmp/work") + ingest_storage.batch_files_dir.return_value = Path("/tmp/files") + ingest_storage.hook_work_dir.return_value = Path("/tmp/work") hook_service = AsyncMock() if hook_service_side_effect: @@ -72,6 +104,7 @@ def _make_handler(*, hook_service_side_effect=None) -> RunHooks: ingest_service=AsyncMock(), convention_service=convention_service, hook_service=hook_service, + hook_registry=_make_hook_registry(), outbox=AsyncMock(), ingest_storage=ingest_storage, ) diff --git a/server/tests/unit/domain/record/test_get_record_handler.py b/server/tests/unit/domain/record/test_get_record_handler.py index 206618e..57e4d2f 100644 --- a/server/tests/unit/domain/record/test_get_record_handler.py +++ 
b/server/tests/unit/domain/record/test_get_record_handler.py @@ -8,22 +8,22 @@ from osa.domain.record.model.aggregate import Record from osa.domain.shared.error import NotFoundError from osa.domain.shared.model.source import DepositionSource -from osa.domain.shared.model.srn import ConventionSRN, RecordSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, RecordSRN, SchemaId def _make_record_srn() -> RecordSRN: return RecordSRN.parse("urn:osa:localhost:rec:test-rec@1") -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test") def _make_record() -> Record: return Record( srn=_make_record_srn(), source=DepositionSource(id="urn:osa:localhost:dep:test-dep"), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), schema_id=SchemaId.parse("test@1.0.0"), metadata={"title": "Test Protein"}, published_at=datetime.now(UTC), @@ -45,7 +45,7 @@ async def test_returns_record_detail(self): assert result.srn == record.srn assert result.source == record.source - assert result.convention_srn == record.convention_srn + assert result.convention_id == record.convention_id assert result.metadata == record.metadata service.get.assert_called_once_with(record.srn) diff --git a/server/tests/unit/domain/record/test_record_draft.py b/server/tests/unit/domain/record/test_record_draft.py index 68ebef2..b85dcc7 100644 --- a/server/tests/unit/domain/record/test_record_draft.py +++ b/server/tests/unit/domain/record/test_record_draft.py @@ -2,11 +2,11 @@ from osa.domain.record.model.draft import RecordDraft from osa.domain.shared.model.source import DepositionSource -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return 
ConventionSlug("test") class TestRecordDraft: @@ -14,17 +14,17 @@ def test_construction(self): draft = RecordDraft( source=DepositionSource(id="urn:osa:localhost:dep:abc"), metadata={"title": "Test"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), ) assert draft.source.type == "deposition" assert draft.metadata == {"title": "Test"} - assert draft.convention_srn == _make_conv_srn() + assert draft.convention_id == _make_conv_slug() def test_expected_features_defaults_empty(self): draft = RecordDraft( source=DepositionSource(id="dep-1"), metadata={}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), ) assert draft.expected_features == [] @@ -32,7 +32,7 @@ def test_expected_features_can_be_set(self): draft = RecordDraft( source=DepositionSource(id="dep-1"), metadata={}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), expected_features=["pocket_detect", "qc_check"], ) - assert draft.expected_features == ["pocket_detect", "qc_check"] + assert [f.root for f in draft.expected_features] == ["pocket_detect", "qc_check"] diff --git a/server/tests/unit/domain/record/test_record_features.py b/server/tests/unit/domain/record/test_record_features.py index 2fe6a1e..827d0b0 100644 --- a/server/tests/unit/domain/record/test_record_features.py +++ b/server/tests/unit/domain/record/test_record_features.py @@ -9,7 +9,7 @@ from osa.domain.record.query.get_record import GetRecord, GetRecordHandler, RecordDetail from osa.domain.record.service.record import RecordService from osa.domain.shared.model.source import DepositionSource -from osa.domain.shared.model.srn import ConventionSRN, Domain, RecordSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, Domain, RecordSRN, SchemaId from osa.infrastructure.persistence.adapter.feature_reader import PostgresFeatureReader @@ -176,7 +176,7 @@ def _make_record() -> Record: return Record( srn=RecordSRN.parse("urn:osa:localhost:rec:abc@1"), 
source=DepositionSource(id="urn:osa:localhost:dep:dep1"), - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0"), + convention_id=ConventionSlug("test"), schema_id=SchemaId.parse("test@1.0.0"), metadata={"title": "Test"}, published_at=datetime.now(UTC), @@ -240,6 +240,6 @@ async def test_existing_behavior_preserved(self) -> None: assert result.srn == record.srn assert result.source == record.source - assert result.convention_srn == record.convention_srn + assert result.convention_id == record.convention_id assert result.metadata == record.metadata mock_service.get.assert_called_once_with(record.srn) diff --git a/server/tests/unit/domain/record/test_record_published_enriched.py b/server/tests/unit/domain/record/test_record_published_enriched.py index 670ed1d..ea50da8 100644 --- a/server/tests/unit/domain/record/test_record_published_enriched.py +++ b/server/tests/unit/domain/record/test_record_published_enriched.py @@ -1,6 +1,6 @@ """Unit tests for enriched RecordPublished event. -Verifies the event carries source, convention_srn, and expected_features. +Verifies the event carries source, convention_id, and expected_features. 
""" from uuid import uuid4 @@ -8,7 +8,7 @@ from osa.domain.record.event.record_published import RecordPublished from osa.domain.shared.event import EventId from osa.domain.shared.model.source import DepositionSource -from osa.domain.shared.model.srn import ConventionSRN, RecordSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, RecordSRN, SchemaId SCHEMA = SchemaId.parse("test@1.0.0") @@ -22,24 +22,24 @@ def test_carries_source(self): record_srn=RecordSRN.parse("urn:osa:localhost:rec:test@1"), source=source, metadata={"title": "Test"}, - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0"), + convention_id=ConventionSlug("test"), schema_id=SCHEMA, expected_features=["pocketeer"], ) assert event.source.type == "deposition" assert event.source.id == "urn:osa:localhost:dep:test" - def test_carries_convention_srn(self): + def test_carries_convention_id(self): event = RecordPublished( id=EventId(uuid4()), record_srn=RecordSRN.parse("urn:osa:localhost:rec:test@1"), source=DepositionSource(id="urn:osa:localhost:dep:test"), metadata={"title": "Test"}, - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0"), + convention_id=ConventionSlug("test"), schema_id=SCHEMA, expected_features=[], ) - assert event.convention_srn == ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") + assert event.convention_id == ConventionSlug("test") def test_carries_expected_features(self): event = RecordPublished( @@ -47,8 +47,8 @@ def test_carries_expected_features(self): record_srn=RecordSRN.parse("urn:osa:localhost:rec:test@1"), source=DepositionSource(id="urn:osa:localhost:dep:test"), metadata={"title": "Test"}, - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0"), + convention_id=ConventionSlug("test"), schema_id=SCHEMA, expected_features=["pocketeer", "qc_check"], ) - assert event.expected_features == ["pocketeer", "qc_check"] + assert [f.root for f in event.expected_features] == ["pocketeer", "qc_check"] diff 
--git a/server/tests/unit/domain/record/test_record_schema_srn_immutable.py b/server/tests/unit/domain/record/test_record_schema_srn_immutable.py index 13e5c7b..444d9b7 100644 --- a/server/tests/unit/domain/record/test_record_schema_srn_immutable.py +++ b/server/tests/unit/domain/record/test_record_schema_srn_immutable.py @@ -7,14 +7,14 @@ from osa.domain.record.model.aggregate import Record from osa.domain.shared.model.source import DepositionSource -from osa.domain.shared.model.srn import ConventionSRN, RecordSRN, SchemaId +from osa.domain.shared.model.srn import ConventionSlug, RecordSRN, SchemaId def _make_record() -> Record: return Record( srn=RecordSRN.parse("urn:osa:localhost:rec:abc@1"), source=DepositionSource(id="urn:osa:localhost:dep:d1"), - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0"), + convention_id=ConventionSlug("test"), schema_id=SchemaId.parse("test@1.0.0"), metadata={"title": "T"}, published_at=datetime.now(UTC), diff --git a/server/tests/unit/domain/record/test_record_service.py b/server/tests/unit/domain/record/test_record_service.py index dbeafc0..fbb9ff3 100644 --- a/server/tests/unit/domain/record/test_record_service.py +++ b/server/tests/unit/domain/record/test_record_service.py @@ -15,7 +15,7 @@ from osa.domain.record.service.record import RecordService from osa.domain.shared.model.source import DepositionSource, IngestSource from osa.domain.shared.model.srn import ( - ConventionSRN, + ConventionSlug, DepositionSRN, Domain, LocalId, @@ -24,8 +24,8 @@ from osa.domain.shared.outbox import Outbox -def _make_conv_srn() -> ConventionSRN: - return ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test") def _make_schema_id() -> SchemaId: @@ -34,7 +34,7 @@ def _make_schema_id() -> SchemaId: def _make_convention() -> Convention: return Convention( - srn=_make_conv_srn(), + id=_make_conv_slug(), title="Test Convention", description=None, 
schema_id=_make_schema_id(), @@ -76,7 +76,7 @@ def sample_draft(node_domain: Domain) -> RecordDraft: return RecordDraft( source=DepositionSource(id=str(dep_srn)), metadata={"title": "Test Record", "organism": "human", "platform": "GPL570"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), expected_features=["pocket_detect"], ) @@ -113,7 +113,7 @@ async def test_publish_record_creates_record( assert record is not None assert record.source == sample_draft.source - assert record.convention_srn == sample_draft.convention_srn + assert record.convention_id == sample_draft.convention_id assert record.schema_id == _make_schema_id() assert record.metadata == sample_draft.metadata mock_record_repo.save.assert_called_once() @@ -136,7 +136,7 @@ async def test_publish_record_emits_record_published_event( assert isinstance(event, RecordPublished) assert event.record_srn == record.srn assert event.source == sample_draft.source - assert event.convention_srn == sample_draft.convention_srn + assert event.convention_id == sample_draft.convention_id assert event.schema_id == _make_schema_id() assert event.expected_features == sample_draft.expected_features assert event.metadata == sample_draft.metadata @@ -173,7 +173,7 @@ async def test_publish_with_ingest_source( upstream_source="pdb", ), metadata={"title": "Ingested Protein"}, - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), expected_features=["pocket_detect"], ) @@ -183,10 +183,10 @@ async def test_publish_with_ingest_source( assert record.source.type == "ingest" assert record.source.upstream_source == "pdb" - assert record.convention_srn == _make_conv_srn() + assert record.convention_id == _make_conv_slug() mock_record_repo.save.assert_called_once() event = mock_outbox.append.call_args[0][0] assert isinstance(event, RecordPublished) assert event.source.type == "ingest" - assert event.expected_features == ["pocket_detect"] + assert [f.root for f in event.expected_features] == 
["pocket_detect"] diff --git a/server/tests/unit/domain/shared/test_convention_id.py b/server/tests/unit/domain/shared/test_convention_id.py new file mode 100644 index 0000000..f5c08f4 --- /dev/null +++ b/server/tests/unit/domain/shared/test_convention_id.py @@ -0,0 +1,80 @@ +"""Unit tests for the ConventionSlug value object (a bare slug, #145). + +Conventions became unversioned in feature #145: their identity is a bare, +human-readable slug rather than the old ``"<slug>@<version>"`` form. This +suite replaces the former ``ConventionId`` tests. +""" + +import pytest + +from osa.domain.shared.model.srn import ConventionSlug + + +def test_construct_and_read_root() -> None: + slug = ConventionSlug("proteins") + assert slug.root == "proteins" + assert str(slug) == "proteins" + + +def test_parse_round_trips() -> None: + slug = ConventionSlug.parse("pdb-structure") + assert slug.root == "pdb-structure" + assert str(slug) == "pdb-structure" + + +def test_equality_via_root() -> None: + assert ConventionSlug("proteins") == ConventionSlug("proteins") + assert ConventionSlug("proteins").root == "proteins" + assert ConventionSlug("proteins") != ConventionSlug("genomes") + + +@pytest.mark.parametrize( + "value", + [ + "proteins", + "pdb-structure", + "abc", # minimum length (3) + "a1b", + "x" * 64, # maximum length + ], +) +def test_valid_slugs_accepted(value: str) -> None: + assert ConventionSlug(value).root == value + + +@pytest.mark.parametrize( + "value", + [ + "", # empty + "ab", # too short (< 3) + "x" * 65, # too long (> 64) + "1proteins", # must start with a letter + "-proteins", # must start with a letter + "Proteins", # uppercase rejected + "proteins_1", # underscores rejected + "proteins@1.0.0", # version suffix rejected + "proteins ", # whitespace rejected (no stripping) + "urn:osa:localhost:conv:proteins", # URN form rejected + ], +) +def test_malformed_rejected(value: str) -> None: + with pytest.raises(ValueError): + ConventionSlug(value) + + +def
test_parse_rejects_version_suffix() -> None: + with pytest.raises(ValueError): + ConventionSlug.parse("proteins@1.0.0") + + +def test_is_frozen() -> None: + slug = ConventionSlug("proteins") + with pytest.raises(Exception): + slug.root = "other" # type: ignore[misc] + + +def test_is_hashable_and_usable_as_dict_key() -> None: + slug = ConventionSlug("proteins") + mapping = {slug: 1} + assert mapping[ConventionSlug("proteins")] == 1 + assert hash(slug) == hash(ConventionSlug("proteins")) diff --git a/server/tests/unit/domain/shared/test_hook_models.py b/server/tests/unit/domain/shared/test_hook_models.py index e7fc144..4c1cdb0 100644 --- a/server/tests/unit/domain/shared/test_hook_models.py +++ b/server/tests/unit/domain/shared/test_hook_models.py @@ -1,4 +1,4 @@ -"""Tests for shared hook domain models: HookDefinition, OciConfig, OciLimits, TableFeatureSpec, ColumnDef.""" +"""Tests for shared hook domain models: HookIdentity, OciConfig, OciLimits, TableFeatureSpec, ColumnDef.""" import pytest from pydantic import ValidationError @@ -129,23 +129,15 @@ def test_oci_config_default_config(): assert cfg.limits.timeout_seconds == 300 -def test_hook_definition_full(): +def test_hook_identity_holds_name_and_feature(): from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, - OciConfig, - OciLimits, + HookIdentity, TableFeatureSpec, ) - hook_def = HookDefinition( + ident = HookIdentity( name="detect_pockets", - runtime=OciConfig( - image="ghcr.io/osa/hooks/pocketeer:v1", - digest="sha256:abc123", - config={"r_min": 3.0}, - limits=OciLimits(timeout_seconds=300, memory="512m", cpu="0.5"), - ), feature=TableFeatureSpec( cardinality="many", columns=[ @@ -154,48 +146,22 @@ def test_hook_definition_full(): ], ), ) - assert hook_def.name == "detect_pockets" - assert hook_def.runtime.image == "ghcr.io/osa/hooks/pocketeer:v1" - assert hook_def.runtime.digest == "sha256:abc123" - assert hook_def.runtime.config == {"r_min": 3.0} - assert 
hook_def.runtime.limits.timeout_seconds == 300 - assert hook_def.feature.cardinality == "many" - assert len(hook_def.feature.columns) == 2 - - -def test_hook_definition_default_limits(): - from osa.domain.shared.model.hook import ( - HookDefinition, - OciConfig, - TableFeatureSpec, - ) - - hook_def = HookDefinition( - name="h", - runtime=OciConfig(image="img:v1", digest="sha256:abc"), - feature=TableFeatureSpec(cardinality="one", columns=[]), - ) - assert hook_def.runtime.limits.timeout_seconds == 300 - assert hook_def.runtime.limits.memory == "1g" + assert ident.name.root == "detect_pockets" + assert ident.feature.cardinality == "many" + assert len(ident.feature.columns) == 2 + # Runtime moved to HookRelease (#145) — covered in test_hook_release.py. + assert not hasattr(ident, "runtime") -def test_hook_definition_serialization_roundtrip(): +def test_hook_identity_serialization_roundtrip(): from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, - OciConfig, - OciLimits, + HookIdentity, TableFeatureSpec, ) - hook_def = HookDefinition( + ident = HookIdentity( name="detect_pockets", - runtime=OciConfig( - image="ghcr.io/osa/hooks/pocketeer:v1", - digest="sha256:abc123", - config={"key": "value"}, - limits=OciLimits(timeout_seconds=120, memory="1g", cpu="1.5"), - ), feature=TableFeatureSpec( cardinality="many", columns=[ @@ -205,124 +171,70 @@ def test_hook_definition_serialization_roundtrip(): ), ) - data = hook_def.model_dump() - restored = HookDefinition.model_validate(data) - assert restored == hook_def + data = ident.model_dump() + restored = HookIdentity.model_validate(data) + assert restored == ident assert restored.feature.columns[1].required is False -class TestMemoryDoubling: - """Tests for HookDefinition.with_memory() and with_doubled_memory().""" - - def _make_hook(self, memory: str = "1g"): - from osa.domain.shared.model.hook import ( - HookDefinition, - OciConfig, - OciLimits, - TableFeatureSpec, - ) - - return HookDefinition( - 
name="detect_pockets", - runtime=OciConfig( - image="img:v1", - digest="sha256:abc", - limits=OciLimits(memory=memory), - ), - feature=TableFeatureSpec(cardinality="one", columns=[]), - ) - - def test_hook_definition_with_memory(self): - hook = self._make_hook("1g") - updated = hook.with_memory("2g") - assert updated.runtime.limits.memory == "2g" - # original unchanged (frozen) - assert hook.runtime.limits.memory == "1g" - - def test_hook_definition_with_doubled_memory_1g(self): - hook = self._make_hook("1g") - doubled = hook.with_doubled_memory() - assert doubled.runtime.limits.memory == "2g" - - def test_hook_definition_with_doubled_memory_512m(self): - hook = self._make_hook("512m") - doubled = hook.with_doubled_memory() - assert doubled.runtime.limits.memory == "1g" - - def test_hook_definition_with_doubled_memory_768m(self): - hook = self._make_hook("768m") - doubled = hook.with_doubled_memory() - assert doubled.runtime.limits.memory == "1536m" - - def test_hook_definition_with_doubled_memory_preserves_other_fields(self): - hook = self._make_hook("1g") - doubled = hook.with_doubled_memory() - assert doubled.name == hook.name - assert doubled.runtime.image == hook.runtime.image - assert doubled.runtime.digest == hook.runtime.digest - assert doubled.runtime.limits.timeout_seconds == hook.runtime.limits.timeout_seconds - assert doubled.runtime.limits.cpu == hook.runtime.limits.cpu - assert doubled.feature == hook.feature - - class TestNameValidation: """Hook and column names must be safe PG identifiers.""" def test_hook_name_rejects_uppercase(self): - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, OciConfig, TableFeatureSpec with pytest.raises(ValidationError): - HookDefinition( + HookIdentity( name="BadName", runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), ) def 
test_hook_name_rejects_newline_injection(self): - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, OciConfig, TableFeatureSpec with pytest.raises(ValidationError): - HookDefinition( + HookIdentity( name="hook\nEVIL_VAR=pwned", runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), ) def test_hook_name_rejects_path_traversal(self): - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, OciConfig, TableFeatureSpec with pytest.raises(ValidationError): - HookDefinition( + HookIdentity( name="../etc/passwd", runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), ) def test_hook_name_rejects_sql_injection(self): - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, OciConfig, TableFeatureSpec with pytest.raises(ValidationError): - HookDefinition( + HookIdentity( name="'; DROP TABLE --", runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), ) def test_hook_name_rejects_empty(self): - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, OciConfig, TableFeatureSpec with pytest.raises(ValidationError): - HookDefinition( + HookIdentity( name="", runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), ) def test_hook_name_rejects_leading_digit(self): - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, OciConfig, TableFeatureSpec with pytest.raises(ValidationError): - HookDefinition( + HookIdentity( 
name="1hook", runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), @@ -343,19 +255,17 @@ def test_column_name_rejects_spaces(self): def test_valid_names_accepted(self): from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, - OciConfig, + HookIdentity, TableFeatureSpec, ) valid_names = ["a", "hook_v2", "pocket_detect", "x1", "a_b_c_d"] for name in valid_names: - hook = HookDefinition( + hook = HookIdentity( name=name, - runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), ) - assert hook.name == name + assert hook.name.root == name col = ColumnDef(name=name, json_type="number", required=True) assert col.name == name @@ -364,23 +274,22 @@ def test_hook_name_accepts_40_chars(self): """Hook names must fit in derived identifiers like ``fk_features_{name}_record_srn`` — 23 chars overhead + up to 40-char hook = 63-char max, which is PG's identifier limit.""" - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, TableFeatureSpec forty = "a" + "b" * 39 - hook = HookDefinition( + hook = HookIdentity( name=forty, - runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), ) - assert hook.name == forty + assert hook.name.root == forty def test_hook_name_rejects_over_40_chars(self): """41+ char names would produce an FK name exceeding PG's 63-char identifier limit.""" - from osa.domain.shared.model.hook import HookDefinition, OciConfig, TableFeatureSpec + from osa.domain.shared.model.hook import HookIdentity, OciConfig, TableFeatureSpec with pytest.raises(ValidationError): - HookDefinition( + HookIdentity( name="a" + "b" * 40, # 41 chars runtime=OciConfig(image="img:v1", digest="sha256:abc"), feature=TableFeatureSpec(cardinality="one", columns=[]), diff --git 
a/server/tests/unit/domain/shared/test_hook_split.py b/server/tests/unit/domain/shared/test_hook_split.py new file mode 100644 index 0000000..cbc9c46 --- /dev/null +++ b/server/tests/unit/domain/shared/test_hook_split.py @@ -0,0 +1,38 @@ +"""Unit tests for the HookDefinition→HookIdentity split. + +The execution/runtime half (runtime + source_ref) now lives on the +``HookRelease`` entity in the ``validation`` domain (see +``tests/unit/domain/validation/test_hook_release.py``); this identity holds only the +name + the fixed output contract. +""" + +import pytest + +from osa.domain.shared.error import ReservedNameError +from osa.domain.shared.model.hook import ColumnDef, HookIdentity, TableFeatureSpec + + +def _feature() -> TableFeatureSpec: + return TableFeatureSpec( + cardinality="many", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ) + + +def test_identity_holds_name_and_feature_only() -> None: + ident = HookIdentity(name="pocket_detect", feature=_feature()) + assert ident.name.root == "pocket_detect" + assert ident.feature.cardinality == "many" + # Runtime no longer lives on the identity — it moved to HookRelease.
+ assert not hasattr(ident, "runtime") + + +def test_identity_rejects_reserved_name() -> None: + with pytest.raises(ReservedNameError): + HookIdentity(name="records", feature=_feature()) + + +def test_identity_is_frozen() -> None: + ident = HookIdentity(name="pocket_detect", feature=_feature()) + with pytest.raises(Exception): + ident.name = "other" # type: ignore[misc] diff --git a/server/tests/unit/domain/validation/test_hook_aggregate.py b/server/tests/unit/domain/validation/test_hook_aggregate.py new file mode 100644 index 0000000..c8ec618 --- /dev/null +++ b/server/tests/unit/domain/validation/test_hook_aggregate.py @@ -0,0 +1,48 @@ +"""Unit tests for the Hook aggregate (identity + fixed contract + live pointer).""" + +from datetime import UTC, datetime +from uuid import uuid4 + +import pytest + +from osa.domain.shared.error import ReservedNameError +from osa.domain.shared.model.hook import ColumnDef, TableFeatureSpec +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookReleaseId + + +def _feature() -> TableFeatureSpec: + return TableFeatureSpec( + cardinality="many", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ) + + +def test_hook_holds_identity_and_no_live_release_initially() -> None: + hook = Hook(name="pocket_detect", feature=_feature(), created_at=datetime.now(UTC)) + assert hook.name.root == "pocket_detect" + assert hook.feature.cardinality == "many" + assert hook.live_release_id is None + + +def test_feature_contract_is_frozen() -> None: + hook = Hook(name="pocket_detect", feature=_feature(), created_at=datetime.now(UTC)) + with pytest.raises(Exception): + hook.feature = _feature() # type: ignore[misc] + + +def test_reserved_name_rejected() -> None: + with pytest.raises(ReservedNameError): + Hook(name="records", feature=_feature(), created_at=datetime.now(UTC)) + + +def test_with_live_release_returns_repointed_copy() -> None: + hook = Hook(name="pocket_detect", 
feature=_feature(), created_at=datetime.now(UTC)) + rid = HookReleaseId(uuid4()) + advanced = hook.with_live_release(rid) + assert advanced.live_release_id == rid + # Original unchanged (immutable aggregate). + assert hook.live_release_id is None + # Identity preserved. + assert advanced.name == hook.name + assert advanced.feature == hook.feature diff --git a/server/tests/unit/domain/validation/test_hook_release.py b/server/tests/unit/domain/validation/test_hook_release.py new file mode 100644 index 0000000..d365ca8 --- /dev/null +++ b/server/tests/unit/domain/validation/test_hook_release.py @@ -0,0 +1,57 @@ +"""Unit tests for HookRelease (immutable versioned artifact).""" + +from datetime import UTC, datetime +from uuid import uuid4 + +import pytest + +from osa.domain.shared.model.hook import OciConfig, OciLimits +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId + + +def _release(version: int = 1, memory: str = "1g") -> HookRelease: + return HookRelease( + id=HookReleaseId(uuid4()), + hook_name="pocket_detect", + version=version, + runtime=OciConfig(image="reg/p:abc", digest="sha256:abc", limits=OciLimits(memory=memory)), + source_ref="git-abc123", + built_by="deploy-bot", + built_at=datetime.now(UTC), + ) + + +def test_release_carries_runtime_and_source_ref() -> None: + rel = _release() + assert rel.runtime.image == "reg/p:abc" + assert rel.runtime.digest == "sha256:abc" + assert rel.source_ref == "git-abc123" + assert rel.version == 1 + + +def test_release_is_immutable() -> None: + rel = _release() + with pytest.raises(Exception): + rel.version = 2 # type: ignore[misc] + + +def test_source_ref_required() -> None: + with pytest.raises(Exception): + HookRelease( + id=HookReleaseId(uuid4()), + hook_name="pocket_detect", + version=1, + runtime=OciConfig(image="i", digest="sha256:d"), + built_at=datetime.now(UTC), + ) # type: ignore[call-arg] + + +def test_with_doubled_memory_for_oom_retry() -> None: + rel = _release(memory="1g") + 
doubled = rel.with_doubled_memory() + assert doubled.runtime.limits.memory == "2g" + # Doubling is an in-memory view for retries; identity/version preserved. + assert doubled.version == rel.version + assert doubled.id == rel.id + # Original untouched. + assert rel.runtime.limits.memory == "1g" diff --git a/server/tests/unit/domain/validation/test_hook_result.py b/server/tests/unit/domain/validation/test_hook_result.py index d245e74..c723e04 100644 --- a/server/tests/unit/domain/validation/test_hook_result.py +++ b/server/tests/unit/domain/validation/test_hook_result.py @@ -48,7 +48,7 @@ def test_hook_result_passed(): status=HookStatus.PASSED, duration_seconds=12.5, ) - assert result.hook_name == "detect_pockets" + assert result.hook_name.root == "detect_pockets" assert result.status == HookStatus.PASSED assert result.rejection_reason is None assert result.error_message is None diff --git a/server/tests/unit/domain/validation/test_hook_run.py b/server/tests/unit/domain/validation/test_hook_run.py new file mode 100644 index 0000000..7b5305a --- /dev/null +++ b/server/tests/unit/domain/validation/test_hook_run.py @@ -0,0 +1,66 @@ +"""Unit tests for HookRun — pure execution record + per-row provenance anchor (#145). + +Post design-revision: HookRun carries no execution-context columns (no +``ingest_run_id`` / ``deposition_id`` / ``batch_index`` / XOR check). Data origin +is reached via the feature row's ``record_srn → records.source``; this record is +purely *what code ran, when, and where the logs are*. 
+""" + +from datetime import UTC, datetime, timedelta +from uuid import uuid4 + +import pytest + +from osa.domain.validation.model.hook_release import HookReleaseId +from osa.domain.validation.model.hook_result import HookStatus +from osa.domain.validation.model.hook_run import HookRun, HookRunId, HookRunStatus + + +def _run(**overrides: object) -> HookRun: + started = datetime.now(UTC) + base: dict[str, object] = dict( + id=HookRunId(uuid4()), + release_id=HookReleaseId(uuid4()), + status=HookRunStatus.PASSED, + started_at=started, + finished_at=started + timedelta(seconds=2), + duration_s=2.0, + oom_retries=0, + ) + base.update(overrides) + return HookRun(**base) # type: ignore[arg-type] + + +def test_status_enum_values() -> None: + assert {s.value for s in HookRunStatus} == {"passed", "warnings", "failed", "error"} + + +def test_pure_execution_record() -> None: + run = _run() + assert run.duration_s == 2.0 + assert run.oom_retries == 0 + assert run.log_ref is None + # No execution-context columns remain on the pure record. 
+ assert not hasattr(run, "ingest_run_id") + assert not hasattr(run, "deposition_id") + assert not hasattr(run, "batch_index") + + +def test_log_ref_is_optional() -> None: + run = _run(log_ref="s3://logs/run-1.txt") + assert run.log_ref == "s3://logs/run-1.txt" + + +def test_finished_at_and_duration_required() -> None: + with pytest.raises(ValueError): + HookRun( + id=HookRunId(uuid4()), + release_id=HookReleaseId(uuid4()), + status=HookRunStatus.PASSED, + started_at=datetime.now(UTC), + ) # type: ignore[call-arg] + + +def test_from_hook_status_maps_passed_and_rejected() -> None: + assert HookRunStatus.from_hook_status(HookStatus.PASSED) is HookRunStatus.PASSED + assert HookRunStatus.from_hook_status(HookStatus.REJECTED) is HookRunStatus.FAILED diff --git a/server/tests/unit/domain/validation/test_hook_runner.py b/server/tests/unit/domain/validation/test_hook_runner.py index 2536ff0..9486b3a 100644 --- a/server/tests/unit/domain/validation/test_hook_runner.py +++ b/server/tests/unit/domain/validation/test_hook_runner.py @@ -4,7 +4,7 @@ import pytest -from osa.domain.shared.model.hook import HookDefinition +from osa.domain.shared.model.hook import HookIdentity from osa.domain.validation.model.hook_result import HookResult, HookStatus from osa.domain.validation.model.hook_input import HookRecord from osa.domain.validation.port.hook_runner import HookInputs, HookRunner @@ -78,7 +78,7 @@ def test_concrete_class_satisfies_protocol(self): class FakeRunner: async def run( self, - hook: HookDefinition, + hook: HookIdentity, inputs: HookInputs, workspace_dir: Path, ) -> HookResult: diff --git a/server/tests/unit/domain/validation/test_hook_service.py b/server/tests/unit/domain/validation/test_hook_service.py index 7b1b4b9..cb2d1fe 100644 --- a/server/tests/unit/domain/validation/test_hook_service.py +++ b/server/tests/unit/domain/validation/test_hook_service.py @@ -1,14 +1,16 @@ """Tests for HookService — OOM retry with checkpointing.""" +from datetime import UTC, datetime 
from pathlib import Path from typing import Any from unittest.mock import AsyncMock +from uuid import uuid4 import pytest from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + HookIdentity, OciConfig, OciLimits, TableFeatureSpec, @@ -20,22 +22,34 @@ ) from osa.domain.validation.model.hook_input import HookRecord from osa.domain.shared.error import OOMError +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId from osa.domain.validation.model.hook_result import HookResult, HookStatus from osa.domain.validation.port.hook_runner import HookInputs -def _make_hook(name: str = "detect_pockets", memory: str = "1g") -> HookDefinition: - return HookDefinition( +def _make_hook(name: str = "detect_pockets") -> HookIdentity: + # #145: HookIdentity holds only name + feature; runtime moved to HookRelease. + return HookIdentity( name=name, + feature=TableFeatureSpec( + cardinality="one", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ), + ) + + +def _make_release(name: str = "detect_pockets", memory: str = "1g") -> HookRelease: + return HookRelease( + id=HookReleaseId(uuid4()), + hook_name=name, + version=1, runtime=OciConfig( image="img:v1", digest="sha256:abc", limits=OciLimits(memory=memory), ), - feature=TableFeatureSpec( - cardinality="one", - columns=[ColumnDef(name="score", json_type="number", required=True)], - ), + source_ref="git:abc", + built_at=datetime.now(UTC), ) @@ -97,6 +111,7 @@ async def test_no_oom_runs_once(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() records = _make_records(2) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -118,9 +133,10 @@ async def test_no_oom_runs_once(self, tmp_path: Path): ) service = HookService(hook_runner=runner, hook_storage=storage) - result = await service.run_hook(hook, _inputs(records), work_dir) + result = await service.run_hook(hook, release, _inputs(records), 
work_dir) assert result.status == HookStatus.PASSED + assert result.oom_retries == 0 # clean run records no retries (#145) runner.run.assert_called_once() @@ -131,7 +147,8 @@ class TestHookServiceOOMRetry: async def test_oom_retry_doubles_memory(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService - hook = _make_hook(memory="1g") + hook = _make_hook() + release = _make_release(memory="1g") records = _make_records(2) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -142,7 +159,7 @@ async def test_oom_retry_doubles_memory(self, tmp_path: Path): call_count = 0 - async def mock_run(h, inputs, wd): + async def mock_run(h, rel, inputs, wd): nonlocal call_count call_count += 1 if call_count == 1: @@ -165,13 +182,55 @@ async def mock_run(h, inputs, wd): storage = FakeHookStorage() service = HookService(hook_runner=runner, hook_storage=storage) - result = await service.run_hook(hook, _inputs(records), work_dir) + result = await service.run_hook(hook, release, _inputs(records), work_dir) assert result.status == HookStatus.PASSED + assert result.oom_retries == 1 # one OOM retry surfaced into provenance (#145) assert runner.run.call_count == 2 - # Second call should have doubled memory - second_call_hook = runner.run.call_args_list[1][0][0] - assert second_call_hook.runtime.limits.memory == "2g" + # Second call should have doubled memory — escalation lives on the release (#145). 
+ second_call_release = runner.run.call_args_list[1][0][1] + assert second_call_release.runtime.limits.memory == "2g" + + @pytest.mark.asyncio + async def test_oom_retries_counted_across_multiple_retries(self, tmp_path: Path): + """Two OOMs then success → oom_retries == 2, memory doubled twice.""" + from osa.domain.validation.service.hook import HookService + + hook = _make_hook() + release = _make_release(memory="1g") + records = _make_records(1) + work_dir = tmp_path / "hook_out" + work_dir.mkdir() + output_dir = work_dir / "output" + output_dir.mkdir(parents=True) + + import json + + call_count = 0 + + async def mock_run(h, rel, inputs, wd): + nonlocal call_count + call_count += 1 + if call_count <= 2: + raise _oom_error() + features_file = output_dir / "features.jsonl" + features_file.write_text( + json.dumps({"id": records[0].id, "features": [{"score": 0.8}]}) + "\n" + ) + return _passed_result() + + runner = AsyncMock() + runner.run.side_effect = mock_run + storage = FakeHookStorage() + + service = HookService(hook_runner=runner, hook_storage=storage) + result = await service.run_hook(hook, release, _inputs(records), work_dir) + + assert result.status == HookStatus.PASSED + assert result.oom_retries == 2 + assert runner.run.call_count == 3 # 1 initial + 2 retries + # Memory doubled twice: 1g → 2g → 4g. 
+ assert runner.run.call_args_list[2][0][1].runtime.limits.memory == "4g" class TestHookServiceOOMExhaustion: @@ -181,7 +240,8 @@ class TestHookServiceOOMExhaustion: async def test_oom_exhaustion_marks_errored(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService - hook = _make_hook(memory="1g") + hook = _make_hook() + release = _make_release(memory="1g") records = _make_records(1) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -194,7 +254,7 @@ async def test_oom_exhaustion_marks_errored(self, tmp_path: Path): service = HookService(hook_runner=runner, hook_storage=storage) with pytest.raises(OOMError): - await service.run_hook(hook, _inputs(records), work_dir) + await service.run_hook(hook, release, _inputs(records), work_dir) # Should have retried MAX_OOM_RETRIES times assert runner.run.call_count == 4 # 1 initial + 3 retries @@ -216,6 +276,7 @@ async def test_non_oom_failure_no_retry(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() records = _make_records(1) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -227,7 +288,7 @@ async def test_non_oom_failure_no_retry(self, tmp_path: Path): service = HookService(hook_runner=runner, hook_storage=storage) with pytest.raises(PermanentError): - await service.run_hook(hook, _inputs(records), work_dir) + await service.run_hook(hook, release, _inputs(records), work_dir) runner.run.assert_called_once() @@ -240,6 +301,7 @@ async def test_finalize_writes_canonical_files(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() records = _make_records(2) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -259,7 +321,7 @@ async def test_finalize_writes_canonical_files(self, tmp_path: Path): storage = FakeHookStorage() service = HookService(hook_runner=runner, hook_storage=storage) - await service.run_hook(hook, _inputs(records), work_dir) + 
await service.run_hook(hook, release, _inputs(records), work_dir) assert str(work_dir) in storage.written_outcomes outcomes = storage.written_outcomes[str(work_dir)] @@ -277,6 +339,7 @@ async def test_empty_records_noop(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -284,7 +347,7 @@ async def test_empty_records_noop(self, tmp_path: Path): storage = FakeHookStorage() service = HookService(hook_runner=runner, hook_storage=storage) - result = await service.run_hook(hook, _inputs([]), work_dir) + result = await service.run_hook(hook, release, _inputs([]), work_dir) assert result.status == HookStatus.PASSED runner.run.assert_not_called() @@ -298,7 +361,9 @@ async def test_multi_hook_second_ooms(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook1 = _make_hook(name="hook_one") - hook2 = _make_hook(name="hook_two", memory="512m") + hook2 = _make_hook(name="hook_two") + release1 = _make_release(name="hook_one") + release2 = _make_release(name="hook_two", memory="512m") records = _make_records(1) work_dir1 = tmp_path / "hook_one" @@ -320,10 +385,10 @@ async def test_multi_hook_second_ooms(self, tmp_path: Path): call_index = 0 - async def side_effect(h, inputs, wd): + async def side_effect(h, rel, inputs, wd): nonlocal call_index call_index += 1 - if h.name == "hook_one": + if h.name.root == "hook_one": return _passed_result(hook_name="hook_one") else: raise _oom_error() @@ -333,16 +398,16 @@ async def side_effect(h, inputs, wd): service = HookService(hook_runner=runner, hook_storage=storage) # Run hook 1 — should pass - r1 = await service.run_hook(hook1, _inputs(records), work_dir1) + r1 = await service.run_hook(hook1, release1, _inputs(records), work_dir1) assert r1.status == HookStatus.PASSED # Run hook 2 — should OOM and exhaust retries, then raise with pytest.raises(OOMError): - await 
service.run_hook(hook2, _inputs(records), work_dir2) + await service.run_hook(hook2, release2, _inputs(records), work_dir2) # Hook 1 was called once, hook 2 was called 4 times (1 + 3 retries) - hook1_calls = [c for c in runner.run.call_args_list if c[0][0].name == "hook_one"] - hook2_calls = [c for c in runner.run.call_args_list if c[0][0].name != "hook_one"] + hook1_calls = [c for c in runner.run.call_args_list if c[0][0].name.root == "hook_one"] + hook2_calls = [c for c in runner.run.call_args_list if c[0][0].name.root != "hook_one"] assert len(hook1_calls) == 1 assert len(hook2_calls) == 4 @@ -355,6 +420,7 @@ async def test_checkpoint_crash_recovery(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() records = _make_records(3) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -379,7 +445,7 @@ async def test_checkpoint_crash_recovery(self, tmp_path: Path): runner = AsyncMock() storage = FakeHookStorage() - async def mock_run(h, inputs, wd): + async def mock_run(h, rel, inputs, wd): # Should only receive rec1 and rec2, not rec0 input_ids = [r.id for r in inputs.records] assert "rec0" not in input_ids @@ -393,7 +459,7 @@ async def mock_run(h, inputs, wd): runner.run.side_effect = mock_run service = HookService(hook_runner=runner, hook_storage=storage) - result = await service.run_hook(hook, _inputs(records), work_dir) + result = await service.run_hook(hook, release, _inputs(records), work_dir) assert result.status == HookStatus.PASSED runner.run.assert_called_once() @@ -410,6 +476,7 @@ async def test_checkpoint_all_complete_skips_hook(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() records = _make_records(2) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -428,7 +495,7 @@ async def test_checkpoint_all_complete_skips_hook(self, tmp_path: Path): storage = FakeHookStorage() service = 
HookService(hook_runner=runner, hook_storage=storage) - result = await service.run_hook(hook, _inputs(records), work_dir) + result = await service.run_hook(hook, release, _inputs(records), work_dir) assert result.status == HookStatus.PASSED runner.run.assert_not_called() @@ -442,6 +509,7 @@ async def test_records_sorted_by_file_size(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() # Create records with different sizes — large first to test reordering records = [ HookRecord(id="large", metadata={}, size_hint_mb=100.0), @@ -457,7 +525,7 @@ async def test_records_sorted_by_file_size(self, tmp_path: Path): captured_order: list[str] = [] - async def mock_run(h, inputs, wd): + async def mock_run(h, rel, inputs, wd): for r in inputs.records: captured_order.append(r.id) features_file = output_dir / "features.jsonl" @@ -471,7 +539,7 @@ async def mock_run(h, inputs, wd): storage = FakeHookStorage() service = HookService(hook_runner=runner, hook_storage=storage) - await service.run_hook(hook, _inputs(records), work_dir) + await service.run_hook(hook, release, _inputs(records), work_dir) assert captured_order == ["small", "medium", "large"] @@ -480,6 +548,7 @@ async def test_sorting_skipped_when_no_sizes(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() # All records have default size_hint_mb=0 — original order preserved records = [ HookRecord(id="a", metadata={}), @@ -495,7 +564,7 @@ async def test_sorting_skipped_when_no_sizes(self, tmp_path: Path): captured_order: list[str] = [] - async def mock_run(h, inputs, wd): + async def mock_run(h, rel, inputs, wd): for r in inputs.records: captured_order.append(r.id) features_file = output_dir / "features.jsonl" @@ -509,7 +578,7 @@ async def mock_run(h, inputs, wd): storage = FakeHookStorage() service = HookService(hook_runner=runner, hook_storage=storage) - await 
service.run_hook(hook, _inputs(records), work_dir) + await service.run_hook(hook, release, _inputs(records), work_dir) assert captured_order == ["a", "b", "c"] @@ -522,6 +591,7 @@ async def test_corrupted_checkpoint_treated_as_empty(self, tmp_path: Path): from osa.domain.validation.service.hook import HookService hook = _make_hook() + release = _make_release() records = _make_records(2) work_dir = tmp_path / "hook_out" work_dir.mkdir() @@ -536,7 +606,7 @@ async def test_corrupted_checkpoint_treated_as_empty(self, tmp_path: Path): runner = AsyncMock() - async def mock_run(h, inputs, wd): + async def mock_run(h, rel, inputs, wd): # Should receive ALL records since checkpoint is corrupted assert len(inputs.records) == 2 features_file = output_dir / "features.jsonl" @@ -549,7 +619,7 @@ async def mock_run(h, inputs, wd): storage = FakeHookStorage() service = HookService(hook_runner=runner, hook_storage=storage) - result = await service.run_hook(hook, _inputs(records), work_dir) + result = await service.run_hook(hook, release, _inputs(records), work_dir) assert result.status == HookStatus.PASSED runner.run.assert_called_once() diff --git a/server/tests/unit/domain/validation/test_release_handlers.py b/server/tests/unit/domain/validation/test_release_handlers.py new file mode 100644 index 0000000..9fee23e --- /dev/null +++ b/server/tests/unit/domain/validation/test_release_handlers.py @@ -0,0 +1,231 @@ +"""Unit tests for the hook release / live-pointer command + query handlers (#145). + +US3 (create release, catalog/history/detail) and US4 (set live). Services are +mocked; these assert the handler orchestration + auth gates only. 
+""" + +from __future__ import annotations + +from datetime import UTC, datetime +from unittest.mock import AsyncMock +from uuid import uuid4 + +import pytest + +from osa.domain.auth.model.principal import Principal +from osa.domain.auth.model.role import Role +from osa.domain.auth.model.value import ProviderIdentity, UserId +from osa.domain.shared.authorization.gate import Public +from osa.domain.shared.error import AuthorizationError, NotFoundError +from osa.domain.shared.model.hook import ColumnDef, HookName, OciConfig, OciLimits, TableFeatureSpec +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId, ReleaseOutcome + +NAME = HookName("pocket_detect") + + +def _principal(role: Role = Role.ADMIN) -> Principal: + return Principal( + user_id=UserId.generate(), + provider_identity=ProviderIdentity(provider="test", external_id="ext"), + roles=frozenset({role}), + ) + + +def _feature() -> TableFeatureSpec: + return TableFeatureSpec( + cardinality="many", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ) + + +def _release(version: int, digest: str, rid: HookReleaseId | None = None) -> HookRelease: + return HookRelease( + id=rid or HookReleaseId(uuid4()), + hook_name=NAME, + version=version, + runtime=OciConfig(image="reg/pocket:abc", digest=digest, config={}, limits=OciLimits()), + source_ref="git-sha", + built_by=None, + built_at=datetime.now(UTC), + ) + + +def _hook(live: HookReleaseId | None) -> Hook: + return Hook(name=NAME, feature=_feature(), live_release_id=live, created_at=datetime.now(UTC)) + + +class TestCreateReleaseHandler: + @pytest.mark.asyncio + async def test_new_version_is_live_and_created(self) -> None: + from osa.domain.validation.command.create_release import ( + CreateRelease, + CreateReleaseHandler, + ) + + new = _release(2, "sha256:new") + service = AsyncMock() + service.create_release.return_value = ReleaseOutcome(release=new, created=True) + 
service.get_hook.return_value = _hook(live=new.id) + + handler = CreateReleaseHandler(principal=_principal(), service=service) + result = await handler.run( + CreateRelease(name=NAME, image="reg/pocket:def", digest="sha256:new", source_ref="git") + ) + + assert result.version == 2 + assert result.live is True + assert result.created is True + + @pytest.mark.asyncio + async def test_idempotent_redeploy_is_not_created(self) -> None: + from osa.domain.validation.command.create_release import ( + CreateRelease, + CreateReleaseHandler, + ) + + existing = _release(1, "sha256:old") + service = AsyncMock() + service.create_release.return_value = ReleaseOutcome(release=existing, created=False) + service.get_hook.return_value = _hook(live=existing.id) + + handler = CreateReleaseHandler(principal=_principal(), service=service) + result = await handler.run( + CreateRelease(name=NAME, image="reg/pocket:abc", digest="sha256:old", source_ref="git") + ) + + assert result.created is False + assert result.version == 1 + + @pytest.mark.asyncio + async def test_requires_admin(self) -> None: + from osa.domain.validation.command.create_release import ( + CreateRelease, + CreateReleaseHandler, + ) + + handler = CreateReleaseHandler(principal=_principal(Role.DEPOSITOR), service=AsyncMock()) + with pytest.raises(AuthorizationError): + await handler.run(CreateRelease(name=NAME, image="i", digest="d", source_ref="git")) + + +class TestSetLiveHandler: + @pytest.mark.asyncio + async def test_repoints_live(self) -> None: + from osa.domain.validation.command.set_live import SetLive, SetLiveHandler + + target = _release(1, "sha256:old") + service = AsyncMock() + service.set_live.return_value = _hook(live=target.id) + + handler = SetLiveHandler(principal=_principal(), service=service) + result = await handler.run(SetLive(name=NAME, version=1)) + + assert result.live_version == 1 + assert result.live_release_id == target.id + service.set_live.assert_awaited_once_with(NAME, 1) + + 
@pytest.mark.asyncio + async def test_unknown_version_propagates(self) -> None: + from osa.domain.validation.command.set_live import SetLive, SetLiveHandler + + service = AsyncMock() + service.set_live.side_effect = NotFoundError("nope") + + handler = SetLiveHandler(principal=_principal(), service=service) + with pytest.raises(NotFoundError): + await handler.run(SetLive(name=NAME, version=99)) + + +class TestListHooksHandler: + @pytest.mark.asyncio + async def test_catalog_includes_live_release(self) -> None: + from osa.domain.validation.query.list_hooks import HookCatalog, ListHooks, ListHooksHandler + + live = _release(3, "sha256:live") + service = AsyncMock() + service.list_hooks.return_value = [_hook(live=live.id)] + service.resolve_live.return_value = {NAME: live} + + handler = ListHooksHandler(service=service) + result: HookCatalog = await handler.run(ListHooks()) + + assert len(result.items) == 1 + assert result.items[0].live_release is not None + assert result.items[0].live_release.version == 3 + + @pytest.mark.asyncio + async def test_hook_without_live_release(self) -> None: + from osa.domain.validation.query.list_hooks import ListHooks, ListHooksHandler + + service = AsyncMock() + service.list_hooks.return_value = [_hook(live=None)] + service.resolve_live.return_value = {} + + handler = ListHooksHandler(service=service) + result = await handler.run(ListHooks()) + + assert result.items[0].live_release is None + + def test_is_public(self) -> None: + from osa.domain.validation.query.list_hooks import ListHooksHandler + + assert isinstance(ListHooksHandler.__auth__, Public) + + +class TestListReleasesHandler: + @pytest.mark.asyncio + async def test_history_descending_with_live_version(self) -> None: + from osa.domain.validation.query.list_releases import ListReleases, ListReleasesHandler + + v2 = _release(2, "sha256:v2") + v1 = _release(1, "sha256:v1") + service = AsyncMock() + service.get_hook.return_value = _hook(live=v2.id) + 
service.list_releases.return_value = [v2, v1] + + handler = ListReleasesHandler(service=service) + result = await handler.run(ListReleases(name=NAME)) + + assert [r.version for r in result.releases] == [2, 1] + assert result.live_version == 2 + + @pytest.mark.asyncio + async def test_unknown_hook_404(self) -> None: + from osa.domain.validation.query.list_releases import ListReleases, ListReleasesHandler + + service = AsyncMock() + service.get_hook.return_value = None + + handler = ListReleasesHandler(service=service) + with pytest.raises(NotFoundError): + await handler.run(ListReleases(name=NAME)) + + +class TestGetReleaseHandler: + @pytest.mark.asyncio + async def test_detail_marks_live(self) -> None: + from osa.domain.validation.query.get_release import GetRelease, GetReleaseHandler + + rel = _release(2, "sha256:v2") + service = AsyncMock() + service.get_release.return_value = rel + service.get_hook.return_value = _hook(live=rel.id) + + handler = GetReleaseHandler(service=service) + result = await handler.run(GetRelease(name=NAME, version=2)) + + assert result.version == 2 + assert result.live is True + assert result.image == "reg/pocket:abc" + + @pytest.mark.asyncio + async def test_unknown_release_404(self) -> None: + from osa.domain.validation.query.get_release import GetRelease, GetReleaseHandler + + service = AsyncMock() + service.get_release.return_value = None + + handler = GetReleaseHandler(service=service) + with pytest.raises(NotFoundError): + await handler.run(GetRelease(name=NAME, version=99)) diff --git a/server/tests/unit/domain/validation/test_validation_service.py b/server/tests/unit/domain/validation/test_validation_service.py index 2b73881..a8be2f3 100644 --- a/server/tests/unit/domain/validation/test_validation_service.py +++ b/server/tests/unit/domain/validation/test_validation_service.py @@ -1,39 +1,71 @@ """Unit tests for ValidationService — hook execution orchestration.""" +from datetime import UTC, datetime from pathlib import Path from 
unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 import pytest from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + HookName, OciConfig, TableFeatureSpec, ) from osa.domain.shared.model.srn import DepositionSRN, Domain from osa.domain.validation.model import RunStatus from osa.domain.shared.error import OOMError, PermanentError +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId from osa.domain.validation.model.hook_result import HookResult, HookStatus from osa.domain.validation.model.hook_input import HookRecord from osa.domain.validation.port.hook_runner import HookInputs from osa.domain.validation.service.validation import ValidationService -def _make_hook_definition(name: str = "pocket_detect") -> HookDefinition: - return HookDefinition( - name=name, - runtime=OciConfig( - image="ghcr.io/example/hook", - digest="sha256:abc123", - ), +def _make_hook(name: str = "pocket_detect") -> Hook: + # #145: a hook's identity is name + feature; runtime lives on its release. 
+ return Hook( + name=HookName(name), feature=TableFeatureSpec( cardinality="many", columns=[ColumnDef(name="score", json_type="number", required=True)], ), + live_release_id=HookReleaseId(uuid4()), + created_at=datetime.now(UTC), ) +def _make_release(name: str = "pocket_detect") -> HookRelease: + return HookRelease( + id=HookReleaseId(uuid4()), + hook_name=HookName(name), + version=1, + runtime=OciConfig( + image="ghcr.io/example/hook", + digest="sha256:abc123", + ), + source_ref="git:abc123", + built_at=datetime.now(UTC), + ) + + +def _make_registry(names: list[str]) -> AsyncMock: + """Fake HookRegistryService resolving each name to a Hook + live HookRelease.""" + hooks = {HookName(n): _make_hook(n) for n in names} + releases = {HookName(n): _make_release(n) for n in names} + + registry = AsyncMock() + registry.resolve_live.return_value = releases + + async def get_hook(name: HookName) -> Hook | None: + return hooks.get(name) + + registry.get_hook.side_effect = get_hook + return registry + + def _make_hook_result( name: str = "pocket_detect", status: HookStatus = HookStatus.PASSED, @@ -49,19 +81,22 @@ def _make_service( run_repo: AsyncMock | None = None, hook_runner: AsyncMock | None = None, hook_storage: MagicMock | None = None, + hook_registry: AsyncMock | None = None, ) -> ValidationService: hs = hook_storage or MagicMock() if not hasattr(hs, "get_hook_output_dir") or not callable(hs.get_hook_output_dir): hs.get_hook_output_dir = MagicMock(return_value=Path("/tmp/hooks/test")) if not hasattr(hs, "get_files_dir") or not callable(hs.get_files_dir): hs.get_files_dir = MagicMock(return_value=Path("/tmp/files/test")) - # write_checkpoint and write_batch_outcomes are async + # write_checkpoint / write_batch_outcomes / write_run_ref are async hs.write_checkpoint = AsyncMock() hs.write_batch_outcomes = AsyncMock() + hs.write_run_ref = AsyncMock() return ValidationService( run_repo=run_repo or AsyncMock(), hook_runner=hook_runner or AsyncMock(), hook_storage=hs, + 
hook_registry=hook_registry or _make_registry(["pocket_detect"]), node_domain=Domain("localhost"), ) @@ -100,12 +135,11 @@ async def test_all_hooks_pass(self): service = _make_service(run_repo, hook_runner) run = await service.create_run(inputs=_make_inputs()) - hook = _make_hook_definition() run, results = await service.run_hooks( run=run, deposition_srn=_make_dep_srn(), inputs=_make_inputs(), - hooks=[hook], + hook_names=[HookName("pocket_detect")], ) assert run.status == RunStatus.COMPLETED @@ -118,15 +152,15 @@ async def test_hook_rejected_halts_pipeline(self): hook_runner.run.return_value = _make_hook_result( status=HookStatus.REJECTED, ) - service = _make_service(hook_runner=hook_runner) + registry = _make_registry(["hook1", "hook2"]) + service = _make_service(hook_runner=hook_runner, hook_registry=registry) run = await service.create_run(inputs=_make_inputs()) - hooks = [_make_hook_definition("hook1"), _make_hook_definition("hook2")] run, results = await service.run_hooks( run=run, deposition_srn=_make_dep_srn(), inputs=_make_inputs(), - hooks=hooks, + hook_names=[HookName("hook1"), HookName("hook2")], ) assert run.status == RunStatus.REJECTED @@ -143,7 +177,7 @@ async def test_hook_failed_halts_pipeline(self): run=run, deposition_srn=_make_dep_srn(), inputs=_make_inputs(), - hooks=[_make_hook_definition()], + hook_names=[HookName("pocket_detect")], ) assert run.status == RunStatus.FAILED @@ -165,35 +199,35 @@ async def test_output_dir_from_hook_storage(self): run=run, deposition_srn=dep_srn, inputs=_make_inputs(), - hooks=[_make_hook_definition()], + hook_names=[HookName("pocket_detect")], ) hook_storage.get_hook_output_dir.assert_called_once_with(dep_srn, "pocket_detect") - # Runner receives the cold storage output_dir + # Runner receives (hook, release, inputs, output_dir) — output_dir is the cold path. 
call_args = hook_runner.run.call_args - assert call_args[0][2] == Path("/cold/hooks/pocket_detect") + assert call_args[0][3] == Path("/cold/hooks/pocket_detect") @pytest.mark.asyncio async def test_sequential_execution_order(self): """Hooks run in order; first pass before second starts.""" call_order = [] - async def run_hook(hook, inputs, output_dir): - call_order.append(hook.name) - return _make_hook_result(name=hook.name) + async def run_hook(hook, release, inputs, output_dir): + call_order.append(hook.name.root) + return _make_hook_result(name=hook.name.root) hook_runner = AsyncMock() hook_runner.run.side_effect = run_hook - service = _make_service(hook_runner=hook_runner) + registry = _make_registry(["hook_a", "hook_b"]) + service = _make_service(hook_runner=hook_runner, hook_registry=registry) run = await service.create_run(inputs=_make_inputs()) - hooks = [_make_hook_definition("hook_a"), _make_hook_definition("hook_b")] run, results = await service.run_hooks( run=run, deposition_srn=_make_dep_srn(), inputs=_make_inputs(), - hooks=hooks, + hook_names=[HookName("hook_a"), HookName("hook_b")], ) assert call_order == ["hook_a", "hook_b"] @@ -211,7 +245,7 @@ async def test_validation_service_halts_on_oom(self): run=run, deposition_srn=_make_dep_srn(), inputs=_make_inputs(), - hooks=[_make_hook_definition()], + hook_names=[HookName("pocket_detect")], ) assert run.status == RunStatus.FAILED @@ -221,28 +255,33 @@ async def test_validation_service_retries_on_oom(self): """OOM should be retried via HookService; PASSED on retry → COMPLETED.""" call_count = 0 - async def run_hook(hook, inputs, output_dir): + async def run_hook(hook, release, inputs, output_dir): nonlocal call_count call_count += 1 if call_count == 1: raise OOMError("Hook killed by OOM") return HookResult( - hook_name=hook.name, + hook_name=hook.name.root, status=HookStatus.PASSED, duration_seconds=5.0, ) hook_runner = AsyncMock() hook_runner.run.side_effect = run_hook - service = 
_make_service(hook_runner=hook_runner) + registry = _make_registry(["pocket_detect"]) + service = _make_service(hook_runner=hook_runner, hook_registry=registry) run = await service.create_run(inputs=_make_inputs()) run, results = await service.run_hooks( run=run, deposition_srn=_make_dep_srn(), inputs=_make_inputs(), - hooks=[_make_hook_definition()], + hook_names=[HookName("pocket_detect")], ) assert run.status == RunStatus.COMPLETED assert call_count == 2 + # The one OOM retry is threaded into the provenance record (#145, fix #3). + registry.record_run.assert_awaited_once() + recorded_run = registry.record_run.await_args.args[0] + assert recorded_run.oom_retries == 1 diff --git a/server/tests/unit/domain/validation/test_validation_service_decoupled.py b/server/tests/unit/domain/validation/test_validation_service_decoupled.py index c1b5b87..b36f43f 100644 --- a/server/tests/unit/domain/validation/test_validation_service_decoupled.py +++ b/server/tests/unit/domain/validation/test_validation_service_decoupled.py @@ -6,19 +6,23 @@ """ import inspect +from datetime import UTC, datetime from pathlib import Path from unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 import pytest from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + HookName, OciConfig, TableFeatureSpec, ) -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN, Domain +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN, Domain from osa.domain.validation.model import RunStatus +from osa.domain.validation.model.hook import Hook +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId from osa.domain.validation.model.hook_result import HookResult, HookStatus from osa.domain.validation.service.validation import ValidationService @@ -27,25 +31,44 @@ def _make_dep_srn() -> DepositionSRN: return DepositionSRN.parse("urn:osa:localhost:dep:test-dep") -def _make_conv_srn() -> ConventionSRN: - return 
ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +def _make_conv_slug() -> ConventionSlug: + return ConventionSlug("test") -def _make_hook_definition() -> HookDefinition: - return HookDefinition( - name="pocketeer", +def _make_hook(name: str = "pocketeer") -> Hook: + return Hook( + name=HookName(name), + feature=TableFeatureSpec( + cardinality="many", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ), + live_release_id=HookReleaseId(uuid4()), + created_at=datetime.now(UTC), + ) + + +def _make_release(name: str = "pocketeer") -> HookRelease: + return HookRelease( + id=HookReleaseId(uuid4()), + hook_name=HookName(name), + version=1, runtime=OciConfig( image="osa-hooks/pocketeer:latest", digest="sha256:abc123", config={"threshold": 0.5}, ), - feature=TableFeatureSpec( - cardinality="many", - columns=[ColumnDef(name="score", json_type="number", required=True)], - ), + source_ref="git:abc123", + built_at=datetime.now(UTC), ) +def _make_registry(name: str = "pocketeer") -> AsyncMock: + registry = AsyncMock() + registry.resolve_live.return_value = {HookName(name): _make_release(name)} + registry.get_hook.return_value = _make_hook(name) + return registry + + class TestDecoupledValidationService: def test_no_deposition_repo_dependency(self): """ValidationService no longer depends on DepositionRepository.""" @@ -67,7 +90,7 @@ def test_no_file_storage_dependency(self): @pytest.mark.asyncio async def test_validate_deposition_uses_event_data(self): - """validate_deposition accepts hooks/metadata directly.""" + """validate_deposition accepts hook names/metadata directly (#145).""" run_repo = AsyncMock() run_repo.save = AsyncMock() hook_runner = AsyncMock() @@ -81,20 +104,21 @@ async def test_validate_deposition_uses_event_data(self): hook_storage.get_files_dir.return_value = Path("/data/files/test-dep") hook_storage.write_checkpoint = AsyncMock() hook_storage.write_batch_outcomes = AsyncMock() + hook_storage.write_run_ref = AsyncMock() service = 
ValidationService( run_repo=run_repo, hook_runner=hook_runner, hook_storage=hook_storage, + hook_registry=_make_registry("pocketeer"), node_domain=Domain("localhost"), ) - hook = _make_hook_definition() run, hook_results = await service.validate_deposition( deposition_srn=_make_dep_srn(), - convention_srn=_make_conv_srn(), + convention_id=_make_conv_slug(), metadata={"pdb_id": "4HHB"}, - hooks=[hook], + hooks=[HookName("pocketeer")], ) assert run.status == RunStatus.COMPLETED diff --git a/server/tests/unit/infrastructure/event/test_worker_exhaustion.py b/server/tests/unit/infrastructure/event/test_worker_exhaustion.py index ef10915..e707e51 100644 --- a/server/tests/unit/infrastructure/event/test_worker_exhaustion.py +++ b/server/tests/unit/infrastructure/event/test_worker_exhaustion.py @@ -36,7 +36,7 @@ async def test_mark_failed_runs_even_if_on_exhausted_raises(self) -> None: event = NextBatchRequested( id=EventId(uuid4()), ingest_run_id=IngestRunId("run-1"), - convention_srn="urn:osa:localhost:conv:test@1.0.0", + convention_id="urn:osa:localhost:conv:test@1.0.0", batch_size=100, ) delivery = MagicMock() diff --git a/server/tests/unit/infrastructure/k8s/test_k8s_hook_runner.py b/server/tests/unit/infrastructure/k8s/test_k8s_hook_runner.py index ead5ad1..2e12ffe 100644 --- a/server/tests/unit/infrastructure/k8s/test_k8s_hook_runner.py +++ b/server/tests/unit/infrastructure/k8s/test_k8s_hook_runner.py @@ -1,7 +1,9 @@ """Unit tests for K8sHookRunner — Job spec, scheduling, execution, orphans, cleanup.""" +from datetime import UTC, datetime from pathlib import Path from unittest.mock import AsyncMock, MagicMock +from uuid import uuid4 import pytest @@ -13,11 +15,12 @@ ) from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + HookIdentity, OciConfig, OciLimits, TableFeatureSpec, ) +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId from osa.domain.validation.model.hook_result import HookStatus from 
osa.domain.validation.model.hook_input import HookRecord from osa.domain.validation.port.hook_runner import HookInputs @@ -26,7 +29,18 @@ _RUN_ID = "run-abc123" -def _make_hook( +def _make_hook(name: str = "validate_dna") -> HookIdentity: + """Build a HookIdentity (#145: name + feature only, no runtime).""" + return HookIdentity( + name=name, + feature=TableFeatureSpec( + cardinality="many", + columns=[ColumnDef(name="score", json_type="number", required=True)], + ), + ) + + +def _make_release( name: str = "validate_dna", timeout: int = 300, memory: str = "2g", @@ -34,19 +48,20 @@ def _make_hook( config: dict | None = None, image: str = "ghcr.io/example/hook:v1", digest: str = "sha256:abc123", -) -> HookDefinition: - return HookDefinition( - name=name, +) -> HookRelease: + """Build a HookRelease carrying the OCI runtime (#145).""" + return HookRelease( + id=HookReleaseId(uuid4()), + hook_name=name, + version=1, runtime=OciConfig( image=image, digest=digest, config=config or {}, limits=OciLimits(timeout_seconds=timeout, memory=memory, cpu=cpu), ), - feature=TableFeatureSpec( - cardinality="many", - columns=[ColumnDef(name="score", json_type="number", required=True)], - ), + source_ref="git+https://example.com/hook@abc", + built_at=datetime.now(UTC), ) @@ -88,9 +103,11 @@ def _make_runner(config: K8sConfig | None = None) -> K8sHookRunner: class TestJobSpecGeneration: def test_correct_image(self): runner = _make_runner() - hook = _make_hook(image="ghcr.io/org/hook:v2", digest="sha256:def456") + hook = _make_hook() + release = _make_release(image="ghcr.io/org/hook:v2", digest="sha256:def456") spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -101,8 +118,10 @@ def test_correct_image(self): def test_security_context(self): runner = _make_runner() hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, 
Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -122,9 +141,11 @@ def test_security_context(self): def test_resource_limits(self): runner = _make_runner() - hook = _make_hook(memory="4g", cpu="2.0") + hook = _make_hook() + release = _make_release(memory="4g", cpu="2.0") spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -136,8 +157,9 @@ def test_resource_limits(self): def test_volume_mounts(self): runner = _make_runner() hook = _make_hook() + release = _make_release() work_dir = Path("/data/depositions/localhost_abc/hooks/validate_dna") - spec = runner._build_job_spec(hook, work_dir, run_id=_RUN_ID) + spec = runner._build_job_spec(hook, release, work_dir, run_id=_RUN_ID) volumes = spec.spec.template.spec.volumes pvc_vol = next(v for v in volumes if v.name == "data") @@ -155,8 +177,10 @@ def test_volume_mounts(self): def test_env_vars(self): runner = _make_runner() hook = _make_hook(name="pocket_detect") + release = _make_release(name="pocket_detect") spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/pocket_detect"), run_id=_RUN_ID, ) @@ -170,8 +194,10 @@ def test_env_vars(self): def test_backoff_limit_zero(self): runner = _make_runner() hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -180,9 +206,11 @@ def test_backoff_limit_zero(self): def test_active_deadline_seconds(self): runner = _make_runner() - hook = _make_hook(timeout=300) + hook = _make_hook() + release = _make_release(timeout=300) spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -193,8 +221,10 @@ def test_active_deadline_seconds(self): def test_dns_policy_none(self): runner = _make_runner() hook = _make_hook() + release = _make_release() spec = 
runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -206,8 +236,10 @@ def test_dns_policy_none(self): def test_labels(self): runner = _make_runner() hook = _make_hook(name="validate_dna") + release = _make_release(name="validate_dna") spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -221,8 +253,10 @@ def test_labels(self): def test_human_readable_job_name(self): runner = _make_runner() hook = _make_hook(name="validate_dna") + release = _make_release(name="validate_dna") spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -234,8 +268,10 @@ def test_human_readable_job_name(self): def test_empty_dir_at_tmp(self): runner = _make_runner() hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -247,8 +283,10 @@ def test_empty_dir_at_tmp(self): def test_automount_service_account_false(self): runner = _make_runner() hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -259,8 +297,10 @@ def test_automount_service_account_false(self): def test_ttl_seconds_after_finished(self): runner = _make_runner(config=_make_config(job_ttl_seconds=600)) hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -270,8 +310,10 @@ def test_ttl_seconds_after_finished(self): def test_files_mount_when_files_dir_provided(self): runner = _make_runner() hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), 
run_id=_RUN_ID, files_dir=Path("/data/depositions/localhost_abc/files"), @@ -285,8 +327,10 @@ def test_files_mount_when_files_dir_provided(self): def test_image_pull_secrets(self): runner = _make_runner(config=_make_config(image_pull_secrets=["ghcr-secret"])) hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -298,8 +342,10 @@ def test_image_pull_secrets(self): def test_service_account(self): runner = _make_runner(config=_make_config(service_account="osa-runner")) hook = _make_hook() + release = _make_release() spec = runner._build_job_spec( hook, + release, Path("/data/depositions/localhost_abc/hooks/validate_dna"), run_id=_RUN_ID, ) @@ -471,13 +517,14 @@ async def test_successful_run(self, tmp_path: Path): # Configure S3 mock to return progress data hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" runner._s3.get_object.return_value = ( b'{"step":"Check","status":"completed","message":"OK"}\n' ) inputs = HookInputs(records=[HookRecord(id="test", metadata={})], run_id=_RUN_ID) - result = await runner._run_job(hook, inputs, work_dir) + result = await runner._run_job(hook, release, inputs, work_dir) assert result.status == HookStatus.PASSED assert len(result.progress) == 1 @@ -519,6 +566,7 @@ async def test_timeout_deadline_exceeded(self, tmp_path: Path): batch_api.read_namespaced_job.return_value = failed_job hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" work_dir.mkdir(parents=True) inputs = HookInputs(records=[HookRecord(id="test", metadata={})], run_id=_RUN_ID) @@ -526,6 +574,7 @@ async def test_timeout_deadline_exceeded(self, tmp_path: Path): with pytest.raises(TransientError, match="[Tt]imed out|[Dd]eadline"): await runner._run_job( hook, + release, inputs, work_dir, ) @@ -579,6 
+628,7 @@ async def test_oom_exit_137(self, tmp_path: Path): core_api.list_namespaced_pod.side_effect = [pod_list, oom_pod_list] hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" work_dir.mkdir(parents=True) inputs = HookInputs(records=[HookRecord(id="test", metadata={})], run_id=_RUN_ID) @@ -586,6 +636,7 @@ async def test_oom_exit_137(self, tmp_path: Path): with pytest.raises(OOMError, match="[Oo][Oo][Mm]"): await runner._run_job( hook, + release, inputs, work_dir, ) @@ -636,6 +687,7 @@ async def test_nonzero_exit(self, tmp_path: Path): core_api.list_namespaced_pod.side_effect = [pod_list, exit_pod_list] hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" work_dir.mkdir(parents=True) inputs = HookInputs(records=[HookRecord(id="test", metadata={})], run_id=_RUN_ID) @@ -643,6 +695,7 @@ async def test_nonzero_exit(self, tmp_path: Path): with pytest.raises(PermanentError, match="[Ee]xit"): await runner._run_job( hook, + release, inputs, work_dir, ) @@ -687,6 +740,7 @@ async def test_orphan_running_job_attaches(self, tmp_path: Path): batch_api.read_namespaced_job.return_value = completed_job hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" output_dir = work_dir / "output" output_dir.mkdir(parents=True) @@ -694,6 +748,7 @@ async def test_orphan_running_job_attaches(self, tmp_path: Path): result = await runner._run_job( hook, + release, inputs, work_dir, ) @@ -723,6 +778,7 @@ async def test_orphan_completed_job_reads_output(self, tmp_path: Path): batch_api.list_namespaced_job.return_value = job_list hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" output_dir = work_dir / "output" output_dir.mkdir(parents=True) @@ -730,6 +786,7 @@ async def 
test_orphan_completed_job_reads_output(self, tmp_path: Path): result = await runner._run_job( hook, + release, inputs, work_dir, ) @@ -778,6 +835,7 @@ async def test_orphan_failed_job_creates_new(self, tmp_path: Path): batch_api.read_namespaced_job.return_value = completed_job hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" output_dir = work_dir / "output" output_dir.mkdir(parents=True) @@ -785,6 +843,7 @@ async def test_orphan_failed_job_creates_new(self, tmp_path: Path): result = await runner._run_job( hook, + release, inputs, work_dir, ) @@ -842,6 +901,7 @@ async def test_rejection_via_progress(self, tmp_path: Path): batch_api.read_namespaced_job.return_value = completed_job hook = _make_hook() + release = _make_release() work_dir = tmp_path / "depositions" / "localhost_abc" / "hooks" / "validate_dna" runner._s3.get_object.return_value = ( b'{"step":"Validate","status":"rejected","message":"Missing atoms"}\n' @@ -850,6 +910,7 @@ async def test_rejection_via_progress(self, tmp_path: Path): result = await runner._run_job( hook, + release, inputs, work_dir, ) @@ -904,12 +965,13 @@ async def test_run_uses_run_id_from_inputs(self, tmp_path: Path): # S3 mock returns empty progress (default from _make_s3_mock) hook = _make_hook() + release = _make_release() inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="my-real-run-id", ) - await runner.run(hook, inputs, work_dir) + await runner.run(hook, release, inputs, work_dir) # Verify the Job was created with the run_id from inputs call_args = batch_api.create_namespaced_job.call_args diff --git a/server/tests/unit/infrastructure/k8s/test_k8s_ingester_runner.py b/server/tests/unit/infrastructure/k8s/test_k8s_ingester_runner.py index 0341814..e775154 100644 --- a/server/tests/unit/infrastructure/k8s/test_k8s_ingester_runner.py +++ b/server/tests/unit/infrastructure/k8s/test_k8s_ingester_runner.py @@ -9,11 +9,11 @@ from 
osa.config import K8sConfig from osa.domain.shared.error import OOMError, TransientError from osa.domain.shared.model.source import IngesterDefinition, IngesterLimits -from osa.domain.shared.model.srn import ConventionSRN +from osa.domain.shared.model.srn import ConventionSlug from osa.domain.shared.port.ingester_runner import IngesterInputs from osa.infrastructure.k8s.ingester_runner import K8sIngesterRunner -_CONV_SRN = ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") +_CONV_SLUG = ConventionSlug("test") def _make_ingester( @@ -228,7 +228,7 @@ def test_env_vars(self): ingester, work_dir=Path("/data/sources/localhost_conv1/staging/run1"), files_dir=Path("/data/sources/localhost_conv1/staging/run1/files"), - inputs=IngesterInputs(convention_srn=_CONV_SRN, limit=100, offset=50), + inputs=IngesterInputs(convention_id=_CONV_SLUG, limit=100, offset=50), ) env = spec.spec.template.spec.containers[0].env env_dict = {e.name: e.value for e in env} @@ -248,7 +248,7 @@ def test_since_env_var(self): ingester, work_dir=Path("/data/sources/localhost_conv1/staging/run1"), files_dir=Path("/data/sources/localhost_conv1/staging/run1/files"), - inputs=IngesterInputs(convention_srn=_CONV_SRN, since=since), + inputs=IngesterInputs(convention_id=_CONV_SLUG, since=since), ) env = spec.spec.template.spec.containers[0].env env_dict = {e.name: e.value for e in env} @@ -272,23 +272,23 @@ def test_human_readable_name(self): ingester, work_dir=Path("/data/sources/localhost_conv1/staging/run1"), files_dir=Path("/data/sources/localhost_conv1/staging/run1/files"), - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:conv1@1.0.0"), + convention_id=ConventionSlug("conv1"), ) name = spec.metadata.name assert name.startswith("osa-ingester-") assert len(name) <= 63 - def test_convention_srn_in_labels(self): + def test_convention_id_in_labels(self): runner = _make_runner() ingester = _make_ingester() spec = runner._build_job_spec( ingester, 
work_dir=Path("/data/sources/localhost_conv1/staging/run1"), files_dir=Path("/data/sources/localhost_conv1/staging/run1/files"), - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:conv1@1.0.0"), + convention_id=ConventionSlug("conv1"), ) labels = spec.spec.template.metadata.labels - assert labels["osa.io/convention"] == "localhost.conv.conv1.1.0.0" + assert labels["osa.io/convention"] == "conv1" # --------------------------------------------------------------------------- @@ -350,7 +350,7 @@ async def s3_get(key: str) -> bytes: runner._s3.get_object.side_effect = s3_get - inputs = IngesterInputs(convention_srn=_CONV_SRN) + inputs = IngesterInputs(convention_id=_CONV_SLUG) result = await runner._run_job(ingester, inputs, work_dir, files_dir) assert len(result.records) == 2 @@ -396,7 +396,7 @@ async def test_timeout_raises_transient_error(self, tmp_path: Path): work_dir.mkdir(parents=True) files_dir = work_dir / "files" files_dir.mkdir(parents=True) - inputs = IngesterInputs(convention_srn=_CONV_SRN) + inputs = IngesterInputs(convention_id=_CONV_SLUG) with pytest.raises(TransientError, match="[Tt]imed out|[Dd]eadline"): await runner._run_job(ingester, inputs, work_dir, files_dir) @@ -451,7 +451,7 @@ async def test_oom_raises_oom_error(self, tmp_path: Path): work_dir.mkdir(parents=True) files_dir = work_dir / "files" files_dir.mkdir(parents=True) - inputs = IngesterInputs(convention_srn=_CONV_SRN) + inputs = IngesterInputs(convention_id=_CONV_SLUG) with pytest.raises(OOMError, match="[Oo]OM"): await runner._run_job(ingester, inputs, work_dir, files_dir) @@ -463,10 +463,10 @@ async def test_oom_raises_oom_error(self, tmp_path: Path): class TestConventionSrnFromInputs: - """Verify run() threads convention_srn from inputs to Job labels.""" + """Verify run() threads convention_id from inputs to Job labels.""" @pytest.mark.asyncio - async def test_run_uses_convention_srn_from_inputs(self, tmp_path: Path): + async def test_run_uses_convention_id_from_inputs(self, 
tmp_path: Path): config = _make_config(data_mount_path=str(tmp_path)) runner = K8sIngesterRunner(api_client=MagicMock(), config=config, s3=_make_s3_mock()) @@ -503,14 +503,12 @@ async def test_run_uses_convention_srn_from_inputs(self, tmp_path: Path): files_dir = work_dir / "files" files_dir.mkdir(parents=True) - inputs = IngesterInputs( - convention_srn=ConventionSRN.parse("urn:osa:localhost:conv:my-conv@1.0.0") - ) + inputs = IngesterInputs(convention_id=ConventionSlug("my-conv")) await runner.run(ingester, inputs, files_dir, work_dir) - # Verify convention_srn from inputs ends up in the Job labels + # Verify convention_id from inputs ends up in the Job labels call_args = batch_api.create_namespaced_job.call_args spec = call_args[0][1] labels = spec.metadata.labels - assert labels["osa.io/convention"] == "localhost.conv.my-conv.1.0.0" + assert labels["osa.io/convention"] == "my-conv" diff --git a/server/tests/unit/infrastructure/k8s/test_naming.py b/server/tests/unit/infrastructure/k8s/test_naming.py index cf5d6d5..bb4ce1b 100644 --- a/server/tests/unit/infrastructure/k8s/test_naming.py +++ b/server/tests/unit/infrastructure/k8s/test_naming.py @@ -2,7 +2,7 @@ import re -from osa.domain.shared.model.srn import ConventionSRN, DepositionSRN +from osa.domain.shared.model.srn import ConventionSlug, DepositionSRN from osa.infrastructure.k8s.naming import job_name, label_value, sanitize_label @@ -78,10 +78,13 @@ def test_deposition_srn(self): assert result == "localhost.dep.abc123" assert ":" not in result - def test_convention_srn_with_version(self): - srn = ConventionSRN.parse("urn:osa:localhost:conv:test@1.0.0") - result = label_value(srn) - assert result == "localhost.conv.test.1.0.0" + def test_convention_slug(self): + # Conventions are now identified by a bare slug (#145); the runner + # passes ``slug.root`` to label_value. 
+ slug = ConventionSlug("pdb-structure") + result = label_value(slug.root) + assert result == "pdb-structure" + assert ":" not in result def test_no_colons_in_output(self): srn = DepositionSRN.parse("urn:osa:archive.university.edu:dep:xyz789") diff --git a/server/tests/unit/infrastructure/test_feature_table.py b/server/tests/unit/infrastructure/test_feature_table.py index d76b702..84084e1 100644 --- a/server/tests/unit/infrastructure/test_feature_table.py +++ b/server/tests/unit/infrastructure/test_feature_table.py @@ -51,6 +51,7 @@ def test_has_auto_columns(self) -> None: assert "id" in table.c assert "record_srn" in table.c + assert "run_id" in table.c assert "created_at" in table.c def test_id_is_primary_key(self) -> None: @@ -81,7 +82,12 @@ def test_data_columns_from_schema(self) -> None: def test_empty_schema_has_only_auto_columns(self) -> None: table = build_feature_table("empty", FeatureSchema()) - assert set(c.key for c in table.columns) == {"id", "record_srn", "created_at"} + assert set(c.key for c in table.columns) == { + "id", + "record_srn", + "run_id", + "created_at", + } def test_array_column_is_jsonb(self) -> None: schema = FeatureSchema( @@ -110,6 +116,7 @@ def test_excludes_auto_columns(self) -> None: assert col_names == ["score", "label"] assert "id" not in col_names assert "record_srn" not in col_names + assert "run_id" not in col_names assert "created_at" not in col_names def test_empty_for_schema_with_no_data_columns(self) -> None: diff --git a/server/tests/unit/infrastructure/test_oci_hook_runner.py b/server/tests/unit/infrastructure/test_oci_hook_runner.py index 33e20a8..ed8f3f7 100644 --- a/server/tests/unit/infrastructure/test_oci_hook_runner.py +++ b/server/tests/unit/infrastructure/test_oci_hook_runner.py @@ -1,18 +1,21 @@ """Unit tests for OciHookRunner — container lifecycle, parsing, and bind-mount config.""" +from datetime import UTC, datetime from pathlib import Path from unittest.mock import AsyncMock +from uuid import uuid4 import 
pytest from osa.domain.shared.error import OOMError, PermanentError, TransientError from osa.domain.shared.model.hook import ( ColumnDef, - HookDefinition, + HookIdentity, OciConfig, OciLimits, TableFeatureSpec, ) +from osa.domain.validation.model.hook_release import HookRelease, HookReleaseId from osa.domain.validation.model.hook_result import HookStatus, ProgressEntry from osa.domain.validation.model.hook_input import HookRecord from osa.domain.validation.port.hook_runner import HookInputs @@ -25,27 +28,39 @@ ) -def _make_hook( +def _make_hook(name: str = "pocket_detect") -> HookIdentity: + """Build a HookIdentity (#145: name + feature only, no runtime).""" + return HookIdentity( + name=name, + feature=TableFeatureSpec( + cardinality="many", + columns=[ + ColumnDef(name="score", json_type="number", required=True), + ], + ), + ) + + +def _make_release( name: str = "pocket_detect", timeout: int = 300, memory: str = "2g", cpu: str = "2.0", config: dict | None = None, -) -> HookDefinition: - return HookDefinition( - name=name, +) -> HookRelease: + """Build a HookRelease carrying the OCI runtime (#145).""" + return HookRelease( + id=HookReleaseId(uuid4()), + hook_name=name, + version=1, runtime=OciConfig( image="ghcr.io/example/hook:v1", digest="sha256:abc123", config=config or {}, limits=OciLimits(timeout_seconds=timeout, memory=memory, cpu=cpu), ), - feature=TableFeatureSpec( - cardinality="many", - columns=[ - ColumnDef(name="score", json_type="number", required=True), - ], - ), + source_ref="git+https://example.com/hook@abc", + built_at=datetime.now(UTC), ) @@ -200,6 +215,7 @@ async def test_successful_hook_returns_passed(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -208,10 +224,10 @@ async def test_successful_hook_returns_passed(self, tmp_path: Path): output_dir = tmp_path / "output" output_dir.mkdir() - result = 
await runner.run(hook, inputs, output_dir) + result = await runner.run(hook, release, inputs, output_dir) assert result.status == HookStatus.PASSED - assert result.hook_name == "pocket_detect" + assert result.hook_name.root == "pocket_detect" assert result.duration_seconds > 0 container.delete.assert_called_once_with(force=True) @@ -226,6 +242,7 @@ async def test_nonzero_exit_returns_failed(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -235,7 +252,7 @@ async def test_nonzero_exit_returns_failed(self, tmp_path: Path): output_dir.mkdir() with pytest.raises(PermanentError, match="[Ee]xit"): - await runner.run(hook, inputs, output_dir) + await runner.run(hook, release, inputs, output_dir) @pytest.mark.asyncio async def test_oom_killed_raises_oom_error(self, tmp_path: Path): @@ -247,6 +264,7 @@ async def test_oom_killed_raises_oom_error(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -256,7 +274,7 @@ async def test_oom_killed_raises_oom_error(self, tmp_path: Path): output_dir.mkdir() with pytest.raises(OOMError, match="[Oo][Oo][Mm]"): - await runner.run(hook, inputs, output_dir) + await runner.run(hook, release, inputs, output_dir) @pytest.mark.asyncio async def test_timeout_raises_infrastructure_error(self, tmp_path: Path): @@ -274,7 +292,8 @@ async def hang(): container.wait.side_effect = hang runner = OciHookRunner(docker=docker) - hook = _make_hook(timeout=1) # 1 second timeout + hook = _make_hook() + release = _make_release(timeout=1) # 1 second timeout inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -284,7 +303,7 @@ async def hang(): output_dir.mkdir() with pytest.raises(TransientError, match="[Tt]imed out"): - await 
runner.run(hook, inputs, output_dir) + await runner.run(hook, release, inputs, output_dir) @pytest.mark.asyncio async def test_rejection_via_progress(self, tmp_path: Path): @@ -296,6 +315,7 @@ async def test_rejection_via_progress(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() work_dir = tmp_path / "hook_work" work_dir.mkdir() @@ -312,7 +332,7 @@ async def test_rejection_via_progress(self, tmp_path: Path): '{"step":"Validate","status":"rejected","message":"Missing atoms"}\n' ) - result = await runner.run(hook, inputs, work_dir) + result = await runner.run(hook, release, inputs, work_dir) assert result.status == HookStatus.REJECTED assert result.rejection_reason == "Missing atoms" @@ -329,7 +349,8 @@ async def test_security_hardening(self, tmp_path: Path): container.show.return_value = {"State": {"OOMKilled": False}} runner = OciHookRunner(docker=docker) - hook = _make_hook(memory="4g", cpu="4.0") + hook = _make_hook() + release = _make_release(memory="4g", cpu="4.0") inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -338,7 +359,7 @@ async def test_security_hardening(self, tmp_path: Path): output_dir = tmp_path / "output" output_dir.mkdir() - await runner.run(hook, inputs, output_dir) + await runner.run(hook, release, inputs, output_dir) # Inspect the config passed to containers.create call_args = docker.containers.create.call_args @@ -364,6 +385,7 @@ async def test_env_vars_set(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -372,7 +394,7 @@ async def test_env_vars_set(self, tmp_path: Path): output_dir = tmp_path / "output" output_dir.mkdir() - await runner.run(hook, inputs, output_dir) + await runner.run(hook, release, inputs, output_dir) call_args = docker.containers.create.call_args config = call_args[0][0] if 
call_args[0] else call_args[1].get("config", {}) @@ -392,6 +414,7 @@ async def test_nested_bind_mounts(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() files_dir = tmp_path / "files" files_dir.mkdir() inputs = HookInputs( @@ -403,7 +426,7 @@ async def test_nested_bind_mounts(self, tmp_path: Path): work_dir = tmp_path / "hook_work" work_dir.mkdir() - await runner.run(hook, inputs, work_dir) + await runner.run(hook, release, inputs, work_dir) call_args = docker.containers.create.call_args config = call_args[0][0] if call_args[0] else call_args[1].get("config", {}) @@ -430,6 +453,7 @@ async def test_no_files_bind_when_no_files_dir(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -438,7 +462,7 @@ async def test_no_files_bind_when_no_files_dir(self, tmp_path: Path): output_dir = tmp_path / "output" output_dir.mkdir() - await runner.run(hook, inputs, output_dir) + await runner.run(hook, release, inputs, output_dir) call_args = docker.containers.create.call_args config = call_args[0][0] if call_args[0] else call_args[1].get("config", {}) @@ -461,6 +485,7 @@ async def test_container_deleted_on_failure(self, tmp_path: Path): runner = OciHookRunner(docker=docker) hook = _make_hook() + release = _make_release() inputs = HookInputs( records=[HookRecord(id="test", metadata={})], run_id="test-run", @@ -470,4 +495,4 @@ async def test_container_deleted_on_failure(self, tmp_path: Path): output_dir.mkdir() with pytest.raises(TransientError, match="Docker error"): - await runner.run(hook, inputs, output_dir) + await runner.run(hook, release, inputs, output_dir) diff --git a/server/tests/unit/infrastructure/test_postgres_feature_store.py b/server/tests/unit/infrastructure/test_postgres_feature_store.py index 6f45d20..f40b3ee 100644 --- 
a/server/tests/unit/infrastructure/test_postgres_feature_store.py +++ b/server/tests/unit/infrastructure/test_postgres_feature_store.py @@ -12,6 +12,9 @@ from osa.infrastructure.persistence.feature_table import FEATURES_SCHEMA +_RUN_ID = "0190a1b2-c3d4-7e5f-8a9b-0c1d2e3f4a5b" + + def _make_columns() -> list[ColumnDef]: return [ ColumnDef(name="score", json_type="number", required=True), @@ -50,6 +53,7 @@ def fake_run_sync(fn, *args, **kwargs): cols = [ sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True), sa.Column("record_srn", sa.Text, nullable=False), + sa.Column("run_id", sa.Text, nullable=False), sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), ] for col_name in feature_columns or []: @@ -147,7 +151,7 @@ async def test_inserts_rows(self): {"score": 0.82, "pocket_id": "P2"}, ] - count = await store.insert_features("pocket_detect", "urn:rec:1", rows) + count = await store.insert_features("pocket_detect", "urn:rec:1", rows, _RUN_ID) assert count == 2 conn.execute.assert_called_once() @@ -157,7 +161,7 @@ async def test_empty_rows_returns_zero(self): engine = AsyncMock() store = PostgresFeatureStore(engine=engine, session=AsyncMock()) - count = await store.insert_features("pocket_detect", "urn:rec:1", []) + count = await store.insert_features("pocket_detect", "urn:rec:1", [], _RUN_ID) assert count == 0 @@ -166,12 +170,13 @@ async def test_enriches_rows_with_record_srn(self): engine, conn = _mock_engine_with_reflect("pocket_detect", ["score"]) store = PostgresFeatureStore(engine=engine, session=AsyncMock()) - await store.insert_features("pocket_detect", "urn:rec:1", [{"score": 0.95}]) + await store.insert_features("pocket_detect", "urn:rec:1", [{"score": 0.95}], _RUN_ID) call_args = conn.execute.call_args params = call_args[0][1] # second positional arg is the params list assert len(params) == 1 assert params[0]["record_srn"] == "urn:rec:1" + assert params[0]["run_id"] == _RUN_ID assert "created_at" in params[0] assert 
params[0]["score"] == 0.95 @@ -181,7 +186,7 @@ async def test_chunks_large_inserts(self): store = PostgresFeatureStore(engine=engine, session=AsyncMock()) rows = [{"score": float(i)} for i in range(2500)] - count = await store.insert_features("hook", "urn:rec:1", rows) + count = await store.insert_features("hook", "urn:rec:1", rows, _RUN_ID) assert count == 2500 assert conn.execute.call_count == 3 # 1000 + 1000 + 500 @@ -192,7 +197,7 @@ async def test_single_chunk_for_small_batch(self): store = PostgresFeatureStore(engine=engine, session=AsyncMock()) rows = [{"score": float(i)} for i in range(999)] - count = await store.insert_features("hook", "urn:rec:1", rows) + count = await store.insert_features("hook", "urn:rec:1", rows, _RUN_ID) assert count == 999 assert conn.execute.call_count == 1 @@ -203,7 +208,7 @@ async def test_insert_rejects_invalid_hook_name(self): store = PostgresFeatureStore(engine=engine, session=AsyncMock()) with pytest.raises(ValidationError, match="Invalid identifier"): - await store.insert_features("'; DROP TABLE --", "urn:rec:1", [{"score": 1}]) + await store.insert_features("'; DROP TABLE --", "urn:rec:1", [{"score": 1}], _RUN_ID) @pytest.mark.asyncio async def test_create_rejects_invalid_hook_name(self): @@ -219,7 +224,7 @@ async def test_reflects_table_before_insert(self): engine, conn = _mock_engine_with_reflect("hook", ["score"]) store = PostgresFeatureStore(engine=engine, session=AsyncMock()) - await store.insert_features("hook", "urn:rec:1", [{"score": 0.95}]) + await store.insert_features("hook", "urn:rec:1", [{"score": 0.95}], _RUN_ID) # run_sync should have been called for reflection conn.run_sync.assert_called_once() diff --git a/server/uv.lock b/server/uv.lock index 2b9e5c1..d93ea46 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -305,6 +305,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = 
"sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "(implementation_name != 'PyPy' and platform_machine == 'arm64' and sys_platform == 'darwin') or (implementation_name != 'PyPy' and platform_machine == 'aarch64' and sys_platform == 'linux') or (implementation_name != 'PyPy' and platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, +] + [[package]] name = "cfgv" version = "3.5.0" @@ -395,6 
+427,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/db/d291e30fdf7ea617a335531e72294e0c723356d7fdde8fba00610a76bda9/coverage-7.13.2-py3-none-any.whl", hash = "sha256:40ce1ea1e25125556d8e76bd0b61500839a07944cc287ac21d5626f3e620cad5", size = 210943, upload-time = "2026-01-25T13:00:02.388Z" }, ] +[[package]] +name = "cryptography" +version = "49.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "(platform_machine == 'arm64' and platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/99/d1c90d6041656cc6ee229dc99cd67fd0cd5aec3c5f7d72fffc27cc750054/cryptography-49.0.0.tar.gz", hash = "sha256:f89660a348f4f78a92366240a61404e337586ef7f5909a2fef59ca88ef505493", size = 854345, upload-time = "2026-06-12T20:02:30.512Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/22/adf66990e63584a68dfb50c24f48a125c07b1699899381c8151e63ed458c/cryptography-49.0.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:966fe0e9c67490071f14c0d2b1cb2dfb3023c5ce39457343931415f08382f2db", size = 4032100, upload-time = "2026-06-12T20:02:32.143Z" }, + { url = "https://files.pythonhosted.org/packages/09/41/3797cfaf69cae04a13ee78ebd83f0678d9c02b4779d21ce24445326f1a69/cryptography-49.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:36d1709f992593689b45bda411498d62c6e365f2ca00b84657d4dadd24de16db", size = 4692978, upload-time = "2026-06-12T20:01:21.305Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8b/43011f7ebe515a8aa20d61f290a326cd890c2e738e16e59eaff8d9c3a412/cryptography-49.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:0e959b578856a3924bc0cbb710fc12c387b9412a951389f3ca61704a9e25f325", size = 4716422, upload-time = "2026-06-12T20:01:48.566Z" }, + { url = "https://files.pythonhosted.org/packages/4a/91/01ce7303a4579e6d3a6abef01bd322848e9ea7a219adcabc5048b9033571/cryptography-49.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:53ecee2e23f7169b6117e99fc8a944e5e50f79e69758a83b52a00cb98ab2b2d2", size = 4700503, upload-time = "2026-06-12T20:02:47.091Z" }, + { url = "https://files.pythonhosted.org/packages/62/99/a2c95cf8293f07491e9e27c20cc4dcd18176d944e674679adeb1d0173fd6/cryptography-49.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:2eda353d8a27bcbcaa4cbed18994a74ab4d19a2ca897db188ea269ab9b71419b", size = 5309779, upload-time = "2026-06-12T20:02:08.987Z" }, + { url = "https://files.pythonhosted.org/packages/20/2c/0622f20ff02b2ef32558733443805dc82fd4c275be01b2d19d14676f3a1b/cryptography-49.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2afe9051da7ae7bd5905da5a949280c7d2bb75682e188f650a9d0f2756b834c6", size = 4749683, upload-time = "2026-06-12T20:02:03.335Z" }, + { url = "https://files.pythonhosted.org/packages/a3/5b/c5246635d5fd3b64e0d45ae10e99fd32fe9676a79915ccfe5a61ba9af1a5/cryptography-49.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:0b82e28ee398a386f0807bba7884d30f25218855690f45115831bcce5d90822c", size = 4337874, upload-time = "2026-06-12T20:02:54.323Z" }, + { url = "https://files.pythonhosted.org/packages/6d/88/05563c7fe2e914e87d1a536d06fe83e66b4e1d95cb593e05aea375531da8/cryptography-49.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ccac2bfebc306b862133e3bb71f3f6ee8bb525240089b2d952e4144b3a6d5da7", size = 4700283, upload-time = "2026-06-12T20:01:34.822Z" }, + { url = "https://files.pythonhosted.org/packages/c4/b6/d7696e4e890d6ae1469935164c9e5215c557671cb78d6e3f458ccceaa632/cryptography-49.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d0527ce944105f257f605a827d6ebead966c752038b6e8656abb9c5edee6fc68", size = 
5265844, upload-time = "2026-06-12T20:01:24.09Z" }, + { url = "https://files.pythonhosted.org/packages/a9/3c/f3ad17eecc1a57b0ba236dc01f90e783c51f4a2f35f64777cc4f47a184b2/cryptography-49.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:cbc77da8c523d5abd028635ba850a6966fcee2c82e2bf65a41d1d8afe0f98be9", size = 4749290, upload-time = "2026-06-12T20:01:30.848Z" }, + { url = "https://files.pythonhosted.org/packages/4f/01/339573cf1023163a400b0b5d16f6d507de413b9f60be6fd1b77feeaf6737/cryptography-49.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b87e65d263b3e5d3bb92a57e2a6638e2f31110fa7aa890c7b2dbba42248d0a3f", size = 4834612, upload-time = "2026-06-12T20:01:29.246Z" }, + { url = "https://files.pythonhosted.org/packages/71/fd/577302e213a1be9468f92d1afef66fcf1ef83d516819d9992ca547f592bd/cryptography-49.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:66ec79c3904820572d7e987abdf304281f141d37ad9a489b8e97066e7b9b6459", size = 4980804, upload-time = "2026-06-12T20:01:42.853Z" }, + { url = "https://files.pythonhosted.org/packages/ec/9e/db72b3ae7fc9cfad53e630e56c6ae83b9b6ff0bf3718ffb8012d20b3aabf/cryptography-49.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:73a205dce83953d131a4aa1e0fd917a2fd1c5b1eef251e9d7152efefcbf5caf7", size = 4013892, upload-time = "2026-06-12T20:02:10.735Z" }, + { url = "https://files.pythonhosted.org/packages/86/12/c48a424f38db03027be9f7ed5c7dc5de9933dbee992865f98b13727a009d/cryptography-49.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:196ecd6a36e4e9aa10270393bb98d8df88fccee0bf1e5128b91ae4eb4375896d", size = 4678835, upload-time = "2026-06-12T20:02:48.743Z" }, + { url = "https://files.pythonhosted.org/packages/68/28/8a3ad4653662c93fc44dc4e5d8fd374c25c42e07b34bbfbadf49cf57a5a8/cryptography-49.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7abcee80084cda3f7691f3eb1ce480d8df49cec637b429aa35986c1de71738aa", size = 4697239, upload-time = 
"2026-06-12T20:02:56.03Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b2/2193fc74f81aee4f9b62733133b73b5176718932ed8f2e4b03fa040480a6/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:4ae387c9cb68ea569ca17e490d66d8142b81c3cc814bf179974b7d146e490bbb", size = 4685593, upload-time = "2026-06-12T20:02:50.666Z" }, + { url = "https://files.pythonhosted.org/packages/47/f1/1d3eaa243bfc5de4a187b22aa8c048b3e4980bfbe830ac46e6bac2e66947/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:f37d847238971164fdbc68ade6f6574aecc9c0af714190e2083429ff68f4ce9d", size = 5289961, upload-time = "2026-06-12T20:01:46.468Z" }, + { url = "https://files.pythonhosted.org/packages/58/39/2d51306721330c486495853eda1c567880ff036de15a14c4b74f399934af/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:c2bc30226390d60ea19d9f82b19db005fe0452154a23c1c410c12ea801e43561", size = 4731145, upload-time = "2026-06-12T20:02:16.832Z" }, + { url = "https://files.pythonhosted.org/packages/17/50/983e838c7fd0d87fd8c969bcdd328edaf5f756e38df5281637424c155873/cryptography-49.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:07cab27cc7b7e0fd28e5e26bb9eeedde5c135c868b46de4a27845abe94af6122", size = 4321719, upload-time = "2026-06-12T20:02:52.611Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f5/8f571d7e27c55bce9f76f026143bcb1e040a4233149ecca0bea5fa5dd5f7/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:b20133d204d2bb56ba047642199603876c872026ca53e79c35b83772ab2cc505", size = 4685209, upload-time = "2026-06-12T20:02:07.282Z" }, + { url = "https://files.pythonhosted.org/packages/e7/84/0e27016a6fc5a0886f797018b26aa42f40c09a82332bff77822a451deaaa/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b970c6da94d5bb18629db453d14f2a1300f6bf59b61e9b82377931ef95504866", size = 5246285, upload-time = "2026-06-12T20:01:32.439Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/2d/5e1fb307cb5931881516b464c98774b3f2c36b5d4bb9a2830253cf553cad/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d8ecde755e2e91bf773fc94e8c9d730cd7f2007004cb492263a794ec3899a1c8", size = 4730441, upload-time = "2026-06-12T20:02:01.469Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c0/bff5a02ee731d207d6a1ed51732549d8c53d2bc8da1d10ec6f2844201d68/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3fb64c420688e5319ae25113a354015abbd8dffbfbc41781a1ea66fc7622ac3", size = 4815869, upload-time = "2026-06-12T20:01:36.574Z" }, + { url = "https://files.pythonhosted.org/packages/b9/26/814681d14248d95d73d5c3eea0c39a94eb8302df966f670a2c60de90974b/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32703d93296f5c1f4b53349ad3a250c2cae0fdecd3a3dd5d47e616d8d616af27", size = 4960948, upload-time = "2026-06-12T20:02:18.688Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/5bb823f5bedcf80718cea7fbc95ec5515cca3769633c4b01a32be7f30e7c/cryptography-49.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ec5e529fb80935c94fe7b729f9972b50e351a0e6b50aa294fd5cabb109fcc29a", size = 4025947, upload-time = "2026-06-12T20:01:25.745Z" }, + { url = "https://files.pythonhosted.org/packages/3d/df/40577043ca124e17012f408ddddaeb213b856336ac82ddb3bc915f39e29f/cryptography-49.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f78ff2c9ed8dc2d036b0f4d640e22522213d047c1b14e61205a7e55c80a494d4", size = 4692429, upload-time = "2026-06-12T20:01:53.628Z" }, + { url = "https://files.pythonhosted.org/packages/2c/99/2d13299eb3dd27b02dcfaafcc91d6b5cb3329f7cbd6d8f51921acd566c1a/cryptography-49.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:35b151772baff2c74cba7fa290ceaff4c3b11c0c881eb93eb5dbc05a7cfbba18", size = 4700968, upload-time = "2026-06-12T20:02:45.383Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/4d/9c0cd02f95e2602dd5e563da149ee0830abef3537be8b34dc56281ebe27a/cryptography-49.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0f21641cf4b30fca7aee061ced0ec7ad7b073518088b7c9969a297c0ae796c69", size = 4697758, upload-time = "2026-06-12T20:01:41.13Z" }, + { url = "https://files.pythonhosted.org/packages/24/01/186c825898477d77e2324d5360fefe622ff1d8d1963ec0554e2cada8ec77/cryptography-49.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9e82dcc8e56052715fb18b2429e3bca4823b1629136a2084fc45a9a5cecb9b64", size = 5298863, upload-time = "2026-06-12T20:02:24.579Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7b/62cbbab75d0659865bf0273790031544a0b16c8072d258f9428dcd8190dc/cryptography-49.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6f2debedf9ca60cf1d5bd466475638af5130f89965605cd818484d19987d3a21", size = 4735983, upload-time = "2026-06-12T20:01:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/6c/72/3e798c064bc39e471008075d0f9bc9daf77a80879c092e4a8e170c585ed4/cryptography-49.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:8c25ceb16df5b9435f3f6a9829204985b0e0cbee3b48aacd432c7d2c850b44d9", size = 4334173, upload-time = "2026-06-12T20:01:44.743Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ee/6fca21d1ac73e06f8bef71940abfd4d2f6472b4bca284d770f32bd4086f6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:28d8b15e6275f12c8a207dc309dfa957903c927d08d0cc937ee3f63f200693cc", size = 4697298, upload-time = "2026-06-12T20:02:20.918Z" }, + { url = "https://files.pythonhosted.org/packages/67/d0/a5fcd3515f0bae49a7b6d0413cc1bdccdcc1fc0047037a0d480642cdc5d6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:6fc361c34fb6aac015ce19435876635e5c6d21db31998b0920f675f131e043b8", size = 5254338, upload-time = "2026-06-12T20:02:22.737Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/84/84fe36f19caf857d61cb7fc9c63035a47ffabd84ea12d1d393148efa3615/cryptography-49.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:2400ef9c9e2299a25614eb1dea3db54a69b1349efd043bfac9c67630d136df36", size = 4735650, upload-time = "2026-06-12T20:02:41.389Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a0/db537264e234f7273a73ec020873d6d6b39dfd8a53db78b550ca8320440e/cryptography-49.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:67e1d20ad9ef3a563c59ef22e7a8a0b8210bd26604369ea4a30a7c66aefe504e", size = 4834820, upload-time = "2026-06-12T20:01:51.847Z" }, + { url = "https://files.pythonhosted.org/packages/93/77/8df9eb486495979bccecd1062e2eaf435250e84437040295b57d09048b0b/cryptography-49.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:42b0684e0e40cf26122427802486f6d93aea593612603a94fbf260c7eb1e9c1b", size = 4967968, upload-time = "2026-06-12T20:02:12.524Z" }, +] + [[package]] name = "deprecated" version = "1.3.1" @@ -1032,7 +1111,7 @@ wheels = [ [[package]] name = "osa" -version = "0.0.3" +version = "0.0.4" source = { editable = "." 
} dependencies = [ { name = "aiodocker", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -1049,7 +1128,7 @@ dependencies = [ { name = "psycopg2-binary", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pydantic", extra = ["email"], marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "pydantic-settings", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "pyjwt", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyjwt", extra = ["crypto"], marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "python-multipart", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "slowapi", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "sqlalchemy", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' 
and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -1094,7 +1173,7 @@ requires-dist = [ { name = "psycopg2-binary", specifier = ">=2.9.11" }, { name = "pydantic", extras = ["email"], specifier = ">=2.12.4" }, { name = "pydantic-settings", specifier = ">=2.12.0" }, - { name = "pyjwt", specifier = ">=2.11.0" }, + { name = "pyjwt", extras = ["crypto"], specifier = ">=2.11.0" }, { name = "python-multipart", specifier = ">=0.0.22" }, { name = "slowapi", specifier = ">=0.1.9" }, { name = "sqlalchemy", specifier = ">=2.0.44" }, @@ -1249,6 +1328,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/69/f36abe5f118c1dca6d3726ceae164b9356985805480731ac6712a63f24f0/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3cb3a676873d7506825221045bd70e0427c905b9c8ee8d6acd70cfcbd6e576d", size = 3347643, upload-time = "2025-10-10T11:13:53.499Z" }, ] +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + [[package]] name = "pydantic" version = "2.12.5" @@ -1339,6 +1427,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" }, ] 
+[package.optional-dependencies] +crypto = [ + { name = "cryptography", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + [[package]] name = "pytest" version = "9.0.2"