w7-mgfcode
diff --git a/‎.env.example‎
Lines changed: 1 addition & 1 deletion b/‎.env.example‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎INITIAL-7.md‎
Lines changed: 13 additions & 0 deletions b/‎INITIAL-7.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎PRPs/PRP-7-model-registry.md‎
Lines changed: 1253 additions & 0 deletions b/‎PRPs/PRP-7-model-registry.md‎
Lines changed: 1253 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 44 additions & 2 deletions b/‎README.md‎
Lines changed: 44 additions & 2 deletions
diff --git a/‎alembic/env.py‎
Lines changed: 1 addition & 0 deletions b/‎alembic/env.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎alembic/versions/a2f7b3c8d901_create_model_registry_tables.py‎
Lines changed: 173 additions & 0 deletions b/‎alembic/versions/a2f7b3c8d901_create_model_registry_tables.py‎
Lines changed: 173 additions & 0 deletions
diff --git a/‎app/core/config.py‎
Lines changed: 5 additions & 1 deletion b/‎app/core/config.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎app/features/backtesting/tests/test_schemas.py‎
Lines changed: 2 additions & 2 deletions b/‎app/features/backtesting/tests/test_schemas.py‎
Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 # Copy this file to .env and adjust values as needed
 
 # Database connection (PostgreSQL + pgvector via Docker Compose)
-DATABASE_URL=postgresql+asyncpg://forecastlab:forecastlab@localhost:5432/forecastlab
+DATABASE_URL=postgresql+asyncpg://forecastlab:forecastlab@localhost:5433/forecastlab
 
 # Application settings
 APP_NAME=ForecastLabAI
 
@@ -12,6 +12,17 @@
 - Artifact storage abstraction:
   - local filesystem by default (Settings-driven)
   - compatible with future S3-like storage backends
+- Lifecycle Management:
+  - State machine tracking: PENDING | RUNNING | SUCCESS | FAILED | ARCHIVED.
+  - Deployment Aliases: Mutable pointers (e.g., 'prod-v1') to specific successful runs.
+- Metadata & Lineage:
+  - JSONB storage for ModelConfig, FeatureConfig, and Performance Metrics.
+  - Runtime Snapshot: Recording Python/Library versions for environment parity.
+  - Agent Context: Integration of agent_id and session_id for autonomous run traceability.
+- Artifact Integrity:
+  - Checksum-based verification (SHA-256) for all serialized artifacts.
+- Storage Strategy:
+  - Pluggable storage providers (LocalFS, future S3/GCS) via Abstract Registry Interface.
 
 ## EXAMPLES:
 - `examples/registry/create_run.py` — create run record + persist configs.
@@ -21,6 +32,8 @@
 ## DOCUMENTATION:
 - Postgres JSONB patterns
 - Artifact integrity (hashing) best practices
+- [Using JSONB in PostgreSQL](https://scalegrid.io/blog/using-jsonb-in-postgresql-how-to-effectively-store-index-json-data-in-postgresql/)
+- [Supply Chain Vulnerability](https://www.fortra.com/blog/supply-chain-vulnerability)
 
 ## OTHER CONSIDERATIONS:
 - No hardcoded artifact paths: derived from `ARTIFACT_ROOT` + run_id.
 
@@ -118,7 +118,8 @@ app/
 │   ├── ingest/         # Batch upsert endpoints for sales data
 │   ├── featuresets/    # Time-safe feature engineering (lags, rolling, calendar)
 │   ├── forecasting/    # Model training, prediction, persistence
-│   └── backtesting/    # Time-series CV, metrics, baseline comparisons
+│   ├── backtesting/    # Time-series CV, metrics, baseline comparisons
+│   └── registry/       # Model run tracking, artifacts, deployment aliases
 └── main.py         # FastAPI entry point
 
 tests/              # Test fixtures and helpers
@@ -129,7 +130,8 @@ examples/
 ├── queries/        # Example SQL queries
 ├── models/         # Baseline model examples (naive, seasonal_naive, moving_average)
 ├── backtest/       # Backtesting examples (run_backtest, inspect_splits, metrics_demo)
-└── compute_features_demo.py  # Feature engineering demo
+├── compute_features_demo.py  # Feature engineering demo
+└── registry_demo.py  # Model registry workflow demo
 scripts/            # Utility scripts
 ```
 
@@ -301,6 +303,46 @@ When `include_baselines=true`, automatically compares against naive and seasonal
 
 See [examples/backtest/](examples/backtest/) for usage examples.
 
+### Model Registry
+
+- `POST /registry/runs` - Create a new model run
+- `GET /registry/runs` - List runs with filtering and pagination
+- `GET /registry/runs/{run_id}` - Get run details
+- `PATCH /registry/runs/{run_id}` - Update run (status, metrics, artifacts)
+- `GET /registry/runs/{run_id}/verify` - Verify artifact integrity
+- `POST /registry/aliases` - Create or update deployment alias
+- `GET /registry/aliases` - List all aliases
+- `GET /registry/aliases/{alias_name}` - Get alias details
+- `DELETE /registry/aliases/{alias_name}` - Delete an alias
+- `GET /registry/compare/{run_id_a}/{run_id_b}` - Compare two runs
+
+**Example Create Run Request:**
+```bash
+curl -X POST http://localhost:8123/registry/runs \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model_type": "seasonal_naive",
+    "model_config": {"season_length": 7},
+    "data_window_start": "2024-01-01",
+    "data_window_end": "2024-03-31",
+    "store_id": 1,
+    "product_id": 1
+  }'
+```
+
+**Run Lifecycle:**
+- `pending` → `running` → (`success` | `failed`) → `archived`
+- Aliases can only point to runs with `success` status
+
+**Features:**
+- JSONB storage for model_config, feature_config, metrics, runtime_info, agent_context
+- SHA-256 artifact integrity verification
+- Duplicate detection (configurable via the `registry_duplicate_policy` setting: allow/deny/detect; default `detect`)
+- Runtime environment capture (Python, numpy, pandas versions)
+- Agent context tracking for autonomous workflows
+
+See [examples/registry_demo.py](examples/registry_demo.py) for a complete workflow demo.
+
 ## API Documentation
 
 Once the server is running:
 
@@ -13,6 +13,7 @@
 
 # Import all models for Alembic autogenerate detection
 from app.features.data_platform import models as data_platform_models  # noqa: F401
+from app.features.registry import models as registry_models  # noqa: F401
 
 # Alembic Config object
 config = context.config
 
@@ -0,0 +1,173 @@
+"""create_model_registry_tables
+
+Revision ID: a2f7b3c8d901
+Revises: e1165ebcef61
+Create Date: 2026-02-01 10:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# Revision identifiers, used by Alembic to order this migration in the revision chain.
+revision: str = "a2f7b3c8d901"
+down_revision: Union[str, None] = "e1165ebcef61"  # parent revision ("Revises" in the header)
+branch_labels: Union[str, Sequence[str], None] = None  # no branch label assigned
+depends_on: Union[str, Sequence[str], None] = None  # no cross-revision dependency declared
+
+
+def upgrade() -> None:
+    """Apply migration - create model_run and deployment_alias tables."""
+    # Create model_run table: one row per run (config, data window, metrics, artifact, lineage)
+    op.create_table(
+        "model_run",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("run_id", sa.String(length=32), nullable=False),  # unique via ix_model_run_run_id
+        sa.Column("status", sa.String(length=20), nullable=False, server_default="pending"),
+        # Model configuration (model_config is required, feature_config is optional)
+        sa.Column("model_type", sa.String(length=50), nullable=False),
+        sa.Column("model_config", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
+        sa.Column("feature_config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("config_hash", sa.String(length=16), nullable=False),  # NOTE(review): presumably a truncated config digest for duplicate detection - confirm
+        # Data window (end >= start is enforced by ck_model_run_valid_data_window)
+        sa.Column("data_window_start", sa.Date(), nullable=False),
+        sa.Column("data_window_end", sa.Date(), nullable=False),
+        sa.Column("store_id", sa.Integer(), nullable=False),  # NOTE(review): no foreign key on store_id/product_id - confirm intentional
+        sa.Column("product_id", sa.Integer(), nullable=False),
+        # Metrics (optional JSONB; indexed with GIN below)
+        sa.Column("metrics", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        # Artifact info (all optional; a 64-char hash column fits a hex SHA-256 digest)
+        sa.Column("artifact_uri", sa.String(length=500), nullable=True),
+        sa.Column("artifact_hash", sa.String(length=64), nullable=True),
+        sa.Column("artifact_size_bytes", sa.Integer(), nullable=True),  # NOTE(review): 32-bit INTEGER caps at ~2 GiB - consider BigInteger
+        # Environment & lineage (all optional)
+        sa.Column("runtime_info", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("agent_context", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("git_sha", sa.String(length=40), nullable=True),  # 40 chars fits a full hex SHA-1
+        # Error tracking (optional, capped at 2000 characters)
+        sa.Column("error_message", sa.String(length=2000), nullable=True),
+        # Timing (timezone-aware, both optional)
+        sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
+        # Timestamps (from TimestampMixin); both default to now() at insert time
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",  # NOTE(review): no DB-level on-update here - presumably maintained by the ORM mixin; confirm
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints: surrogate primary key plus CHECKs on status values and data-window ordering
+        sa.PrimaryKeyConstraint("id"),
+        sa.CheckConstraint(
+            "status IN ('pending', 'running', 'success', 'failed', 'archived')",
+            name="ck_model_run_valid_status",
+        ),
+        sa.CheckConstraint(
+            "data_window_end >= data_window_start",
+            name="ck_model_run_valid_data_window",
+        ),
+    )
+
+    # Single-column indexes for model_run (run_id is the only unique one)
+    op.create_index(op.f("ix_model_run_run_id"), "model_run", ["run_id"], unique=True)
+    op.create_index(op.f("ix_model_run_status"), "model_run", ["status"], unique=False)
+    op.create_index(op.f("ix_model_run_model_type"), "model_run", ["model_type"], unique=False)
+    op.create_index(op.f("ix_model_run_config_hash"), "model_run", ["config_hash"], unique=False)
+    op.create_index(op.f("ix_model_run_store_id"), "model_run", ["store_id"], unique=False)
+    op.create_index(op.f("ix_model_run_product_id"), "model_run", ["product_id"], unique=False)
+
+    # Composite indexes on (store_id, product_id) and (data_window_start, data_window_end)
+    op.create_index(
+        "ix_model_run_store_product", "model_run", ["store_id", "product_id"], unique=False
+    )
+    op.create_index(
+        "ix_model_run_data_window",
+        "model_run",
+        ["data_window_start", "data_window_end"],
+        unique=False,
+    )
+
+    # GIN indexes for JSONB containment queries on model_config and metrics
+    op.create_index(
+        "ix_model_run_model_config_gin",
+        "model_run",
+        ["model_config"],
+        unique=False,
+        postgresql_using="gin",
+    )
+    op.create_index(
+        "ix_model_run_metrics_gin",
+        "model_run",
+        ["metrics"],
+        unique=False,
+        postgresql_using="gin",
+    )
+
+    # Create deployment_alias table: a uniquely named pointer (alias_name) to one model_run row
+    op.create_table(
+        "deployment_alias",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("alias_name", sa.String(length=100), nullable=False),
+        sa.Column("run_id", sa.Integer(), nullable=False),  # references model_run.id (integer PK), not the string model_run.run_id
+        sa.Column("description", sa.String(length=500), nullable=True),
+        # Timestamps (from TimestampMixin); both default to now() at insert time
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        # Constraints. NOTE(review): the FK has no ON DELETE rule, so deleting an aliased model_run row is rejected - confirm intended
+        sa.PrimaryKeyConstraint("id"),
+        sa.ForeignKeyConstraint(["run_id"], ["model_run.id"]),
+        sa.UniqueConstraint("alias_name", name="uq_deployment_alias_name"),
+    )
+
+    # Indexes for deployment_alias. NOTE(review): uq_deployment_alias_name already enforces unique alias_name, so the unique index below looks redundant - confirm against the ORM model
+    op.create_index(
+        op.f("ix_deployment_alias_alias_name"),
+        "deployment_alias",
+        ["alias_name"],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_deployment_alias_run_id"), "deployment_alias", ["run_id"], unique=False
+    )
+
+def downgrade() -> None:
+    """Revert migration - drop model_run and deployment_alias tables."""
+    # Drop deployment_alias (indexes, then table) first: its foreign key references model_run
+    op.drop_index(op.f("ix_deployment_alias_run_id"), table_name="deployment_alias")
+    op.drop_index(op.f("ix_deployment_alias_alias_name"), table_name="deployment_alias")
+    op.drop_table("deployment_alias")
+
+    # Drop model_run indexes in the reverse of the order upgrade() created them
+    op.drop_index("ix_model_run_metrics_gin", table_name="model_run")
+    op.drop_index("ix_model_run_model_config_gin", table_name="model_run")
+    op.drop_index("ix_model_run_data_window", table_name="model_run")
+    op.drop_index("ix_model_run_store_product", table_name="model_run")
+    op.drop_index(op.f("ix_model_run_product_id"), table_name="model_run")
+    op.drop_index(op.f("ix_model_run_store_id"), table_name="model_run")
+    op.drop_index(op.f("ix_model_run_config_hash"), table_name="model_run")
+    op.drop_index(op.f("ix_model_run_model_type"), table_name="model_run")
+    op.drop_index(op.f("ix_model_run_status"), table_name="model_run")
+    op.drop_index(op.f("ix_model_run_run_id"), table_name="model_run")
+
+    # Drop model_run table last, after the deployment_alias table that referenced it is gone
+    op.drop_table("model_run")
@@ -21,7 +21,7 @@ class Settings(BaseSettings):
     debug: bool = False
 
     # Database
-    database_url: str = "postgresql+asyncpg://forecastlab:forecastlab@localhost:5432/forecastlab"
+    database_url: str = "postgresql+asyncpg://forecastlab:forecastlab@localhost:5433/forecastlab"
 
     # Logging
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
@@ -53,6 +53,10 @@ class Settings(BaseSettings):
     backtest_max_gap: int = 30
     backtest_results_dir: str = "./artifacts/backtests"
 
+    # Registry
+    registry_artifact_root: str = "./artifacts/registry"
+    registry_duplicate_policy: Literal["allow", "deny", "detect"] = "detect"
+
     @property
     def is_development(self) -> bool:
         """Check if running in development mode."""
 
@@ -93,7 +93,7 @@ def test_frozen_config(self):
         """Test SplitConfig is immutable."""
         config = SplitConfig()
         with pytest.raises(ValidationError):
-            config.n_splits = 10
+            config.n_splits = 10  # type: ignore[misc]
 
 
 class TestBacktestConfig:
@@ -136,7 +136,7 @@ def test_frozen_config(self):
         """Test BacktestConfig is immutable."""
         config = BacktestConfig(model_config_main=NaiveModelConfig())
         with pytest.raises(ValidationError):
-            config.include_baselines = False
+            config.include_baselines = False  # type: ignore[misc]
 
     def test_invalid_schema_version(self):
         """Test invalid schema_version raises error."""