From 4b377797b86b8d63ac7c98bdfb6787a9019dc0f5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:04:41 +0000 Subject: [PATCH 01/53] Initial plan From 64f791a17c332ddec0b5a993a170506aedf8616b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:10:38 +0000 Subject: [PATCH 02/53] data_sources: add account_id field to DataSource model - Add account_id column (FK to account.id, nullable) to DataSource - Add account relationship (backref 'data_sources' on Account) - Update __table_args__ UniqueConstraint to include account_id - Set self.account from user.account in __init__ when user is provided --- flexmeasures/data/models/data_sources.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index e0488023f5..8b31cbd57a 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -271,7 +271,9 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): __tablename__ = "data_source" __table_args__ = ( - db.UniqueConstraint("name", "user_id", "model", "version", "attributes_hash"), + db.UniqueConstraint( + "name", "user_id", "account_id", "model", "version", "attributes_hash" + ), ) # The type of data source (e.g. user, forecaster or scheduler) @@ -284,6 +286,10 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): ) user = db.relationship("User", backref=db.backref("data_source", lazy=True)) + # The account this data source belongs to (populated from user.account for user-type sources) + account_id = db.Column(db.Integer, db.ForeignKey("account.id"), nullable=True) + account = db.relationship("Account", backref=db.backref("data_sources", lazy=True)) + attributes = db.Column(MutableDict.as_mutable(JSONB), nullable=False, default={}) attributes_hash = db.Column(db.LargeBinary(length=256)) @@ -316,6 +322,7 @@ def __init__( name = user.username type = "user" self.user = user + self.account = user.account elif user is None and type == "user": raise TypeError("A data source cannot have type 'user' but no user set.") self.type = type From 67459217d7d06f605ce63a4ccc5b3739f83be6a1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:10:44 +0000 Subject: [PATCH 03/53] migration: add account_id to data_source table (9877450113f6) - Add account_id column (nullable, FK to account.id) - Data migration: populate account_id from user's account where user_id IS NOT NULL - Update UniqueConstraint to include account_id column - down_revision: 8b62f8129f34 --- ...7450113f6_add_account_id_to_data_source.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py diff --git a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py new file mode 100644 index 0000000000..719249d210 --- /dev/null +++ b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py @@ -0,0 +1,89 @@ +"""Add account_id to data_source table + +Revision ID: 9877450113f6 +Revises: 8b62f8129f34 +Create Date: 2025-01-15 00:00:00.000000 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "9877450113f6" +down_revision = "8b62f8129f34" +branch_labels = None +depends_on = None + +# Minimal table definitions for the data migration (SQLAlchemy Core only, no ORM) +t_data_source = sa.Table( + "data_source", + sa.MetaData(), + sa.Column("id", sa.Integer), + sa.Column("user_id", sa.Integer), + sa.Column("account_id", sa.Integer), +) + +t_fm_user = sa.Table( + "fm_user", + sa.MetaData(), + sa.Column("id", sa.Integer), + sa.Column("account_id", sa.Integer), +) + + +def upgrade(): + # 1. Add the account_id column (nullable) + with op.batch_alter_table("data_source", schema=None) as batch_op: + batch_op.add_column(sa.Column("account_id", sa.Integer(), nullable=True)) + batch_op.create_foreign_key( + op.f("data_source_account_id_account_fkey"), + "account", + ["account_id"], + ["id"], + ) + + # 2. Data migration: populate account_id from the related user's account + connection = op.get_bind() + rows = connection.execute( + sa.select(t_data_source.c.id, t_data_source.c.user_id).where( + t_data_source.c.user_id.isnot(None) + ) + ).fetchall() + + for ds_id, user_id in rows: + user_row = connection.execute( + sa.select(t_fm_user.c.account_id).where(t_fm_user.c.id == user_id) + ).fetchone() + if user_row is not None and user_row[0] is not None: + connection.execute( + sa.update(t_data_source) + .where(t_data_source.c.id == ds_id) + .values(account_id=user_row[0]) + ) + + # 3. Drop old UniqueConstraint and recreate it with account_id included + with op.batch_alter_table("data_source", schema=None) as batch_op: + batch_op.drop_constraint("data_source_name_key", type_="unique") + batch_op.create_unique_constraint( + "data_source_name_key", + ["name", "user_id", "account_id", "model", "version", "attributes_hash"], + ) + + +def downgrade(): + # 1. Restore the original UniqueConstraint without account_id + with op.batch_alter_table("data_source", schema=None) as batch_op: + batch_op.drop_constraint("data_source_name_key", type_="unique") + batch_op.create_unique_constraint( + "data_source_name_key", + ["name", "user_id", "model", "version", "attributes_hash"], + ) + + # 2. Drop the account_id column and its FK + with op.batch_alter_table("data_source", schema=None) as batch_op: + batch_op.drop_constraint( + op.f("data_source_account_id_account_fkey"), type_="foreignkey" + ) + batch_op.drop_column("account_id") From 78fb04f73584034d57450619ed35bd8a4120d15b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:11:38 +0000 Subject: [PATCH 04/53] migration: replace N+1 loop with single UPDATE...FROM for account_id backfill --- ...7450113f6_add_account_id_to_data_source.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py index 719249d210..ab156e4605 100644 --- a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py +++ b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py @@ -44,24 +44,14 @@ def upgrade(): ["id"], ) - # 2. Data migration: populate account_id from the related user's account + # 2. Data migration: populate account_id from the related user's account. + # Use a single UPDATE ... FROM to avoid N+1 queries. connection = op.get_bind() - rows = connection.execute( - sa.select(t_data_source.c.id, t_data_source.c.user_id).where( - t_data_source.c.user_id.isnot(None) - ) - ).fetchall() - - for ds_id, user_id in rows: - user_row = connection.execute( - sa.select(t_fm_user.c.account_id).where(t_fm_user.c.id == user_id) - ).fetchone() - if user_row is not None and user_row[0] is not None: - connection.execute( - sa.update(t_data_source) - .where(t_data_source.c.id == ds_id) - .values(account_id=user_row[0]) - ) + connection.execute( + sa.update(t_data_source) + .where(t_data_source.c.user_id == t_fm_user.c.id) + .values(account_id=t_fm_user.c.account_id) + ) # 3. Drop old UniqueConstraint and recreate it with account_id included with op.batch_alter_table("data_source", schema=None) as batch_op: From 65efbdd01b242ac41ff412d6c92f8dd6ece3fc80 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:14:16 +0000 Subject: [PATCH 05/53] tests/sensor_data: verify account_id is set on data source when posting sensor data Context: - DataSource.__init__ now sets self.account = user.account when a user is provided - The POST sensor data endpoint calls get_or_create_source(current_user) - Need to verify account_id is populated correctly on the resulting data source Change: - Added test_post_sensor_data_sets_account_id_on_data_source - Posts sensor data as test_supplier_user_4@seita.nl - Asserts data_source.account_id == user.account_id --- .../v3_0/tests/test_sensor_data_fresh_db.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py b/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py index 3912bc0cca..e4b68ef05c 100644 --- a/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py +++ b/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py @@ -9,6 +9,7 @@ from flexmeasures import Source from flexmeasures.api.v3_0.tests.utils import make_sensor_data_request_for_gas_sensor from flexmeasures.data.models.time_series import TimedBelief +from flexmeasures.data.models.user import User @pytest.mark.parametrize( @@ -162,3 +163,35 @@ def test_post_sensor_instantaneous_data_round( assert data.reset_index().event_start[0] == pd.Timestamp( "2021-06-06 22:00:00+0000", tz="UTC" ) + + +@pytest.mark.parametrize( + "requesting_user", ["test_supplier_user_4@seita.nl"], indirect=True +) +def test_post_sensor_data_sets_account_id_on_data_source( + client, + setup_api_fresh_test_data, + requesting_user, + db, +): + """When sensor data is posted, the resulting data source should have account_id + set to the posting user's account_id. + """ + sensor = setup_api_fresh_test_data["some gas sensor"] + post_data = make_sensor_data_request_for_gas_sensor( + num_values=6, unit="m³/h", include_a_null=False + ) + response = client.post( + url_for("SensorAPI:post_data", id=sensor.id), + json=post_data, + ) + assert response.status_code == 200 + + user = db.session.execute( + select(User).filter_by(email="test_supplier_user_4@seita.nl") + ).scalar_one() + data_source = db.session.execute( + select(Source).filter_by(user=user) + ).scalar_one_or_none() + assert data_source is not None + assert data_source.account_id == user.account_id From 85778e47cfba807bf27605b38e1cddb56c9bc77f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:17:40 +0000 Subject: [PATCH 06/53] migration: use correlated subquery for account_id data migration; add changelog entry Co-authored-by: Flix6x <30658763+Flix6x@users.noreply.github.com> Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/7969297e-88c2-4ace-99c9-ec74109f03c8 --- documentation/changelog.rst | 1 + .../9877450113f6_add_account_id_to_data_source.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/documentation/changelog.rst b/documentation/changelog.rst index ccd08e4861..5e1be218ea 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -17,6 +17,7 @@ New features Infrastructure / Support ---------------------- +* Add ``account_id`` field to the ``data_source`` table, populated from the related user's account (including data migration and automatic assignment when posting sensor data) [see `PR #2058 `_] * Stop creating new toy assets when restarting the docker-compose stack [see `PR #2018 `_] * Migrate from ``pip`` to ``uv`` for dependency management [see `PR #1973 `_] * Migrate from ``make`` to ``poe`` for running tasks [see `PR #1973 `_] diff --git a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py index ab156e4605..aa460b2ff8 100644 --- a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py +++ b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py @@ -45,12 +45,16 @@ def upgrade(): ) # 2. Data migration: populate account_id from the related user's account. - # Use a single UPDATE ... FROM to avoid N+1 queries. + # Use a correlated subquery to avoid N+1 queries and ensure portability. connection = op.get_bind() connection.execute( sa.update(t_data_source) - .where(t_data_source.c.user_id == t_fm_user.c.id) - .values(account_id=t_fm_user.c.account_id) + .values( + account_id=sa.select(t_fm_user.c.account_id) + .where(t_fm_user.c.id == t_data_source.c.user_id) + .scalar_subquery() + ) + .where(t_data_source.c.user_id.isnot(None)) ) # 3. Drop old UniqueConstraint and recreate it with account_id included From 06bbec9f480870ea6f690c7afbc2afb975e13500 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:25:04 +0000 Subject: [PATCH 07/53] data_sources: fix account_id assignment to avoid SAWarning and handle unflushed users Co-authored-by: Flix6x <30658763+Flix6x@users.noreply.github.com> Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/7969297e-88c2-4ace-99c9-ec74109f03c8 --- flexmeasures/data/models/data_sources.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 8b31cbd57a..6a775332b2 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -322,7 +322,12 @@ def __init__( name = user.username type = "user" self.user = user - self.account = user.account + # Prefer the FK column directly (avoids triggering a lazy load/autoflush). + # Fall back to the account relationship for users not yet flushed to the DB + # (where account_id may not be set on the column yet). + self.account_id = user.account_id or ( + user.account.id if user.account else None + ) elif user is None and type == "user": raise TypeError("A data source cannot have type 'user' but no user set.") self.type = type From 8b020340dae9b3dd47421620255d190951f3be88 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:29:01 +0000 Subject: [PATCH 08/53] agents/architecture: add migration checklist and DataSource account_id invariants Context: - PR #2058 added account_id FK to DataSource table - No migration checklist existed for reviewing/writing Alembic migrations - New domain invariant (user-type DataSources now have account_id) was undocumented - Agent failed to self-update during the PR session Change: - Added Alembic migration checklist with safe backfill pattern - Added DataSource entity to Domain Knowledge with account_id invariant - Documented invariants #5 and #6 for DataSource UniqueConstraint and account_id - Added Lessons Learned section with PR #2058 case study --- .../agents/architecture-domain-specialist.md | 66 ++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/.github/agents/architecture-domain-specialist.md b/.github/agents/architecture-domain-specialist.md index d295024663..7454a15de0 100644 --- a/.github/agents/architecture-domain-specialist.md +++ b/.github/agents/architecture-domain-specialist.md @@ -38,9 +38,43 @@ This agent owns the integrity of models (e.g. assets, sensors, data sources, sch - [ ] **Acyclic asset trees**: Verify no changes break the `parent_asset_id != id` constraint - [ ] **Asset hierarchy**: Ensure parent-child relationships maintain referential integrity - [ ] **Account ownership**: Check that all assets have `account_id` set correctly +- [ ] **DataSource account_id**: Verify user-type DataSources have `account_id` populated from `user.account_id` (see invariant below) - [ ] **Sensor-Asset binding**: Validate sensors are properly linked to assets - [ ] **TimedBelief structure**: Ensure (event_start, belief_time, source, cumulative_probability) integrity +### Alembic Migration Changes + +When reviewing or writing Alembic migrations, check: + +- [ ] **down_revision correct**: Verify `down_revision` matches the actual current head (run `flask db heads` to check) +- [ ] **FK naming convention**: FK constraints follow the pattern `___fkey` +- [ ] **Data backfill**: Bulk UPDATE used (correlated subquery or UPDATE...FROM), not N+1 Python loop +- [ ] **Downgrade path**: `downgrade()` fully reverses `upgrade()` including constraint drops/recreates +- [ ] **No ORM in migrations**: Use `sa.Table` + `sa.MetaData()` with SQLAlchemy Core only; never import ORM models +- [ ] **Nullable new columns**: New FK columns should be `nullable=True` to avoid breaking existing rows +- [ ] **batch_alter_table**: Use `op.batch_alter_table()` for all ALTER TABLE operations (required for SQLite compat in test environments) +- [ ] **UniqueConstraint names**: Verify constraint name matches the existing DB constraint name exactly (check with `\d
` in psql) + +**Pattern: Safe data migration in Alembic** + +```python +# ✅ Correct: SQLAlchemy Core table stubs, bulk correlated subquery update +t_data_source = sa.Table("data_source", sa.MetaData(), sa.Column("id", sa.Integer), ...) +t_fm_user = sa.Table("fm_user", sa.MetaData(), sa.Column("id", sa.Integer), ...) + +connection.execute( + sa.update(t_data_source) + .values(account_id=sa.select(t_fm_user.c.account_id) + .where(t_fm_user.c.id == t_data_source.c.user_id) + .scalar_subquery()) + .where(t_data_source.c.user_id.isnot(None)) +) + +# ❌ Wrong: N+1 Python loop over ORM objects +for source in DataSource.query.filter_by(...): # Don't import ORM! + source.account_id = source.user.account_id # Triggers N queries +``` + ### Flex-context & flex-model - [ ] **Flex-context inheritance**: Verify `get_flex_context()` parent walk logic is preserved @@ -285,6 +319,16 @@ fields_to_remove = ["as_job"] # ❌ Wrong format - **Location**: `/flexmeasures/data/models/data_sources.py` - **Purpose**: Forecasters and reporters subclass `DataGenerator` to couple configured instances to unique data sources (schedulers are not yet subclassing `DataGenerator`) +#### DataSource +- **Location**: `flexmeasures/data/models/data_sources.py` +- **Key fields**: `id`, `name`, `type`, `user_id`, `account_id`, `model`, `version`, `attributes`, `attributes_hash` +- **Relationships**: + - `user` → User (via `user_id`, nullable — only for `type="user"` sources) + - `account` → Account (via `account_id`, nullable — populated from user's account for user-type sources) + - `sensors` ↔ Sensor (via `timed_belief` join table, viewonly) +- **`account_id` invariant**: For user-type DataSources, `account_id` is always set from the creating user's account (see invariant #6 below). For non-user sources (forecasters, schedulers, reporters), `account_id` remains `None`. +- **UniqueConstraint**: `(name, user_id, account_id, model, version, attributes_hash)` — added `account_id` in migration `9877450113f6` + ### Critical Invariants 1. **Acyclic Asset Trees** @@ -308,7 +352,18 @@ fields_to_remove = ["as_job"] # ❌ Wrong format - Role-based permissions (ACCOUNT_ADMIN, CONSULTANT) - Audit logging for all mutations -5. **Timezone Awareness** +5. **DataSource UniqueConstraint includes `account_id`** + - Since migration `9877450113f6` (PR #2058), `account_id` is part of the unique constraint + - Code that creates or deduplicates DataSources must include `account_id` in lookup logic + - See `get_or_create_source()` in `flexmeasures/data/services/data_sources.py` + +6. **User-type DataSources always have `account_id` populated** + - Invariant added in PR #2058: when a `DataSource` is created with `user=`, `account_id` is set from `user.account_id` + - Fallback for unflushed users: `user.account.id` is used if `user.account_id` is not yet set + - Non-user DataSources (forecasters, schedulers, reporters created without a user) have `account_id=None` + - Implication: filtering DataSources by `account_id` only works for user-type sources + +7. **Timezone Awareness** - All datetime objects MUST be timezone-aware - Sensors have explicit `timezone` field @@ -487,3 +542,12 @@ After each assignment: Change: - Added guidance on ``` + +### Lessons Learned + +**Session 2026-03-24 (PR #2058 — add account_id to DataSource)**: + +- **New domain invariant**: User-type DataSources now have `account_id` populated. Document new FK relationships and invariants in the Domain Knowledge section immediately. +- **Migration checklist**: Added an explicit Alembic migration checklist after reviewing the migration for this PR. Key patterns: correlated subquery for bulk backfill, SQLAlchemy Core stubs (no ORM imports), `batch_alter_table` for all ALTER operations, exact constraint name matching. +- **Missed API Specialist coordination**: The PR changed endpoint behavior (POST sensor data sets account_id on the created data source). The API Specialist should have been engaged to verify backward compatibility. When domain model changes affect how endpoints behave or what they return, flag for API Specialist review. +- **Self-improvement failure**: Despite having explicit self-improvement requirements, no agent updated its instructions during this PR session. This was caught by the Coordinator post-hoc. The agent must update its own instructions as the LAST step of every assignment, not skip it. From d7f7120e2969c2be994b1d67d81f29073fe72729 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:29:47 +0000 Subject: [PATCH 09/53] agents/test-specialist: add DataSource property testing pattern and lessons Context: - PR #2058 added a test for account_id on DataSource after API POST - No guidance existed for testing data source properties after API calls - Agent failed to self-update during the PR session Change: - Added 'Testing DataSource Properties After API Calls' section with pattern - Added Lessons Learned section documenting PR #2058 self-improvement failure --- .github/agents/test-specialist.md | 39 +++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/.github/agents/test-specialist.md b/.github/agents/test-specialist.md index 390778918f..3c1cb28e41 100644 --- a/.github/agents/test-specialist.md +++ b/.github/agents/test-specialist.md @@ -302,6 +302,30 @@ flask db current - **Assertions**: Use descriptive assertion messages for failures - **Mocking**: Use pytest fixtures and mocking when testing external dependencies +### Testing DataSource Properties After API Calls + +When writing tests that verify data source properties (e.g. `account_id`, `user`, `type`) after an API call: + +1. **Use `fresh_db` fixture** — tests that POST data and then query the resulting data source are modifying the DB and must use the function-scoped `fresh_db` fixture. Place these tests in a `_fresh_db` module. + +2. **Query by user, not just name** — data sources created by the same user across test runs may collide; use `filter_by(user=user)` or `filter_by(user_id=user.id)` for precision. + +3. **Pattern** (from `test_post_sensor_data_sets_account_id_on_data_source`): + ```python + # Fetch the user that made the request + user = db.session.execute( + select(User).filter_by(email="test_supplier_user_4@seita.nl") + ).scalar_one() + # Fetch the data source created for that user + data_source = db.session.execute( + select(Source).filter_by(user=user) + ).scalar_one_or_none() + assert data_source is not None + assert data_source.account_id == user.account_id + ``` + +4. **Check both existence and value** — don't just assert `data_source is not None`; also assert the specific field value you're testing. + ## Understanding Test Design Intent (CRITICAL) **Before changing a test, understand WHY it's designed that way.** @@ -482,14 +506,9 @@ After each assignment: - Added guidance on ``` -Example: +### Lessons Learned -``` -agents/test-specialist: learned to verify claims with actual test runs -Context: -- Session #456 claimed tests passed but they were never actually run -- Led to bug slipping through to production -Change: -- Added "Actually Run Tests" section with verification steps -- Emphasized checking test output and coverage -``` \ No newline at end of file +**Session 2026-03-24 (PR #2058 — add account_id to DataSource)**: + +- **Self-improvement failure**: Despite having explicit instructions to update this agent file after each assignment, no update was made during this PR session. This was caught by the Coordinator post-hoc. The agent must treat instruction updates as the LAST mandatory step of any assignment. +- **DataSource property testing**: Added guidance in "Testing DataSource Properties After API Calls" above. When testing properties set by the API on a data source (like `account_id`), use `fresh_db`, query by user to avoid ambiguity, and assert both existence and the specific field value. \ No newline at end of file From 20a4e156e1ef28e35d78b69d5420de4cbcead113 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:30:24 +0000 Subject: [PATCH 10/53] agents/review-lead: add agent selection checklist; document repeated Coordinator failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Context: - PR #2058 session: Coordinator was not invoked (3rd session in a row) - API Specialist was not engaged despite endpoint behavior change - No agents updated their instructions (same failure as 2026-02-06) Change: - Added 'Agent Selection Checklist' mapping code change types to required agents - Documented PR #2058 as lessons learned (3 distinct failures) - Reinforced that Coordinator must ALWAYS be last agent in every session - Clarified that endpoint behavior changes → API Specialist must be engaged --- .github/agents/review-lead.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/agents/review-lead.md b/.github/agents/review-lead.md index 24f4e455b2..96f2b5d413 100644 --- a/.github/agents/review-lead.md +++ b/.github/agents/review-lead.md @@ -631,8 +631,40 @@ Track and document when the Review Lead: - **Prevention**: Investigate production code first; understand test design intent; look for schema migrations - **Key insight**: "Failing tests often reveal production bugs, not test bugs" +**Specific lesson learned (2026-03-24 — PR #2058, add account_id to DataSource)**: + +- **Failure 1**: Coordinator was not invoked despite (a) explicit user prompt instruction and (b) the "MUST always run the Coordinator" requirement in these instructions. This is now the **third session in a row** where Coordinator invocation was skipped. The requirement is clearly not being followed. + - **Root cause hypothesis**: The Coordinator step is listed last, but sessions end before it's reached. It needs to be treated as a non-negotiable gate, not an optional final step. + - **Reinforcement**: Added "Agent Selection Checklist" below to force explicit reasoning about which agents to run. + +- **Failure 2**: API Specialist was not engaged for a change that modified endpoint behavior. The POST sensor data endpoint now sets `account_id` on the resulting data source. This is a side-effect change that the API Specialist should have reviewed. + - **Rule added**: Any PR that changes what data is stored or returned by an endpoint → engage API Specialist. + +- **Failure 3**: No agent (including Review Lead) updated their own instructions after the session. This is the same failure pattern from 2026-02-06. Three sessions now. This is a systemic problem. + - **Root cause**: Self-improvement is treated as optional/forgettable. It must be the **last commit** of every session, non-negotiable. + Update this file to prevent repeating the same mistakes. +### Agent Selection Checklist + +Use this checklist when selecting agents for a PR. Check each category: + +| Code Change Type | Agent to Engage | +|------------------|----------------| +| Domain model changes (`models/`) | Architecture Specialist | +| Alembic migrations | Architecture Specialist | +| Test additions/changes | Test Specialist | +| Endpoint behavior changes (stored/returned data) | **API Specialist** | +| API schema changes | API Specialist | +| CLI command changes | API Specialist | +| Performance-critical paths | Performance Specialist | +| Time/datetime/timezone code | Data & Time Specialist | +| Documentation changes | Documentation Specialist | +| CI/CD, pre-commit, GitHub Actions | Tooling & CI Specialist | +| Agent instruction changes | **Coordinator** (always at end) | + +**The Coordinator must ALWAYS be the last agent run, in every session, no exceptions.** + ### Continuous Improvement The Review Lead should: From fff71b20e7c1af609d026825c1bde88a2622b531 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:31:01 +0000 Subject: [PATCH 11/53] agents/coordinator: document PR #2058 persistent self-improvement failure pattern Context: - PR #2058 repeated the same 3 failures from sessions 2026-02-06 and 2026-02-08 - Coordinator not invoked, no agent self-updates, API Specialist not engaged - Same failures now documented 3 times in Review Lead instructions with no change in behavior Change: - Added 'Additional Pattern Discovered (2026-03-24)' with root cause analysis - Documented missed API Specialist engagement for endpoint behavior change - Flagged governance escalation if Coordinator invocation fails a 4th time - Noted code observation: or-pattern for account_id and empty Initial plan commit --- .github/agents/coordinator.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/agents/coordinator.md b/.github/agents/coordinator.md index 334aee19eb..bfb178d9dc 100644 --- a/.github/agents/coordinator.md +++ b/.github/agents/coordinator.md @@ -448,3 +448,35 @@ The Coordinator has identified these recurring issues: Review Lead should now invoke Coordinator as subagent. These patterns must not repeat. Agent instructions have been updated to prevent recurrence. + +### Additional Pattern Discovered (2026-03-24) + +**Pattern**: Persistent self-improvement failure and missing API Specialist agent selection + +**Session**: PR #2058 — Add `account_id` to DataSource table + +**Observation**: After three sessions now, the same two failures recur: +1. Coordinator is not invoked at end of session (despite MUST requirement in Review Lead instructions) +2. No agent updates its own instructions (despite MUST requirement in all agents) + +**Root cause analysis**: +- "Coordinator invocation" and "self-improvement" are both documented as mandatory last steps +- But the session ends before they are reached — they are treated as optional epilogue, not gating requirements +- The Review Lead agent selection is ad-hoc, with no explicit checklist forcing API Specialist engagement when endpoints change + +**What was missed in PR #2058**: +- API Specialist not engaged: POST sensor data now sets `account_id` on the resulting data source — this is an endpoint behavior change that should be reviewed for backward compatibility +- Zero agent instruction updates across all three participating agents (Architecture Specialist, Test Specialist, Review Lead) +- No Coordinator invocation despite explicit user request in the original prompt + +**Solutions implemented**: +- Architecture Specialist: Added Alembic migration checklist + DataSource domain invariants +- Test Specialist: Added DataSource property testing pattern + lessons learned +- Review Lead: Added Agent Selection Checklist mapping code change types to required agents; documented 3rd recurrence of these failures +- Coordinator (this file): Documented case study + +**Governance escalation**: The Review Lead's "Must Always Run Coordinator" requirement has now been documented in three sessions without being followed. If it fails a fourth time, consider structural changes — e.g., making Coordinator invocation the FIRST step of a session rather than the last, so it sets context rather than being a forgotten epilogue. + +**Code observation from PR #2058 worth tracking**: +- `user.account_id or (user.account.id if user.account else None)` — the `or` pattern is fragile for `account_id=0` (unrealistic but worth noting). Prefer `user.account_id if user.account_id is not None else (user.account.id if user.account else None)` for strict correctness. +- Empty "Initial plan" commit adds git history noise. When orchestrating agents, the first commit should be functional code, not a planning marker. From 4191ae3582d0d6d12780c5051a95918853eaaddf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 24 Mar 2026 22:32:27 +0000 Subject: [PATCH 12/53] data_sources: use explicit is not None check for account_id assignment Co-authored-by: Flix6x <30658763+Flix6x@users.noreply.github.com> Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/7969297e-88c2-4ace-99c9-ec74109f03c8 --- flexmeasures/data/models/data_sources.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index 6a775332b2..bee911a13b 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -325,9 +325,10 @@ def __init__( # Prefer the FK column directly (avoids triggering a lazy load/autoflush). # Fall back to the account relationship for users not yet flushed to the DB # (where account_id may not be set on the column yet). - self.account_id = user.account_id or ( - user.account.id if user.account else None - ) + if user.account_id is not None: + self.account_id = user.account_id + elif user.account is not None: + self.account_id = user.account.id elif user is None and type == "user": raise TypeError("A data source cannot have type 'user' but no user set.") self.type = type From 38db656d8737577392375d0fb5d0869b85743c71 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Wed, 25 Mar 2026 00:25:10 +0100 Subject: [PATCH 13/53] feat: improve test by using a user that is not already registered as a data source before POSTing sensor data Signed-off-by: F.N. Claessen --- .../v3_0/tests/test_sensor_data_fresh_db.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py b/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py index e4b68ef05c..16ab046d11 100644 --- a/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py +++ b/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py @@ -165,12 +165,11 @@ def test_post_sensor_instantaneous_data_round( ) -@pytest.mark.parametrize( - "requesting_user", ["test_supplier_user_4@seita.nl"], indirect=True -) +@pytest.mark.parametrize("requesting_user", ["improper_user@seita.nl"], indirect=True) def test_post_sensor_data_sets_account_id_on_data_source( client, setup_api_fresh_test_data, + setup_user_without_data_source, requesting_user, db, ): @@ -181,17 +180,22 @@ def test_post_sensor_data_sets_account_id_on_data_source( post_data = make_sensor_data_request_for_gas_sensor( num_values=6, unit="m³/h", include_a_null=False ) + + # Make sure the user is not yet registered as a data source + data_source = db.session.execute( + select(Source).filter_by(user=setup_user_without_data_source) + ).scalar_one_or_none() + assert data_source is None + response = client.post( url_for("SensorAPI:post_data", id=sensor.id), json=post_data, ) assert response.status_code == 200 - user = db.session.execute( - select(User).filter_by(email="test_supplier_user_4@seita.nl") - ).scalar_one() + # Make sure the user is now registered as a data source with account_id set data_source = db.session.execute( - select(Source).filter_by(user=user) + select(Source).filter_by(user=setup_user_without_data_source) ).scalar_one_or_none() assert data_source is not None - assert data_source.account_id == user.account_id + assert data_source.account_id == setup_user_without_data_source.account_id From 67eba6ee6b333bf74773e34a84fff09b331ff994 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Wed, 25 Mar 2026 00:30:39 +0100 Subject: [PATCH 14/53] fix: set approximate create datetime based on git info, assuming local time (UTC+1) Signed-off-by: F.N. Claessen --- .../versions/9877450113f6_add_account_id_to_data_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py index aa460b2ff8..dde9f1c0bf 100644 --- a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py +++ b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py @@ -2,7 +2,7 @@ Revision ID: 9877450113f6 Revises: 8b62f8129f34 -Create Date: 2025-01-15 00:00:00.000000 +Create Date: 2026-03-24 22:10:00.000000 """ From b7a4a5c13760e858c5e61f78308e4e4ffd070a05 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Wed, 25 Mar 2026 00:32:17 +0100 Subject: [PATCH 15/53] style: flake8 Signed-off-by: F.N. Claessen --- flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py b/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py index 16ab046d11..f297e97ef6 100644 --- a/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py +++ b/flexmeasures/api/v3_0/tests/test_sensor_data_fresh_db.py @@ -9,7 +9,6 @@ from flexmeasures import Source from flexmeasures.api.v3_0.tests.utils import make_sensor_data_request_for_gas_sensor from flexmeasures.data.models.time_series import TimedBelief -from flexmeasures.data.models.user import User @pytest.mark.parametrize( From 44cff24ab856563d05dd5dbd781c869b55aee871 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Wed, 25 Mar 2026 00:35:52 +0100 Subject: [PATCH 16/53] feat: add migration check to architecture agent instructions Signed-off-by: F.N. Claessen --- .github/agents/architecture-domain-specialist.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/agents/architecture-domain-specialist.md b/.github/agents/architecture-domain-specialist.md index 7454a15de0..19aedcc553 100644 --- a/.github/agents/architecture-domain-specialist.md +++ b/.github/agents/architecture-domain-specialist.md @@ -54,6 +54,7 @@ When reviewing or writing Alembic migrations, check: - [ ] **Nullable new columns**: New FK columns should be `nullable=True` to avoid breaking existing rows - [ ] **batch_alter_table**: Use `op.batch_alter_table()` for all ALTER TABLE operations (required for SQLite compat in test environments) - [ ] **UniqueConstraint names**: Verify constraint name matches the existing DB constraint name exactly (check with `\d
` in psql) +- [ ] **Create Date**: Verify the current datetime is correct (e.g. a `Create Date: 2025-01-15 00:00:00.000000` is highly unlikely to be true) **Pattern: Safe data migration in Alembic** From 95653f1ea68f5c43aa6c00d23b364e5d7ad6e6bc Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Wed, 25 Mar 2026 00:39:15 +0100 Subject: [PATCH 17/53] docs: add db upgrade warning and update agent instructions accordingly Signed-off-by: F.N. Claessen --- .github/agents/architecture-domain-specialist.md | 1 + documentation/changelog.rst | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/agents/architecture-domain-specialist.md b/.github/agents/architecture-domain-specialist.md index 19aedcc553..cf7042cf98 100644 --- a/.github/agents/architecture-domain-specialist.md +++ b/.github/agents/architecture-domain-specialist.md @@ -55,6 +55,7 @@ When reviewing or writing Alembic migrations, check: - [ ] **batch_alter_table**: Use `op.batch_alter_table()` for all ALTER TABLE operations (required for SQLite compat in test environments) - [ ] **UniqueConstraint names**: Verify constraint name matches the existing DB constraint name exactly (check with `\d
` in psql) - [ ] **Create Date**: Verify the current datetime is correct (e.g. a `Create Date: 2025-01-15 00:00:00.000000` is highly unlikely to be true) +- [ ] **Changelog**: Add a db upgrade warning in the changelog under the relevant release section (see previous sections for styling) **Pattern: Safe data migration in Alembic** diff --git a/documentation/changelog.rst b/documentation/changelog.rst index 5e1be218ea..b367ffc582 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -7,6 +7,8 @@ FlexMeasures Changelog v0.32.0 | April XX, 2026 ============================ +.. warning:: Upgrading to this version requires running ``flexmeasures db upgrade`` (you can create a backup first with ``flexmeasures db-ops dump``). + New features ------------- * Support fetching a schedule in a different unit still compatible to the sensor unit [see `PR #1993 `_] From 4e22e439c310af33637a0c6e201b9f69340ba748 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Mar 2026 03:09:34 +0000 Subject: [PATCH 18/53] data_sources: drop FK constraints on user_id and account_id for data lineage preservation Co-authored-by: Flix6x <30658763+Flix6x@users.noreply.github.com> Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/004f0bb9-133d-45c1-9e06-2fedbf25b258 --- flexmeasures/cli/tests/test_data_delete.py | 27 +++++++++++- ...f6_drop_fk_constraints_from_data_source.py | 44 +++++++++++++++++++ flexmeasures/data/models/data_sources.py | 26 ++++++++--- flexmeasures/data/tests/test_user_services.py | 17 +++++++ 4 files changed, 106 insertions(+), 8 deletions(-) create mode 100644 flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py diff --git a/flexmeasures/cli/tests/test_data_delete.py b/flexmeasures/cli/tests/test_data_delete.py index 44ace78500..e73640100d 100644 --- a/flexmeasures/cli/tests/test_data_delete.py +++ b/flexmeasures/cli/tests/test_data_delete.py @@ -9,14 +9,28 @@ def test_delete_account( fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db, app ): - """Check account is deleted + old audit log entries get affected_account_id set to None.""" + """Check account is deleted + old audit log entries get affected_account_id set to None. + Also check that data source lineage is preserved: account_id is NOT nullified after account deletion. + """ from flexmeasures.cli.data_delete import delete_account + from flexmeasures.data.models.data_sources import DataSource prosumer: User = find_user_by_email("test_prosumer_user@seita.nl") prosumer_account_id = prosumer.account_id num_accounts = fresh_db.session.scalar(select(func.count()).select_from(Account)) + # Find data sources belonging to the account's users (for lineage check after deletion) + data_sources_before = fresh_db.session.scalars( + select(DataSource).filter_by(account_id=prosumer_account_id) + ).all() + assert ( + len(data_sources_before) > 0 + ), "Data sources linked to the account should exist before deletion." + data_source_ids_and_account_ids = [ + (ds.id, ds.account_id) for ds in data_sources_before + ] + # Add creation audit log record user_creation_audit_log = AuditLog( event="User Test Prosumer User created test", @@ -47,3 +61,14 @@ def test_delete_account( .one_or_none() ) assert user_creation_audit_log.affected_account_id is None + + # Check that data source lineage is preserved: account_id is NOT nullified after account deletion + for ds_id, original_account_id in data_source_ids_and_account_ids: + data_source = fresh_db.session.get(DataSource, ds_id) + assert ( + data_source is not None + ), f"Data source {ds_id} should still exist after account deletion." + assert data_source.account_id == original_account_id, ( + f"Data source {ds_id} account_id should be preserved (not nullified) " + "after account deletion for lineage purposes." + ) diff --git a/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py b/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py new file mode 100644 index 0000000000..35bbb45acc --- /dev/null +++ b/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py @@ -0,0 +1,44 @@ +"""Drop FK constraints from data_source for data lineage preservation + +When users or accounts are deleted, we want to preserve the historical +user_id and account_id values in data_source rows for lineage purposes, +rather than cascade-deleting or nullifying them. + +Revision ID: a1b2c3d4e5f6 +Revises: 9877450113f6 +Create Date: 2026-03-25 00:00:00.000000 + +""" + +from alembic import op + + +# revision identifiers, used by Alembic. +revision = "a1b2c3d4e5f6" +down_revision = "9877450113f6" +branch_labels = None +depends_on = None + + +def upgrade(): + with op.batch_alter_table("data_source", schema=None) as batch_op: + batch_op.drop_constraint( + op.f("data_source_account_id_account_fkey"), type_="foreignkey" + ) + batch_op.drop_constraint("data_sources_user_id_fkey", type_="foreignkey") + + +def downgrade(): + with op.batch_alter_table("data_source", schema=None) as batch_op: + batch_op.create_foreign_key( + "data_sources_user_id_fkey", + "fm_user", + ["user_id"], + ["id"], + ) + batch_op.create_foreign_key( + op.f("data_source_account_id_account_fkey"), + "account", + ["account_id"], + ["id"], + ) diff --git a/flexmeasures/data/models/data_sources.py b/flexmeasures/data/models/data_sources.py index bee911a13b..32fbccaa9e 100644 --- a/flexmeasures/data/models/data_sources.py +++ b/flexmeasures/data/models/data_sources.py @@ -280,15 +280,27 @@ class DataSource(db.Model, tb.BeliefSourceDBMixin): # just a string, but preferably one of DEFAULT_DATASOURCE_TYPES type = db.Column(db.String(80), default="") - # The id of the user source (can link e.g. to fm_user table) - user_id = db.Column( - db.Integer, db.ForeignKey("fm_user.id"), nullable=True, unique=True + # The id of the user source (can link e.g. to fm_user table). + # No DB-level FK so that deleting a user preserves the lineage reference in this column. + user_id = db.Column(db.Integer, nullable=True, unique=True) + user = db.relationship( + "User", + primaryjoin="DataSource.user_id == User.id", + foreign_keys="[DataSource.user_id]", + backref=db.backref("data_source", lazy=True, passive_deletes="all"), + passive_deletes="all", ) - user = db.relationship("User", backref=db.backref("data_source", lazy=True)) - # The account this data source belongs to (populated from user.account for user-type sources) - account_id = db.Column(db.Integer, db.ForeignKey("account.id"), nullable=True) - account = db.relationship("Account", backref=db.backref("data_sources", lazy=True)) + # The account this data source belongs to (populated from user.account for user-type sources). + # No DB-level FK so that deleting an account preserves the lineage reference in this column. + account_id = db.Column(db.Integer, nullable=True) + account = db.relationship( + "Account", + primaryjoin="DataSource.account_id == Account.id", + foreign_keys="[DataSource.account_id]", + backref=db.backref("data_sources", lazy=True, passive_deletes="all"), + passive_deletes="all", + ) attributes = db.Column(MutableDict.as_mutable(JSONB), nullable=False, default={}) diff --git a/flexmeasures/data/tests/test_user_services.py b/flexmeasures/data/tests/test_user_services.py index eb08734b5f..ef4f22f7b6 100644 --- a/flexmeasures/data/tests/test_user_services.py +++ b/flexmeasures/data/tests/test_user_services.py @@ -133,11 +133,21 @@ def test_create_invalid_user( def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db, app): """Check that deleting a user does not lead to deleting their organisation's (asset/sensor/beliefs) data. Also check that an audit log entry is created + old audit log entries get affected_user_id set to None. + Also check that the data source's user_id is preserved (not nullified) after user deletion for lineage. """ prosumer: User = find_user_by_email("test_prosumer_user@seita.nl") + prosumer_id = prosumer.id prosumer_account_id = prosumer.account_id num_users_before = fresh_db.session.scalar(select(func.count(User.id))) + # Find the data source linked to the prosumer user (for lineage check after deletion) + data_source_before = fresh_db.session.execute( + select(DataSource).filter_by(user_id=prosumer_id) + ).scalar_one_or_none() + assert ( + data_source_before is not None + ), "A data source linked to the prosumer user should exist before deletion." + # Find assets belonging to the user's organisation asset_query = select(GenericAsset).filter_by(account_id=prosumer_account_id) assets_before = fresh_db.session.scalars(asset_query).all() @@ -190,3 +200,10 @@ def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db fresh_db.session.refresh(user_creation_audit_log) assert user_creation_audit_log.affected_user_id is None + + # Check that data source lineage is preserved: user_id is NOT nullified after user deletion + fresh_db.session.expire(data_source_before) + fresh_db.session.refresh(data_source_before) + assert ( + data_source_before.user_id == prosumer_id + ), "Data source user_id should be preserved (not nullified) after user deletion for lineage purposes." From 5c29f56259c438558f3a9bfb5161691f017968c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Mar 2026 03:20:22 +0000 Subject: [PATCH 19/53] tests/data-lineage: also assert account_id and user_id preservation in delete tests Context: - PR #2058 dropped FK constraints on DataSource.user_id and account_id to preserve lineage when users/accounts are deleted - The new tests only checked one of the two lineage fields per test Change: - test_delete_user: also assert data_source.account_id is preserved after user deletion (user deletion should not nullify account_id) - test_delete_account: also save and assert data_source.user_id per data source; account deletion internally calls delete_user so both user_id and account_id lineage must survive the full deletion chain --- flexmeasures/cli/tests/test_data_delete.py | 13 +++++++++---- flexmeasures/data/tests/test_user_services.py | 9 ++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/flexmeasures/cli/tests/test_data_delete.py b/flexmeasures/cli/tests/test_data_delete.py index e73640100d..daa3cc315b 100644 --- a/flexmeasures/cli/tests/test_data_delete.py +++ b/flexmeasures/cli/tests/test_data_delete.py @@ -27,8 +27,8 @@ def test_delete_account( assert ( len(data_sources_before) > 0 ), "Data sources linked to the account should exist before deletion." - data_source_ids_and_account_ids = [ - (ds.id, ds.account_id) for ds in data_sources_before + data_source_ids_and_lineage = [ + (ds.id, ds.user_id, ds.account_id) for ds in data_sources_before ] # Add creation audit log record @@ -62,8 +62,8 @@ def test_delete_account( ) assert user_creation_audit_log.affected_account_id is None - # Check that data source lineage is preserved: account_id is NOT nullified after account deletion - for ds_id, original_account_id in data_source_ids_and_account_ids: + # Check that data source lineage is preserved: account_id and user_id are NOT nullified after account deletion + for ds_id, original_user_id, original_account_id in data_source_ids_and_lineage: data_source = fresh_db.session.get(DataSource, ds_id) assert ( data_source is not None @@ -72,3 +72,8 @@ def test_delete_account( f"Data source {ds_id} account_id should be preserved (not nullified) " "after account deletion for lineage purposes." ) + if original_user_id is not None: + assert data_source.user_id == original_user_id, ( + f"Data source {ds_id} user_id should be preserved (not nullified) " + "after account deletion for lineage purposes." + ) diff --git a/flexmeasures/data/tests/test_user_services.py b/flexmeasures/data/tests/test_user_services.py index ef4f22f7b6..5e431681ed 100644 --- a/flexmeasures/data/tests/test_user_services.py +++ b/flexmeasures/data/tests/test_user_services.py @@ -147,6 +147,10 @@ def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db assert ( data_source_before is not None ), "A data source linked to the prosumer user should exist before deletion." + data_source_account_id_before = data_source_before.account_id + assert ( + data_source_account_id_before is not None + ), "The data source linked to the prosumer user should have an account_id before deletion." # Find assets belonging to the user's organisation asset_query = select(GenericAsset).filter_by(account_id=prosumer_account_id) @@ -201,9 +205,12 @@ def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db fresh_db.session.refresh(user_creation_audit_log) assert user_creation_audit_log.affected_user_id is None - # Check that data source lineage is preserved: user_id is NOT nullified after user deletion + # Check that data source lineage is preserved: user_id and account_id are NOT nullified after user deletion fresh_db.session.expire(data_source_before) fresh_db.session.refresh(data_source_before) assert ( data_source_before.user_id == prosumer_id ), "Data source user_id should be preserved (not nullified) after user deletion for lineage purposes." + assert ( + data_source_before.account_id == data_source_account_id_before + ), "Data source account_id should be preserved (not nullified) after user deletion for lineage purposes." From 97efde0846e38ee35fdcb1c6f924be4cdfb8d7a4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Mar 2026 03:29:51 +0000 Subject: [PATCH 20/53] docs: expand PR #2058 changelog entry to cover FK drop and lineage preservation Context: - The existing entry only mentioned adding account_id to data_source - PR #2058 also drops FK constraints on user_id and account_id to preserve historical references when users/accounts are deleted (data lineage) Change: - Changelog now states both the column addition AND the behavior change: 'historical user/account IDs are no longer nullified when users or accounts are deleted' --- documentation/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/changelog.rst b/documentation/changelog.rst index b367ffc582..56135a070e 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -19,7 +19,7 @@ New features Infrastructure / Support ---------------------- -* Add ``account_id`` field to the ``data_source`` table, populated from the related user's account (including data migration and automatic assignment when posting sensor data) [see `PR #2058 `_] +* Add ``account_id`` field to the ``data_source`` table, populated from the related user's account (including data migration and automatic assignment when posting sensor data); also drop FK constraints on ``data_source.user_id`` and ``data_source.account_id`` to preserve data lineage (historical user/account IDs are no longer nullified when users or accounts are deleted) [see `PR #2058 `_] * Stop creating new toy assets when restarting the docker-compose stack [see `PR #2018 `_] * Migrate from ``pip`` to ``uv`` for dependency management [see `PR #1973 `_] * Migrate from ``make`` to ``poe`` for running tasks [see `PR #1973 `_] From 6abc8d90ff0c2e2f3851d6853c21e098b9702325 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Mar 2026 03:29:58 +0000 Subject: [PATCH 21/53] coordinator: document no-FK lineage pattern and changelog completeness lesson Context: - PR #2058 dropped FK constraints on DataSource.user_id and account_id for data lineage preservation (a deliberate design decision) - The original changelog entry omitted the FK drop and behavior change - No existing agent pattern covered 'intentional FK removal for lineage' Change: - Added checklist for no-FK lineage pattern (passive_deletes, tests, changelog) - Added lesson: changelog must describe behavior changes, not just schema changes - Documented FK drop design decision for future reviewers --- .github/agents/coordinator.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/agents/coordinator.md b/.github/agents/coordinator.md index bfb178d9dc..8a8d892150 100644 --- a/.github/agents/coordinator.md +++ b/.github/agents/coordinator.md @@ -480,3 +480,23 @@ These patterns must not repeat. Agent instructions have been updated to prevent **Code observation from PR #2058 worth tracking**: - `user.account_id or (user.account.id if user.account else None)` — the `or` pattern is fragile for `account_id=0` (unrealistic but worth noting). Prefer `user.account_id if user.account_id is not None else (user.account.id if user.account else None)` for strict correctness. - Empty "Initial plan" commit adds git history noise. When orchestrating agents, the first commit should be functional code, not a planning marker. + +### Additional Pattern Discovered (2026-03-25) + +**Pattern**: No-FK columns for data lineage preservation + +**Session**: PR #2058 continued — Drop FK constraints on `data_source.user_id` and `data_source.account_id` + +**Design decision documented**: +FlexMeasures now intentionally drops DB-level FK constraints on `DataSource.user_id` and `DataSource.account_id` so that historical lineage references survive user/account deletion. The ORM uses `passive_deletes="all"` to prevent auto-nullification. + +**Checklist implication for future PRs**: +When reviewing schema changes that affect FK constraints: +- [ ] If a FK is dropped intentionally for lineage: verify `passive_deletes="all"` on the ORM relationship AND its backref +- [ ] Verify tests check that the orphaned column values are NOT nullified after parent deletion +- [ ] Verify changelog describes the *behavior change* (lineage preservation), not just the schema change (column added) + +**Changelog completeness check** — lessons from this session: +- The initial changelog entry for PR #2058 only described adding `account_id`; it omitted the FK drop and behavior change +- When a migration both adds a column AND changes deletion semantics (e.g., drops a FK), the changelog must cover BOTH aspects +- Coordinator caught this and updated the entry to read: "...also drop FK constraints on `data_source.user_id` and `data_source.account_id` to preserve data lineage (historical user/account IDs are no longer nullified when users or accounts are deleted)" From ebfba1d79e317f35bbd9cf05298bf8f5cedc84ba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Mar 2026 03:30:58 +0000 Subject: [PATCH 22/53] coordinator: clarify account_id fallback pattern references actual implementation The note previously described the 'or' anti-pattern as a concern, but the final code already uses 'if account_id is not None' correctly. Clarified to point to the implemented solution. --- .github/agents/coordinator.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/agents/coordinator.md b/.github/agents/coordinator.md index 8a8d892150..bd9317de9c 100644 --- a/.github/agents/coordinator.md +++ b/.github/agents/coordinator.md @@ -478,7 +478,7 @@ These patterns must not repeat. Agent instructions have been updated to prevent **Governance escalation**: The Review Lead's "Must Always Run Coordinator" requirement has now been documented in three sessions without being followed. If it fails a fourth time, consider structural changes — e.g., making Coordinator invocation the FIRST step of a session rather than the last, so it sets context rather than being a forgotten epilogue. **Code observation from PR #2058 worth tracking**: -- `user.account_id or (user.account.id if user.account else None)` — the `or` pattern is fragile for `account_id=0` (unrealistic but worth noting). Prefer `user.account_id if user.account_id is not None else (user.account.id if user.account else None)` for strict correctness. +- An early draft used `user.account_id or (user.account.id if user.account else None)` — the `or` pattern is fragile for `account_id=0` (unrealistic but worth noting). The final implementation correctly uses `if user.account_id is not None` (see `data_sources.py` lines 340-343) — this is the right pattern to follow. - Empty "Initial plan" commit adds git history noise. When orchestrating agents, the first commit should be functional code, not a planning marker. ### Additional Pattern Discovered (2026-03-25) From 8ad1866acc289c145cf3318c1189f175654abf6d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 14:54:02 +0000 Subject: [PATCH 23/53] Initial plan From 5cf63d2636d5ebf7aeb08d566ebd351ac0f52b2b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:01:39 +0000 Subject: [PATCH 24/53] Add account_id filter for source in search_beliefs Add an account_id parameter (int or list[int]) to: - get_source_criteria() / source_account_criterion() in queries/utils.py - Sensor.search_beliefs() and TimedBelief.search() in models/time_series.py - GenericAsset.search_beliefs() in models/generic_assets.py - BeliefsSearchConfigSchema in schemas/reporting/__init__.py Allows reporters/schedulers/forecasters to filter beliefs by the account_id of their data source, implementing issue #2046. --- flexmeasures/data/models/generic_assets.py | 3 +++ flexmeasures/data/models/time_series.py | 7 ++++++- flexmeasures/data/queries/utils.py | 10 ++++++++++ flexmeasures/data/schemas/reporting/__init__.py | 1 + 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/models/generic_assets.py b/flexmeasures/data/models/generic_assets.py index 4649cde934..b9a139a6fe 100644 --- a/flexmeasures/data/models/generic_assets.py +++ b/flexmeasures/data/models/generic_assets.py @@ -793,6 +793,7 @@ def search_beliefs( # noqa C901 source: ( DataSource | list[DataSource] | int | list[int] | str | list[str] | None ) = None, + account_id: int | list[int] | None = None, use_latest_version_per_event: bool = True, most_recent_beliefs_only: bool = True, most_recent_events_only: bool = False, @@ -812,6 +813,7 @@ def search_beliefs( # noqa C901 :param horizons_at_least: only return beliefs with a belief horizon equal or greater than this timedelta (for example, use timedelta(0) to get ante knowledge time beliefs) :param horizons_at_most: only return beliefs with a belief horizon equal or less than this timedelta (for example, use timedelta(0) to get post knowledge time beliefs) :param source: search only beliefs by this source (pass the DataSource, or its name or id) or list of sources + :param account_id: Optional account id (or list thereof) to filter sources by their owning account :param use_latest_version_per_event: only return the belief from the latest version of a source, for each event :param most_recent_events_only: only return (post knowledge time) beliefs for the most recent event (maximum event start) :param as_json: return beliefs in JSON format (e.g. for use in charts) rather than as BeliefsDataFrame @@ -836,6 +838,7 @@ def search_beliefs( # noqa C901 horizons_at_least=horizons_at_least, horizons_at_most=horizons_at_most, source=source, + account_id=account_id, use_latest_version_per_event=use_latest_version_per_event, most_recent_beliefs_only=most_recent_beliefs_only, most_recent_events_only=most_recent_events_only, diff --git a/flexmeasures/data/models/time_series.py b/flexmeasures/data/models/time_series.py index 76a3605254..80b45efd5d 100644 --- a/flexmeasures/data/models/time_series.py +++ b/flexmeasures/data/models/time_series.py @@ -397,6 +397,7 @@ def search_beliefs( # noqa: C901 user_source_ids: int | list[int] | None = None, source_types: list[str] | None = None, exclude_source_types: list[str] | None = None, + account_id: int | list[int] | None = None, use_latest_version_per_event: bool = True, most_recent_beliefs_only: bool = True, most_recent_events_only: bool = False, @@ -421,6 +422,7 @@ def search_beliefs( # noqa: C901 :param user_source_ids: Optional list of user source ids to query only specific user sources :param source_types: Optional list of source type names to query only specific source types * :param exclude_source_types: Optional list of source type names to exclude specific source types * + :param account_id: Optional account id (or list thereof) to filter sources by their owning account :param use_latest_version_per_event: only return the belief from the latest version of a source, for each event :param most_recent_beliefs_only: only return the most recent beliefs for each event from each source (minimum belief horizon). Defaults to True. :param most_recent_events_only: only return (post knowledge time) beliefs for the most recent event (maximum event start). Defaults to False. @@ -444,6 +446,7 @@ def search_beliefs( # noqa: C901 user_source_ids=user_source_ids, source_types=source_types, exclude_source_types=exclude_source_types, + account_id=account_id, use_latest_version_per_event=use_latest_version_per_event, most_recent_beliefs_only=most_recent_beliefs_only, most_recent_events_only=most_recent_events_only, @@ -853,6 +856,7 @@ def search( user_source_ids: int | list[int] | None = None, source_types: list[str] | None = None, exclude_source_types: list[str] | None = None, + account_id: int | list[int] | None = None, use_latest_version_per_event: bool = True, most_recent_beliefs_only: bool = True, most_recent_events_only: bool = False, @@ -877,6 +881,7 @@ def search( :param user_source_ids: Optional list of user source ids to query only specific user sources :param source_types: Optional list of source type names to query only specific source types * :param exclude_source_types: Optional list of source type names to exclude specific source types * + :param account_id: Optional account id (or list thereof) to filter sources by their owning account :param use_latest_version_per_event: only return the belief from the latest version of a source, for each event :param most_recent_beliefs_only: only return the most recent beliefs for each event from each source (minimum belief horizon). Defaults to True. :param most_recent_events_only: only return (post knowledge time) beliefs for the most recent event (maximum event start) @@ -915,7 +920,7 @@ def search( parsed_sources = parse_source_arg(source) source_criteria = get_source_criteria( - cls, user_source_ids, source_types, exclude_source_types + cls, user_source_ids, source_types, exclude_source_types, account_id ) custom_join_targets = [] if parsed_sources else [DataSource] diff --git a/flexmeasures/data/queries/utils.py b/flexmeasures/data/queries/utils.py index c439f41da1..d45561d88c 100644 --- a/flexmeasures/data/queries/utils.py +++ b/flexmeasures/data/queries/utils.py @@ -87,6 +87,7 @@ def get_source_criteria( user_source_ids: int | list[int], source_types: list[str], exclude_source_types: list[str], + account_id: int | list[int] | None = None, ) -> list[BinaryExpression]: source_criteria: list[BinaryExpression] = [] if user_source_ids is not None: @@ -99,6 +100,8 @@ def get_source_criteria( if user_source_ids and "user" in exclude_source_types: exclude_source_types.remove("user") source_criteria.append(source_type_exclusion_criterion(exclude_source_types)) + if account_id is not None: + source_criteria.append(source_account_criterion(account_id)) return source_criteria @@ -141,6 +144,13 @@ def source_type_exclusion_criterion(source_types: list[str]) -> BinaryExpression return DataSource.type.not_in(source_types) +def source_account_criterion(account_id: int | list[int]) -> BinaryExpression: + """Criterion to collect only data from sources belonging to the given account(s).""" + if not isinstance(account_id, list): + account_id = [account_id] + return DataSource.account_id.in_(account_id) + + def get_belief_timing_criteria( cls: "Type[ts.TimedValue]", asset_class: db.Model, diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 94257599ef..75c3f8db15 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -58,6 +58,7 @@ class BeliefsSearchConfigSchema(Schema): source_types = fields.List(fields.Str()) exclude_source_types = fields.List(fields.Str()) + account_id = fields.List(fields.Int()) most_recent_beliefs_only = fields.Boolean() most_recent_events_only = fields.Boolean() From 867c46ca8b188286faddce11026bc1c246e886e1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:08:50 +0000 Subject: [PATCH 25/53] tests/data: add fixture for sources with account IDs Context: - Issue #2046 added account_id filtering to Sensor.search_beliefs and TimedBelief.search - No tests existed for the new account_id parameter Change: - Added setup_sources_with_accounts fixture in conftest.py Creates a sensor with beliefs from three sources: one per Prosumer/Supplier account and one without an account, enabling account_id filter testing --- flexmeasures/conftest.py | 63 +++++++++++++++++++++ flexmeasures/data/tests/test_queries.py | 75 +++++++++++++++++++++++++ 2 files changed, 138 insertions(+) diff --git a/flexmeasures/conftest.py b/flexmeasures/conftest.py index f737bb4c82..27ffa62743 100644 --- a/flexmeasures/conftest.py +++ b/flexmeasures/conftest.py @@ -1723,6 +1723,69 @@ def setup_multiple_sources(db, add_battery_assets): return test_sensor, s1, s2, s3 +@pytest.fixture(scope="module") +def setup_sources_with_accounts(db, add_battery_assets, setup_accounts): + """Set up a sensor with beliefs from data sources that belong to different accounts. + + Returns a tuple of (sensor, source_account_a, source_account_b, source_no_account) + where each source has beliefs at different time slots: + - source_account_a: account "Prosumer", belief at 2024-02-01T10:00+01:00 + - source_account_b: account "Supplier", belief at 2024-02-01T11:00+01:00 + - source_no_account: no account, belief at 2024-02-01T12:00+01:00 + """ + battery = add_battery_assets["Test battery with dynamic power capacity"] + + test_sensor = Sensor( + name="test sensor for account filtering", + generic_asset=battery, + unit="kW", + event_resolution=timedelta(minutes=15), + ) + db.session.add(test_sensor) + + source_account_a = DataSource( + name="Source Account A", + type="demo script", + account_id=setup_accounts["Prosumer"].id, + ) + source_account_b = DataSource( + name="Source Account B", + type="demo script", + account_id=setup_accounts["Supplier"].id, + ) + source_no_account = DataSource( + name="Source No Account", + type="demo script", + ) + db.session.add_all([source_account_a, source_account_b, source_no_account]) + + add_beliefs( + db=db, + sensor=test_sensor, + time_slots=[pd.Timestamp("2024-02-01T10:00:00+01:00")], + values=[1.0], + source=source_account_a, + ) + add_beliefs( + db=db, + sensor=test_sensor, + time_slots=[pd.Timestamp("2024-02-01T11:00:00+01:00")], + values=[2.0], + source=source_account_b, + ) + add_beliefs( + db=db, + sensor=test_sensor, + time_slots=[pd.Timestamp("2024-02-01T12:00:00+01:00")], + values=[3.0], + source=source_no_account, + ) + + db.session.commit() + + return test_sensor, source_account_a, source_account_b, source_no_account + + def add_beliefs( db, sensor: Sensor, diff --git a/flexmeasures/data/tests/test_queries.py b/flexmeasures/data/tests/test_queries.py index 0fb4a248b1..ea45a8a140 100644 --- a/flexmeasures/data/tests/test_queries.py +++ b/flexmeasures/data/tests/test_queries.py @@ -305,3 +305,78 @@ def get_sources_names(vec: list[DataSource]) -> list[str]: event_ends_before="2024-01-02T00:00:00+01:00", ) ) == ["S1", "S2"] + + +def test_search_beliefs_account_id_filter(db, setup_sources_with_accounts): + """Check that search_beliefs with account_id only returns beliefs from sources + belonging to the specified account. + + We set up three sources: + - source_account_a: belongs to the Prosumer account + - source_account_b: belongs to the Supplier account + - source_no_account: no account assigned + + Filtering by account_a should return only source_account_a's belief, + filtering by account_b should return only source_account_b's belief, + filtering by both accounts should return beliefs from both sources, + and filtering with no account_id should return all three beliefs. + """ + sensor, source_account_a, source_account_b, source_no_account = ( + setup_sources_with_accounts + ) + + # Filter by Prosumer account: should return only 1 belief + bdf_a = sensor.search_beliefs( + account_id=source_account_a.account_id, + most_recent_beliefs_only=False, + ) + assert len(bdf_a) == 1 + assert bdf_a.index.get_level_values("source")[0] == source_account_a + + # Filter by Supplier account: should return only 1 belief + bdf_b = sensor.search_beliefs( + account_id=source_account_b.account_id, + most_recent_beliefs_only=False, + ) + assert len(bdf_b) == 1 + assert bdf_b.index.get_level_values("source")[0] == source_account_b + + # Filter by both accounts: should return 2 beliefs + bdf_ab = sensor.search_beliefs( + account_id=[source_account_a.account_id, source_account_b.account_id], + most_recent_beliefs_only=False, + ) + assert len(bdf_ab) == 2 + sources_ab = set(bdf_ab.index.get_level_values("source")) + assert source_account_a in sources_ab + assert source_account_b in sources_ab + + # No account_id filter: should return all 3 beliefs (from all 3 sources) + bdf_all = sensor.search_beliefs(most_recent_beliefs_only=False) + assert len(bdf_all) == 3 + + +def test_timed_belief_search_account_id_filter(db, setup_sources_with_accounts): + """Check that TimedBelief.search with account_id returns only beliefs from + sources belonging to the specified account. + """ + sensor, source_account_a, source_account_b, source_no_account = ( + setup_sources_with_accounts + ) + + # Filter by Prosumer account + bdf = TimedBelief.search( + sensor, + account_id=source_account_a.account_id, + most_recent_beliefs_only=False, + ) + assert len(bdf) == 1 + assert bdf.index.get_level_values("source")[0] == source_account_a + + # Filter by a non-existent account id: should return no beliefs + bdf_empty = TimedBelief.search( + sensor, + account_id=999999, + most_recent_beliefs_only=False, + ) + assert len(bdf_empty) == 0 From 7267db4cb793307671ac341eb6cecdaa3e4bd6f5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:09:03 +0000 Subject: [PATCH 26/53] tests/schemas: add BeliefsSearchConfigSchema account_id field tests Context: - Issue #2046 added account_id field to BeliefsSearchConfigSchema - No tests existed for this new schema field Change: - Added parametrized test_beliefs_search_config_schema_account_id in test_reporting.py - Verifies account_id accepts list of ints and rejects non-integer values --- .../data/schemas/tests/test_reporting.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index efa1ab2c59..7eb5e9f34f 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -6,6 +6,7 @@ ProfitOrLossReporterConfigSchema, ProfitOrLossReporterParametersSchema, ) +from flexmeasures.data.schemas.reporting import BeliefsSearchConfigSchema from marshmallow.exceptions import ValidationError import pytest @@ -228,3 +229,41 @@ def test_profit_reporter_parameters_schema( else: with pytest.raises(ValidationError): schema.load(parameters) + + +@pytest.mark.parametrize( + "config, is_valid", + [ + ( + {"account_id": [1, 2, 3]}, + True, + ), + ( + {"account_id": [42]}, + True, + ), + ( + {"account_id": []}, + True, + ), + ( + {"account_id": "not-a-list"}, + False, + ), + ( + {"account_id": ["a", "b"]}, + False, + ), + ], +) +def test_beliefs_search_config_schema_account_id(config, is_valid): + """Check that BeliefsSearchConfigSchema accepts account_id as a list of ints + and rejects non-integer values. + """ + schema = BeliefsSearchConfigSchema() + if is_valid: + result = schema.load(config) + assert result["account_id"] == config["account_id"] + else: + with pytest.raises(ValidationError): + schema.load(config) From 162dbd9989cb46e554a36e76ed1b1680635c7577 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:16:09 +0000 Subject: [PATCH 27/53] docs: add account_id source filtering to changelog and reporting docs - Add v0.32.0 New Features entry for account_id filter in search_beliefs (PR #2065) - Add a note in features/reporting.rst explaining available source filtering options (source_types, exclude_source_types, account_id) alongside the existing source/sources parameter examples --- documentation/changelog.rst | 1 + documentation/features/reporting.rst | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/documentation/changelog.rst b/documentation/changelog.rst index 56135a070e..c4a858c32c 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -16,6 +16,7 @@ New features * Version headers (for server and API) in API responses [see `PR #2021 `_] * Show sensor attributes on sensor page, if not empty [see `PR #2015 `_] * Separate the ``StorageScheduler``'s tie-breaking preference for a full :abbr:`SoC (state of charge)` from its reported energy costs [see `PR #2023 `_] +* Support filtering beliefs by the account of their data source, using the new ``account_id`` parameter of ``Sensor.search_beliefs``, ``GenericAsset.search_beliefs``, and ``BeliefsSearchConfigSchema`` [see `PR #2065 `_] Infrastructure / Support ---------------------- diff --git a/documentation/features/reporting.rst b/documentation/features/reporting.rst index 0796d351c3..59c803c5ac 100644 --- a/documentation/features/reporting.rst +++ b/documentation/features/reporting.rst @@ -69,6 +69,14 @@ This parameterizes the computation (from which sensors does data come from, whic "end" : "2023-01-03T00:00:00+00:00", } +.. note:: + In addition to filtering by specific data source IDs (``source`` / ``sources``), reporter input data can be filtered using: + + - ``source_types``: list of source type names to include (e.g. ``["forecaster", "scheduler"]``) + - ``exclude_source_types``: list of source type names to exclude + - ``account_id``: list of account IDs to include only data from sources belonging to those accounts + + These correspond to the same filters available on ``Sensor.search_beliefs``. Example: Profits & losses From 32bf40d4b9f1a5ff1ae617b0b5f168b94300d273 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:25:27 +0000 Subject: [PATCH 28/53] agents: document schema-parity and model-coverage gaps from PR #2065 Context: - PR #2065 adds account_id filter to search_beliefs across Sensor, TimedBelief, GenericAsset - Review identified two cross-cutting gaps not covered by existing agent instructions Change: - architecture-domain-specialist: add Schema Parity checklist item for Input/BeliefsSearchConfigSchema sync - test-specialist: add lesson for full model class coverage and empty-list edge case - coordinator: document Schema Surface Parity pattern --- .../agents/architecture-domain-specialist.md | 7 +++++++ .github/agents/coordinator.md | 21 +++++++++++++++++++ .github/agents/test-specialist.md | 7 ++++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/.github/agents/architecture-domain-specialist.md b/.github/agents/architecture-domain-specialist.md index cf7042cf98..84600f8f8e 100644 --- a/.github/agents/architecture-domain-specialist.md +++ b/.github/agents/architecture-domain-specialist.md @@ -165,6 +165,7 @@ Marshmallow schemas define the canonical format for parameter dictionaries. All - [ ] **Dictionary access**: Verify code uses dict keys from `data_key`, not Python attributes - [ ] **Parameter modification**: Check `pop()`, `del`, assignment operations use correct keys - [ ] **Storage consistency**: Ensure DataSource.attributes, job.meta use schema format +- [ ] **Schema parity**: When adding a filter/parameter to `Sensor.search_beliefs`, verify it is added to BOTH `Input` (io.py) AND `BeliefsSearchConfigSchema` (reporting/__init__.py). These two schemas serve overlapping purposes but are distinct classes — omitting one creates a silent gap where documented features silently fail at schema validation time. **Domain Pattern: Schema Format Migrations** @@ -553,3 +554,9 @@ After each assignment: - **Migration checklist**: Added an explicit Alembic migration checklist after reviewing the migration for this PR. Key patterns: correlated subquery for bulk backfill, SQLAlchemy Core stubs (no ORM imports), `batch_alter_table` for all ALTER operations, exact constraint name matching. - **Missed API Specialist coordination**: The PR changed endpoint behavior (POST sensor data sets account_id on the created data source). The API Specialist should have been engaged to verify backward compatibility. When domain model changes affect how endpoints behave or what they return, flag for API Specialist review. - **Self-improvement failure**: Despite having explicit self-improvement requirements, no agent updated its instructions during this PR session. This was caught by the Coordinator post-hoc. The agent must update its own instructions as the LAST step of every assignment, not skip it. + +**Session 2026-04 (PR #2065 — add account_id filter to search_beliefs)**: + +- **Schema parity gap**: The PR added `account_id` to `BeliefsSearchConfigSchema` but not to `Input` (io.py). These two schemas both expose `Sensor.search_beliefs` parameters; omitting a parameter from one creates a silent gap. The architecture agent must check both schemas on any search_beliefs parameter addition. +- **Documentation vs. implementation mismatch**: The `reporting.rst` docs stated reporters can filter by `account_id`, but this only works if `Input` also has the field. Docs that outrun schema support mislead users. Always verify the full schema chain before documenting a feature. +- **DataSource account_id=None for non-user sources**: The existing invariant (reporters/schedulers/forecasters have `account_id=None`) limits the usefulness of `account_id` filtering: it only matches user-type sources. PRs adding `account_id` filters should either document this limitation explicitly or reconsider the invariant. diff --git a/.github/agents/coordinator.md b/.github/agents/coordinator.md index bd9317de9c..d779fb8cfe 100644 --- a/.github/agents/coordinator.md +++ b/.github/agents/coordinator.md @@ -500,3 +500,24 @@ When reviewing schema changes that affect FK constraints: - The initial changelog entry for PR #2058 only described adding `account_id`; it omitted the FK drop and behavior change - When a migration both adds a column AND changes deletion semantics (e.g., drops a FK), the changelog must cover BOTH aspects - Coordinator caught this and updated the entry to read: "...also drop FK constraints on `data_source.user_id` and `data_source.account_id` to preserve data lineage (historical user/account IDs are no longer nullified when users or accounts are deleted)" + +### Pattern: Schema Surface Parity (2026-04 — PR #2065) + +**Context**: PR #2065 adds `account_id` as a filter parameter to `Sensor.search_beliefs` / `TimedBelief.search` / `GenericAsset.search_beliefs`. + +**Gap discovered**: `account_id` was added to `BeliefsSearchConfigSchema` (used in `StatusSchema.staleness_search`) but NOT to `Input` in `flexmeasures/data/schemas/io.py` (used by reporters like `PandasReporter` and `AggregatorReporter` for their input parameter lists). The `reporting.rst` documentation stated reporters can filter by `account_id`, but users attempting to do so would receive a Marshmallow `ValidationError`. + +**Root cause**: Two Marshmallow schemas expose `Sensor.search_beliefs` parameters to users, but they are distinct classes maintained separately: +- `Input` (io.py): used for reporter/forecaster `input` parameter list +- `BeliefsSearchConfigSchema` (reporting/__init__.py): used for sensor status config + +**Coordinator rule added**: When any PR adds a parameter to `Sensor.search_beliefs`, the Coordinator should check that **all schemas exposing search_beliefs parameters** receive the same addition. Current list: +- `flexmeasures/data/schemas/io.py` → `Input` (reporter parameters) +- `flexmeasures/data/schemas/reporting/__init__.py` → `BeliefsSearchConfigSchema` (status config) + +**Agent coordination implication**: +- Architecture Specialist owns the schema parity checklist (added to its review checklist) +- Documentation Specialist must verify documentation examples are exercisable via the actual available schemas +- Test Specialist must cover all model classes that receive the new parameter + +**Additional gap**: `account_id` for non-user DataSources (reporters, schedulers, forecasters) remains `None`. The filter therefore only matches user-type sources. This architectural constraint (documented in Architecture Specialist) limits the feature's utility and should be prominently noted in documentation whenever `account_id` filtering is described. diff --git a/.github/agents/test-specialist.md b/.github/agents/test-specialist.md index 3c1cb28e41..c1c9554064 100644 --- a/.github/agents/test-specialist.md +++ b/.github/agents/test-specialist.md @@ -511,4 +511,9 @@ After each assignment: **Session 2026-03-24 (PR #2058 — add account_id to DataSource)**: - **Self-improvement failure**: Despite having explicit instructions to update this agent file after each assignment, no update was made during this PR session. This was caught by the Coordinator post-hoc. The agent must treat instruction updates as the LAST mandatory step of any assignment. -- **DataSource property testing**: Added guidance in "Testing DataSource Properties After API Calls" above. When testing properties set by the API on a data source (like `account_id`), use `fresh_db`, query by user to avoid ambiguity, and assert both existence and the specific field value. \ No newline at end of file +- **DataSource property testing**: Added guidance in "Testing DataSource Properties After API Calls" above. When testing properties set by the API on a data source (like `account_id`), use `fresh_db`, query by user to avoid ambiguity, and assert both existence and the specific field value. + +**Session 2026-04 (PR #2065 — add account_id filter to search_beliefs)**: + +- **Model layer coverage gap**: The PR added `account_id` parameter to three model methods: `Sensor.search_beliefs`, `TimedBelief.search`, and `GenericAsset.search_beliefs`. Tests covered only the first two. When a parameter is added to multiple model-layer methods, each method must be tested independently — especially when they use different code paths (e.g., `GenericAsset.search_beliefs` delegates through `Sensor.search_beliefs` but may not if the delegation chain changes). Add a checklist item: "When a filter/param is added to `search_beliefs` across multiple models, ensure at least one test exercises each model class." +- **Empty-list edge case**: `account_id=[]` is schema-valid but semantically means "match nothing" (SQL `IN ()` returns zero rows). Tests should include this edge case and assert the expected empty result rather than letting it pass as a valid no-op. \ No newline at end of file From a090571d5e4c9695fc14a8175b645ded7b6831a9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:34:01 +0000 Subject: [PATCH 29/53] fix: add account_id to Input schema, GenericAsset test, and edge case handling --- documentation/features/reporting.rst | 2 +- flexmeasures/data/schemas/io.py | 1 + flexmeasures/data/tests/test_queries.py | 37 +++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/documentation/features/reporting.rst b/documentation/features/reporting.rst index 59c803c5ac..9576b035bd 100644 --- a/documentation/features/reporting.rst +++ b/documentation/features/reporting.rst @@ -74,7 +74,7 @@ This parameterizes the computation (from which sensors does data come from, whic - ``source_types``: list of source type names to include (e.g. ``["forecaster", "scheduler"]``) - ``exclude_source_types``: list of source type names to exclude - - ``account_id``: list of account IDs to include only data from sources belonging to those accounts + - ``account_id``: list of account IDs to include only data from sources belonging to those accounts (note: only matches user-type data sources — DataSources created by reporters, schedulers, and forecasters have no account and will not be matched by this filter) These correspond to the same filters available on ``Sensor.search_beliefs``. diff --git a/flexmeasures/data/schemas/io.py b/flexmeasures/data/schemas/io.py index afecb4b5b9..94f432e531 100644 --- a/flexmeasures/data/schemas/io.py +++ b/flexmeasures/data/schemas/io.py @@ -37,6 +37,7 @@ class Input(Schema): user_source_ids = fields.List(DataSourceIdField()) source_types = fields.List(fields.Str()) exclude_source_types = fields.List(fields.Str()) + account_id = fields.List(fields.Int()) most_recent_beliefs_only = fields.Boolean() most_recent_events_only = fields.Boolean() diff --git a/flexmeasures/data/tests/test_queries.py b/flexmeasures/data/tests/test_queries.py index ea45a8a140..a51ad46ea3 100644 --- a/flexmeasures/data/tests/test_queries.py +++ b/flexmeasures/data/tests/test_queries.py @@ -355,6 +355,13 @@ def test_search_beliefs_account_id_filter(db, setup_sources_with_accounts): bdf_all = sensor.search_beliefs(most_recent_beliefs_only=False) assert len(bdf_all) == 3 + # Empty list: matches nothing (consistent with SQL IN () semantics) + bdf_empty_list = sensor.search_beliefs( + account_id=[], + most_recent_beliefs_only=False, + ) + assert len(bdf_empty_list) == 0 + def test_timed_belief_search_account_id_filter(db, setup_sources_with_accounts): """Check that TimedBelief.search with account_id returns only beliefs from @@ -380,3 +387,33 @@ def test_timed_belief_search_account_id_filter(db, setup_sources_with_accounts): most_recent_beliefs_only=False, ) assert len(bdf_empty) == 0 + + +def test_generic_asset_search_beliefs_account_id_filter( + db, setup_sources_with_accounts +): + """Check that GenericAsset.search_beliefs with account_id returns only beliefs + from sources belonging to the specified account. + """ + sensor, source_account_a, source_account_b, source_no_account = ( + setup_sources_with_accounts + ) + asset = sensor.generic_asset + + # Filter by Prosumer account: should return only 1 belief + bdf_dict_a = asset.search_beliefs( + sensors=[sensor], + account_id=source_account_a.account_id, + most_recent_beliefs_only=False, + ) + bdf_a = bdf_dict_a[sensor] + assert len(bdf_a) == 1 + assert bdf_a.index.get_level_values("source")[0] == source_account_a + + # No account_id filter: should return all 3 beliefs + bdf_dict_all = asset.search_beliefs( + sensors=[sensor], + most_recent_beliefs_only=False, + ) + bdf_all = bdf_dict_all[sensor] + assert len(bdf_all) == 3 From 2b3105b0e281e466341e924c5dc7af650ea39098 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 18:27:53 +0000 Subject: [PATCH 30/53] fix: use AccountIdField, support single int, reject empty list, fix changelog Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/ef00e39a-3eb8-439b-a440-2e42c4afbf0b Co-authored-by: Flix6x <30658763+Flix6x@users.noreply.github.com> --- .github/agents/test-specialist.md | 2 +- documentation/changelog.rst | 3 +- flexmeasures/data/queries/utils.py | 11 +++- flexmeasures/data/schemas/account.py | 9 ++- flexmeasures/data/schemas/io.py | 11 +++- .../data/schemas/reporting/__init__.py | 11 +++- .../data/schemas/tests/test_reporting.py | 64 +++++++++---------- 7 files changed, 64 insertions(+), 47 deletions(-) diff --git a/.github/agents/test-specialist.md b/.github/agents/test-specialist.md index c1c9554064..d0799ccf6e 100644 --- a/.github/agents/test-specialist.md +++ b/.github/agents/test-specialist.md @@ -516,4 +516,4 @@ After each assignment: **Session 2026-04 (PR #2065 — add account_id filter to search_beliefs)**: - **Model layer coverage gap**: The PR added `account_id` parameter to three model methods: `Sensor.search_beliefs`, `TimedBelief.search`, and `GenericAsset.search_beliefs`. Tests covered only the first two. When a parameter is added to multiple model-layer methods, each method must be tested independently — especially when they use different code paths (e.g., `GenericAsset.search_beliefs` delegates through `Sensor.search_beliefs` but may not if the delegation chain changes). Add a checklist item: "When a filter/param is added to `search_beliefs` across multiple models, ensure at least one test exercises each model class." -- **Empty-list edge case**: `account_id=[]` is schema-valid but semantically means "match nothing" (SQL `IN ()` returns zero rows). Tests should include this edge case and assert the expected empty result rather than letting it pass as a valid no-op. \ No newline at end of file +- **Empty-list edge case**: `account_id=[]` is schema-valid but semantically means "match nothing" (SQL `IN ()` returns zero rows). Tests should include this edge case and assert the expected empty result rather than letting it pass as a valid no-op. diff --git a/documentation/changelog.rst b/documentation/changelog.rst index c4a858c32c..dc0267bc7c 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -16,11 +16,10 @@ New features * Version headers (for server and API) in API responses [see `PR #2021 `_] * Show sensor attributes on sensor page, if not empty [see `PR #2015 `_] * Separate the ``StorageScheduler``'s tie-breaking preference for a full :abbr:`SoC (state of charge)` from its reported energy costs [see `PR #2023 `_] -* Support filtering beliefs by the account of their data source, using the new ``account_id`` parameter of ``Sensor.search_beliefs``, ``GenericAsset.search_beliefs``, and ``BeliefsSearchConfigSchema`` [see `PR #2065 `_] Infrastructure / Support ---------------------- -* Add ``account_id`` field to the ``data_source`` table, populated from the related user's account (including data migration and automatic assignment when posting sensor data); also drop FK constraints on ``data_source.user_id`` and ``data_source.account_id`` to preserve data lineage (historical user/account IDs are no longer nullified when users or accounts are deleted) [see `PR #2058 `_] +* Add ``account_id`` field to the ``data_source`` table, populated from the related user's account (including data migration and automatic assignment when posting sensor data); also drop FK constraints on ``data_source.user_id`` and ``data_source.account_id`` to preserve data lineage (historical user/account IDs are no longer nullified when users or accounts are deleted); support filtering beliefs by the account of their data source, using the new ``account_id`` parameter of ``Sensor.search_beliefs``, ``GenericAsset.search_beliefs``, and ``BeliefsSearchConfigSchema`` [see `PR #2058 `_ and `PR #2065 `_] * Stop creating new toy assets when restarting the docker-compose stack [see `PR #2018 `_] * Migrate from ``pip`` to ``uv`` for dependency management [see `PR #1973 `_] * Migrate from ``make`` to ``poe`` for running tasks [see `PR #1973 `_] diff --git a/flexmeasures/data/queries/utils.py b/flexmeasures/data/queries/utils.py index d45561d88c..b23845957a 100644 --- a/flexmeasures/data/queries/utils.py +++ b/flexmeasures/data/queries/utils.py @@ -144,11 +144,16 @@ def source_type_exclusion_criterion(source_types: list[str]) -> BinaryExpression return DataSource.type.not_in(source_types) -def source_account_criterion(account_id: int | list[int]) -> BinaryExpression: - """Criterion to collect only data from sources belonging to the given account(s).""" +def source_account_criterion(account_id) -> BinaryExpression: + """Criterion to collect only data from sources belonging to the given account(s). + + Accepts account IDs as integers or Account model instances (or a list of either). + """ if not isinstance(account_id, list): account_id = [account_id] - return DataSource.account_id.in_(account_id) + # Support both integer IDs and Account model instances + account_ids = [a.id if not isinstance(a, int) else a for a in account_id] + return DataSource.account_id.in_(account_ids) def get_belief_timing_criteria( diff --git a/flexmeasures/data/schemas/account.py b/flexmeasures/data/schemas/account.py index cb2e970ace..a977c4ad7d 100644 --- a/flexmeasures/data/schemas/account.py +++ b/flexmeasures/data/schemas/account.py @@ -1,12 +1,15 @@ from typing import Any -from flask.cli import with_appcontext from flexmeasures.data import ma from marshmallow import fields, validates from flexmeasures.data import db from flexmeasures.data.models.user import Account, AccountRole -from flexmeasures.data.schemas.utils import FMValidationError, MarshmallowClickMixin +from flexmeasures.data.schemas.utils import ( + FMValidationError, + MarshmallowClickMixin, + with_appcontext_if_needed, +) from flexmeasures.utils.validation_utils import validate_color_hex, validate_url @@ -60,7 +63,7 @@ def validate_logo_url(self, value, **kwargs): class AccountIdField(fields.Int, MarshmallowClickMixin): """Field that deserializes to an Account and serializes back to an integer.""" - @with_appcontext + @with_appcontext_if_needed() def _deserialize(self, value: Any, attr, data, **kwargs) -> Account: """Turn an account id into an Account.""" account_id: int = super()._deserialize(value, attr, data, **kwargs) diff --git a/flexmeasures/data/schemas/io.py b/flexmeasures/data/schemas/io.py index 94f432e531..e62100f5d5 100644 --- a/flexmeasures/data/schemas/io.py +++ b/flexmeasures/data/schemas/io.py @@ -1,8 +1,9 @@ -from marshmallow import fields, Schema, post_load, post_dump +from marshmallow import fields, Schema, post_load, post_dump, pre_load, validate from flexmeasures.data.schemas.sensors import SensorIdField from flexmeasures.data.schemas import AwareDateTimeField, DurationField from flexmeasures.data.schemas.sources import DataSourceIdField +from flexmeasures.data.schemas.account import AccountIdField from flask import current_app @@ -37,7 +38,7 @@ class Input(Schema): user_source_ids = fields.List(DataSourceIdField()) source_types = fields.List(fields.Str()) exclude_source_types = fields.List(fields.Str()) - account_id = fields.List(fields.Int()) + account_id = fields.List(AccountIdField(), validate=validate.Length(min=1)) most_recent_beliefs_only = fields.Boolean() most_recent_events_only = fields.Boolean() @@ -54,6 +55,12 @@ def print_source_deprecation_warning(self, data): "`source` field to be deprecated in v0.17.0. Please, use `sources` instead" ) + @pre_load + def normalize_account_id(self, data: dict, **kwargs) -> dict: + if "account_id" in data and not isinstance(data["account_id"], list): + data["account_id"] = [data["account_id"]] + return data + @post_load def post_load_deprecation_warning_source(self, data: dict, **kawrgs) -> dict: self.print_source_deprecation_warning(data) diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 75c3f8db15..10515e5899 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -1,6 +1,7 @@ -from marshmallow import Schema, fields, validate +from marshmallow import Schema, fields, validate, pre_load from flexmeasures.data.schemas.sources import DataSourceIdField +from flexmeasures.data.schemas.account import AccountIdField from flexmeasures.data.schemas import AwareDateTimeField, DurationField from flexmeasures.data.schemas.io import Input, Output @@ -58,7 +59,7 @@ class BeliefsSearchConfigSchema(Schema): source_types = fields.List(fields.Str()) exclude_source_types = fields.List(fields.Str()) - account_id = fields.List(fields.Int()) + account_id = fields.List(AccountIdField(), validate=validate.Length(min=1)) most_recent_beliefs_only = fields.Boolean() most_recent_events_only = fields.Boolean() @@ -67,6 +68,12 @@ class BeliefsSearchConfigSchema(Schema): resolution = DurationField() sum_multiple = fields.Boolean() + @pre_load + def normalize_account_id(self, data: dict, **kwargs) -> dict: + if "account_id" in data and not isinstance(data["account_id"], list): + data["account_id"] = [data["account_id"]] + return data + class StatusSchema(Schema): max_staleness = DurationField(required=True) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 7eb5e9f34f..0dec9057ff 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -231,39 +231,35 @@ def test_profit_reporter_parameters_schema( schema.load(parameters) -@pytest.mark.parametrize( - "config, is_valid", - [ - ( - {"account_id": [1, 2, 3]}, - True, - ), - ( - {"account_id": [42]}, - True, - ), - ( - {"account_id": []}, - True, - ), - ( - {"account_id": "not-a-list"}, - False, - ), - ( - {"account_id": ["a", "b"]}, - False, - ), - ], -) -def test_beliefs_search_config_schema_account_id(config, is_valid): - """Check that BeliefsSearchConfigSchema accepts account_id as a list of ints - and rejects non-integer values. +def test_beliefs_search_config_schema_account_id(db, app, setup_accounts): + """Check that BeliefsSearchConfigSchema accepts account_id as a single int or list of ints, + rejects empty lists and non-integer values, and validates accounts exist in the DB. """ schema = BeliefsSearchConfigSchema() - if is_valid: - result = schema.load(config) - assert result["account_id"] == config["account_id"] - else: - with pytest.raises(ValidationError): - schema.load(config) + prosumer_id = setup_accounts["Prosumer"].id + supplier_id = setup_accounts["Supplier"].id + + # Valid: single account ID (normalised to a list by pre_load) + result = schema.load({"account_id": prosumer_id}) + assert len(result["account_id"]) == 1 + assert result["account_id"][0].id == prosumer_id + + # Valid: list of account IDs + result = schema.load({"account_id": [prosumer_id, supplier_id]}) + assert len(result["account_id"]) == 2 + + # Valid: single-item list + result = schema.load({"account_id": [prosumer_id]}) + assert len(result["account_id"]) == 1 + + # Invalid: empty list + with pytest.raises(ValidationError): + schema.load({"account_id": []}) + + # Invalid: non-integer string + with pytest.raises(ValidationError): + schema.load({"account_id": "not-valid"}) + + # Invalid: list of strings + with pytest.raises(ValidationError): + schema.load({"account_id": ["a", "b"]}) From ddf576d5996dddd0d689183d7e414c72eec80eba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 19:22:37 +0000 Subject: [PATCH 31/53] refactor: introduce AccountIdOrListField, restore parametrized tests Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/011e3277-9159-49eb-a53d-1c9cf0505b25 Co-authored-by: Flix6x <30658763+Flix6x@users.noreply.github.com> --- flexmeasures/data/schemas/__init__.py | 2 +- flexmeasures/data/schemas/account.py | 27 +++++++ flexmeasures/data/schemas/io.py | 12 +-- .../data/schemas/reporting/__init__.py | 12 +-- .../data/schemas/tests/test_reporting.py | 76 ++++++++++++------- 5 files changed, 83 insertions(+), 46 deletions(-) diff --git a/flexmeasures/data/schemas/__init__.py b/flexmeasures/data/schemas/__init__.py index 8f56be0ab2..0e79ace9bb 100644 --- a/flexmeasures/data/schemas/__init__.py +++ b/flexmeasures/data/schemas/__init__.py @@ -2,7 +2,7 @@ Data schemas (Marshmallow) """ -from .account import AccountIdField +from .account import AccountIdField, AccountIdOrListField from .generic_assets import GenericAssetIdField as AssetIdField from .locations import LatitudeField, LongitudeField from .sensors import SensorIdField, VariableQuantityField diff --git a/flexmeasures/data/schemas/account.py b/flexmeasures/data/schemas/account.py index a977c4ad7d..64cab93fd6 100644 --- a/flexmeasures/data/schemas/account.py +++ b/flexmeasures/data/schemas/account.py @@ -77,3 +77,30 @@ def _deserialize(self, value: Any, attr, data, **kwargs) -> Account: def _serialize(self, value: Account, attr, obj, **kwargs): """Turn an Account into a source id.""" return value.id + + +class AccountIdOrListField(fields.Field): + """Field that accepts a single account ID or a non-empty list of account IDs. + + Both ``42`` and ``[42, 99]`` are accepted. Always deserializes to a list of + :class:`~flexmeasures.data.models.user.Account` instances. + + The field is intentionally expressed as a union of ``integer`` and + ``array[integer]`` rather than always requiring a list, so that future + OpenAPI generation can emit a ``oneOf`` schema for it. + """ + + def _deserialize(self, value: Any, attr, data, **kwargs) -> list[Account]: + _item_field = AccountIdField() + if isinstance(value, list): + if len(value) == 0: + raise FMValidationError("Must be a non-empty list of account IDs.") + return [_item_field._deserialize(v, attr, data, **kwargs) for v in value] + return [_item_field._deserialize(value, attr, data, **kwargs)] + + def _serialize(self, value: Any, attr, obj, **kwargs): + if value is None: + return None + if isinstance(value, list): + return [a.id if hasattr(a, "id") else a for a in value] + return value.id if hasattr(value, "id") else value diff --git a/flexmeasures/data/schemas/io.py b/flexmeasures/data/schemas/io.py index e62100f5d5..d8754fd75e 100644 --- a/flexmeasures/data/schemas/io.py +++ b/flexmeasures/data/schemas/io.py @@ -1,9 +1,9 @@ -from marshmallow import fields, Schema, post_load, post_dump, pre_load, validate +from marshmallow import fields, Schema, post_load, post_dump from flexmeasures.data.schemas.sensors import SensorIdField from flexmeasures.data.schemas import AwareDateTimeField, DurationField from flexmeasures.data.schemas.sources import DataSourceIdField -from flexmeasures.data.schemas.account import AccountIdField +from flexmeasures.data.schemas.account import AccountIdOrListField from flask import current_app @@ -38,7 +38,7 @@ class Input(Schema): user_source_ids = fields.List(DataSourceIdField()) source_types = fields.List(fields.Str()) exclude_source_types = fields.List(fields.Str()) - account_id = fields.List(AccountIdField(), validate=validate.Length(min=1)) + account_id = AccountIdOrListField() most_recent_beliefs_only = fields.Boolean() most_recent_events_only = fields.Boolean() @@ -55,12 +55,6 @@ def print_source_deprecation_warning(self, data): "`source` field to be deprecated in v0.17.0. Please, use `sources` instead" ) - @pre_load - def normalize_account_id(self, data: dict, **kwargs) -> dict: - if "account_id" in data and not isinstance(data["account_id"], list): - data["account_id"] = [data["account_id"]] - return data - @post_load def post_load_deprecation_warning_source(self, data: dict, **kawrgs) -> dict: self.print_source_deprecation_warning(data) diff --git a/flexmeasures/data/schemas/reporting/__init__.py b/flexmeasures/data/schemas/reporting/__init__.py index 10515e5899..d97ef7d9ba 100644 --- a/flexmeasures/data/schemas/reporting/__init__.py +++ b/flexmeasures/data/schemas/reporting/__init__.py @@ -1,7 +1,7 @@ -from marshmallow import Schema, fields, validate, pre_load +from marshmallow import Schema, fields, validate from flexmeasures.data.schemas.sources import DataSourceIdField -from flexmeasures.data.schemas.account import AccountIdField +from flexmeasures.data.schemas.account import AccountIdOrListField from flexmeasures.data.schemas import AwareDateTimeField, DurationField from flexmeasures.data.schemas.io import Input, Output @@ -59,7 +59,7 @@ class BeliefsSearchConfigSchema(Schema): source_types = fields.List(fields.Str()) exclude_source_types = fields.List(fields.Str()) - account_id = fields.List(AccountIdField(), validate=validate.Length(min=1)) + account_id = AccountIdOrListField() most_recent_beliefs_only = fields.Boolean() most_recent_events_only = fields.Boolean() @@ -68,12 +68,6 @@ class BeliefsSearchConfigSchema(Schema): resolution = DurationField() sum_multiple = fields.Boolean() - @pre_load - def normalize_account_id(self, data: dict, **kwargs) -> dict: - if "account_id" in data and not isinstance(data["account_id"], list): - data["account_id"] = [data["account_id"]] - return data - class StatusSchema(Schema): max_staleness = DurationField(required=True) diff --git a/flexmeasures/data/schemas/tests/test_reporting.py b/flexmeasures/data/schemas/tests/test_reporting.py index 0dec9057ff..cf7427186b 100644 --- a/flexmeasures/data/schemas/tests/test_reporting.py +++ b/flexmeasures/data/schemas/tests/test_reporting.py @@ -231,35 +231,57 @@ def test_profit_reporter_parameters_schema( schema.load(parameters) -def test_beliefs_search_config_schema_account_id(db, app, setup_accounts): +@pytest.mark.parametrize( + "make_config, is_valid", + [ + ( + lambda p, s: {"account_id": p}, + True, + ), + ( + lambda p, s: {"account_id": [p]}, + True, + ), + ( + lambda p, s: {"account_id": [p, s]}, + True, + ), + ( + lambda p, s: {"account_id": []}, + False, + ), + ( + lambda p, s: {"account_id": "not-a-list"}, + False, + ), + ( + lambda p, s: {"account_id": ["a", "b"]}, + False, + ), + ], + ids=[ + "single-int", + "single-item-list", + "multi-item-list", + "empty-list", + "string", + "list-of-strings", + ], +) +def test_beliefs_search_config_schema_account_id( + make_config, is_valid, db, app, setup_accounts +): """Check that BeliefsSearchConfigSchema accepts account_id as a single int or list of ints, rejects empty lists and non-integer values, and validates accounts exist in the DB. """ - schema = BeliefsSearchConfigSchema() prosumer_id = setup_accounts["Prosumer"].id supplier_id = setup_accounts["Supplier"].id - - # Valid: single account ID (normalised to a list by pre_load) - result = schema.load({"account_id": prosumer_id}) - assert len(result["account_id"]) == 1 - assert result["account_id"][0].id == prosumer_id - - # Valid: list of account IDs - result = schema.load({"account_id": [prosumer_id, supplier_id]}) - assert len(result["account_id"]) == 2 - - # Valid: single-item list - result = schema.load({"account_id": [prosumer_id]}) - assert len(result["account_id"]) == 1 - - # Invalid: empty list - with pytest.raises(ValidationError): - schema.load({"account_id": []}) - - # Invalid: non-integer string - with pytest.raises(ValidationError): - schema.load({"account_id": "not-valid"}) - - # Invalid: list of strings - with pytest.raises(ValidationError): - schema.load({"account_id": ["a", "b"]}) + config = make_config(prosumer_id, supplier_id) + schema = BeliefsSearchConfigSchema() + if is_valid: + result = schema.load(config) + assert isinstance(result["account_id"], list) + assert len(result["account_id"]) >= 1 + else: + with pytest.raises(ValidationError): + schema.load(config) From c64b4ee82c50c6157a943d478bf0ad7b15b428f9 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 13:38:50 +0100 Subject: [PATCH 32/53] feat: add test covering CLI command to delete user Signed-off-by: F.N. Claessen --- flexmeasures/cli/tests/test_data_delete.py | 72 ++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/flexmeasures/cli/tests/test_data_delete.py b/flexmeasures/cli/tests/test_data_delete.py index daa3cc315b..1198b4e114 100644 --- a/flexmeasures/cli/tests/test_data_delete.py +++ b/flexmeasures/cli/tests/test_data_delete.py @@ -77,3 +77,75 @@ def test_delete_account( f"Data source {ds_id} user_id should be preserved (not nullified) " "after account deletion for lineage purposes." ) + + +def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db, app): + """Check user is deleted + old audit log entries get affected_user_id set to None. + Also check that data source lineage is preserved: user_id is NOT nullified after user deletion. + """ + from flexmeasures.cli.data_delete import delete_a_user + from flexmeasures.data.models.data_sources import DataSource + + prosumer: User = find_user_by_email("test_prosumer_user@seita.nl") + prosumer_id = prosumer.id + prosumer_email = prosumer.email + prosumer_account_id = prosumer.account_id + + num_users = fresh_db.session.scalar(select(func.count()).select_from(User)) + + # Find data sources belonging to the user (for lineage check after deletion) + data_source_before = fresh_db.session.execute( + select(DataSource).filter_by(user_id=prosumer_id) + ).scalar_one_or_none() + if data_source_before is not None: + data_source_id = data_source_before.id + data_source_user_id_before = data_source_before.user_id + data_source_account_id_before = data_source_before.account_id + else: + data_source_id = None + + # Add creation audit log record + user_creation_audit_log = AuditLog( + event="User Test Prosumer User created test", + affected_user_id=prosumer_id, + affected_account_id=prosumer_account_id, + ) + fresh_db.session.add(user_creation_audit_log) + fresh_db.session.commit() + + # Delete the user via CLI + cli_input = { + "email": prosumer_email, + } + runner = app.test_cli_runner() + result = runner.invoke(delete_a_user, to_flags(cli_input), input="y\n") + check_command_ran_without_error(result) + + # Check user is deleted + assert find_user_by_email(prosumer_email) is None + assert ( + fresh_db.session.scalar(select(func.count()).select_from(User)) == num_users - 1 + ) + + # Check that old audit log entries get affected_user_id set to None + user_creation_audit_log_after = ( + fresh_db.session.query(AuditLog) + .filter_by(event="User Test Prosumer User created test") + .one_or_none() + ) + assert user_creation_audit_log_after.affected_user_id is None + + # Check that data source lineage is preserved: user_id is NOT nullified after user deletion + if data_source_id is not None: + data_source_after = fresh_db.session.get(DataSource, data_source_id) + assert ( + data_source_after is not None + ), f"Data source {data_source_id} should still exist after user deletion." + assert data_source_after.user_id == data_source_user_id_before, ( + f"Data source {data_source_id} user_id should be preserved (not nullified) " + "after user deletion for lineage purposes." + ) + assert data_source_after.account_id == data_source_account_id_before, ( + f"Data source {data_source_id} account_id should be preserved (not nullified) " + "after user deletion for lineage purposes." + ) From 57c1f8eb56d3eb4f919081977667ea89927e4677 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 13:43:31 +0100 Subject: [PATCH 33/53] feat: let audit log entries retain the account ID and user IDs after either is deleted Signed-off-by: F.N. Claessen --- documentation/changelog.rst | 2 +- flexmeasures/cli/tests/test_data_delete.py | 18 +++++++-- ...onstraint_from_audit_log_active_user_id.py | 38 +++++++++++++++++++ flexmeasures/data/models/audit_log.py | 33 +++++++++++++--- flexmeasures/data/tests/test_user_services.py | 5 ++- 5 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py diff --git a/documentation/changelog.rst b/documentation/changelog.rst index 69c2c66507..cb69038390 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -19,7 +19,7 @@ New features Infrastructure / Support ---------------------- -* Add ``account_id`` field to the ``data_source`` table, populated from the related user's account (including data migration and automatic assignment when posting sensor data); also drop FK constraints on ``data_source.user_id`` and ``data_source.account_id`` to preserve data lineage (historical user/account IDs are no longer nullified when users or accounts are deleted) [see `PR #2058 `_] +* Preserve data lineage by removing cascading deletes from user and account references in audit logs and data sources; when users or accounts are deleted, their IDs are retained in historical records for traceability and compliance [see `PR #2058 `_] * Stop creating new toy assets when restarting the docker-compose stack [see `PR #2018 `_] * Migrate from ``pip`` to ``uv`` for dependency management [see `PR #1973 `_] * Migrate from ``make`` to ``poe`` for running tasks [see `PR #1973 `_] diff --git a/flexmeasures/cli/tests/test_data_delete.py b/flexmeasures/cli/tests/test_data_delete.py index 1198b4e114..9b15d13a30 100644 --- a/flexmeasures/cli/tests/test_data_delete.py +++ b/flexmeasures/cli/tests/test_data_delete.py @@ -60,7 +60,14 @@ def test_delete_account( .filter_by(event="User Test Prosumer User created test") .one_or_none() ) - assert user_creation_audit_log.affected_account_id is None + assert user_creation_audit_log.affected_account_id == prosumer_account_id, ( + "Audit log affected_account_id should be preserved (not nullified) " + "after account deletion for lineage purposes." + ) + assert user_creation_audit_log.affected_user_id == prosumer.id, ( + "Audit log affected_user_id should be preserved (not nullified) " + "after account deletion for lineage purposes." + ) # Check that data source lineage is preserved: account_id and user_id are NOT nullified after account deletion for ds_id, original_user_id, original_account_id in data_source_ids_and_lineage: @@ -80,7 +87,7 @@ def test_delete_account( def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db, app): - """Check user is deleted + old audit log entries get affected_user_id set to None. + """Check user is deleted + old audit log entries get affected_user_id preserved. Also check that data source lineage is preserved: user_id is NOT nullified after user deletion. """ from flexmeasures.cli.data_delete import delete_a_user @@ -127,13 +134,16 @@ def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db fresh_db.session.scalar(select(func.count()).select_from(User)) == num_users - 1 ) - # Check that old audit log entries get affected_user_id set to None + # Check that old audit log entries preserve affected_user_id (not set to None) user_creation_audit_log_after = ( fresh_db.session.query(AuditLog) .filter_by(event="User Test Prosumer User created test") .one_or_none() ) - assert user_creation_audit_log_after.affected_user_id is None + assert user_creation_audit_log_after.affected_user_id == prosumer_id, ( + "Audit log affected_user_id should be preserved (not nullified) " + "after user deletion for lineage purposes." + ) # Check that data source lineage is preserved: user_id is NOT nullified after user deletion if data_source_id is not None: diff --git a/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py b/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py new file mode 100644 index 0000000000..66216bee5e --- /dev/null +++ b/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py @@ -0,0 +1,38 @@ +"""Drop FK constraint from audit_log.active_user_id for data lineage preservation + +When users are deleted, we want to preserve the historical active_user_id +values in audit_log rows for lineage purposes, rather than cascade-deleting +or nullifying them. + +Revision ID: b2c3d4e5f6a7 +Revises: a1b2c3d4e5f6 +Create Date: 2026-03-27 00:00:00.000000 + +""" + +from alembic import op + + +# revision identifiers, used by Alembic. +revision = "b2c3d4e5f6a7" +down_revision = "a1b2c3d4e5f6" +branch_labels = None +depends_on = None + + +def upgrade(): + with op.batch_alter_table("audit_log", schema=None) as batch_op: + batch_op.drop_constraint( + op.f("audit_log_active_user_id_fm_user_fkey"), type_="foreignkey" + ) + + +def downgrade(): + with op.batch_alter_table("audit_log", schema=None) as batch_op: + batch_op.create_foreign_key( + op.f("audit_log_active_user_id_fm_user_fkey"), + "fm_user", + ["active_user_id"], + ["id"], + ondelete="SET NULL", + ) diff --git a/flexmeasures/data/models/audit_log.py b/flexmeasures/data/models/audit_log.py index bf1c615fbd..9a54048cce 100644 --- a/flexmeasures/data/models/audit_log.py +++ b/flexmeasures/data/models/audit_log.py @@ -30,14 +30,35 @@ class AuditLog(db.Model, AuthModelMixin): event_datetime = Column(DateTime()) event = Column(String(255)) active_user_name = Column(String(255)) - active_user_id = Column( - "active_user_id", Integer(), ForeignKey("fm_user.id", ondelete="SET NULL") + # No DB-level FK with cascade for active_user_id so that deleting a user preserves the lineage reference in this column. + active_user_id = Column("active_user_id", Integer(), nullable=True) + # No DB-level FK with cascade for affected_user_id so that deleting a user preserves the lineage reference in this column. + affected_user_id = Column("affected_user_id", Integer(), nullable=True) + # No DB-level FK with cascade for affected_account_id so that deleting an account preserves the lineage reference in this column. + affected_account_id = Column("affected_account_id", Integer(), nullable=True) + + # Relationships to navigate to User and Account without database-level FK constraints + # This allows audit logs to maintain references to deleted users/accounts for lineage purposes + active_user = db.relationship( + "User", + primaryjoin="AuditLog.active_user_id == User.id", + foreign_keys="[AuditLog.active_user_id]", + backref=db.backref("active_audit_logs", lazy=True, passive_deletes="all"), + passive_deletes="all", ) - affected_user_id = Column( - "affected_user_id", Integer(), ForeignKey("fm_user.id", ondelete="SET NULL") + affected_user = db.relationship( + "User", + primaryjoin="AuditLog.affected_user_id == User.id", + foreign_keys="[AuditLog.affected_user_id]", + backref=db.backref("affected_audit_logs", lazy=True, passive_deletes="all"), + passive_deletes="all", ) - affected_account_id = Column( - "affected_account_id", Integer(), ForeignKey("account.id", ondelete="SET NULL") + affected_account = db.relationship( + "Account", + primaryjoin="AuditLog.affected_account_id == Account.id", + foreign_keys="[AuditLog.affected_account_id]", + backref=db.backref("affected_audit_logs", lazy=True, passive_deletes="all"), + passive_deletes="all", ) @classmethod diff --git a/flexmeasures/data/tests/test_user_services.py b/flexmeasures/data/tests/test_user_services.py index 5e431681ed..41715d362d 100644 --- a/flexmeasures/data/tests/test_user_services.py +++ b/flexmeasures/data/tests/test_user_services.py @@ -203,7 +203,10 @@ def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db assert user_deletion_audit_log.active_user_id is None fresh_db.session.refresh(user_creation_audit_log) - assert user_creation_audit_log.affected_user_id is None + assert user_creation_audit_log.affected_user_id == prosumer_id, ( + "Audit log affected_user_id should be preserved (not nullified) " + "after user deletion for lineage purposes." + ) # Check that data source lineage is preserved: user_id and account_id are NOT nullified after user deletion fresh_db.session.expire(data_source_before) From e1d6cdcdffc76c043b1587e88904b7925381ae1e Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 13:54:29 +0100 Subject: [PATCH 34/53] docs: rewrite changelog entry Signed-off-by: F.N. Claessen --- documentation/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/changelog.rst b/documentation/changelog.rst index cb69038390..07ef29cbab 100644 --- a/documentation/changelog.rst +++ b/documentation/changelog.rst @@ -19,7 +19,7 @@ New features Infrastructure / Support ---------------------- -* Preserve data lineage by removing cascading deletes from user and account references in audit logs and data sources; when users or accounts are deleted, their IDs are retained in historical records for traceability and compliance [see `PR #2058 `_] +* Support coupling data sources to accounts, and preserve user ID and account ID references in audit logs and data sources for traceability and compliance [see `PR #2058 `_] * Stop creating new toy assets when restarting the docker-compose stack [see `PR #2018 `_] * Migrate from ``pip`` to ``uv`` for dependency management [see `PR #1973 `_] * Migrate from ``make`` to ``poe`` for running tasks [see `PR #1973 `_] From 7ec2cb1eb0445e716088d2cf4ed02bb208c8fb7b Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 14:10:03 +0100 Subject: [PATCH 35/53] fix: dropped foreign key name Signed-off-by: F.N. Claessen --- ...f6_drop_fk_constraints_from_data_source.py | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py b/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py index 35bbb45acc..79070d5498 100644 --- a/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py +++ b/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py @@ -11,6 +11,7 @@ """ from alembic import op +from sqlalchemy import inspect # revision identifiers, used by Alembic. @@ -21,23 +22,36 @@ def upgrade(): + # Inspect existing FK constraints to handle different database states gracefully + bind = op.get_bind() + inspector = inspect(bind) + existing_fks = inspector.get_foreign_keys("data_source") + existing_fk_names = [fk["name"] for fk in existing_fks] + with op.batch_alter_table("data_source", schema=None) as batch_op: - batch_op.drop_constraint( - op.f("data_source_account_id_account_fkey"), type_="foreignkey" - ) - batch_op.drop_constraint("data_sources_user_id_fkey", type_="foreignkey") + # Drop the account_id FK if it exists + if "data_source_account_id_account_fkey" in existing_fk_names: + batch_op.drop_constraint( + "data_source_account_id_account_fkey", type_="foreignkey" + ) + + # Drop the user_id FK if it exists (may have auto-generated name) + for fk_name in existing_fk_names: + if "user_id" in fk_name: + batch_op.drop_constraint(fk_name, type_="foreignkey") + break def downgrade(): with op.batch_alter_table("data_source", schema=None) as batch_op: batch_op.create_foreign_key( - "data_sources_user_id_fkey", + "data_source_user_id_fkey", "fm_user", ["user_id"], ["id"], ) batch_op.create_foreign_key( - op.f("data_source_account_id_account_fkey"), + "data_source_account_id_account_fkey", "account", ["account_id"], ["id"], From 0fefb2ef36e8abf36c7984d8cb4937098b9b8b95 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 15:27:01 +0100 Subject: [PATCH 36/53] fix: when deleting a user, still log the ID of the deleted user Signed-off-by: F.N. Claessen --- flexmeasures/data/services/users.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flexmeasures/data/services/users.py b/flexmeasures/data/services/users.py index f33dbfd87f..d802919999 100644 --- a/flexmeasures/data/services/users.py +++ b/flexmeasures/data/services/users.py @@ -222,11 +222,6 @@ def delete_user(user: User): if hasattr(current_user, "id") and user.id == current_user.id: raise Exception("You cannot delete yourself.") - user_datastore = SQLAlchemySessionUserDatastore(db.session, User, Role) - user_datastore.delete_user(user) - db.session.execute(delete(User).filter_by(id=user.id)) - current_app.logger.info("Deleted %s." % user) - active_user_id, active_user_name = None, None if hasattr(current_user, "id"): active_user_id, active_user_name = current_user.id, current_user.username @@ -235,7 +230,12 @@ def delete_user(user: User): event=f"User {user.username} deleted", active_user_id=active_user_id, active_user_name=active_user_name, - affected_user_id=None, # add the audit log record even if the user is gone + affected_user_id=user.id, # add the audit log record even if the user will be gone affected_account_id=user.account_id, ) db.session.add(user_audit_log) + + user_datastore = SQLAlchemySessionUserDatastore(db.session, User, Role) + user_datastore.delete_user(user) + db.session.execute(delete(User).filter_by(id=user.id)) + current_app.logger.info("Deleted %s." % user) From 6bcf9c6d222a0924b59ebaa21f85bbae471aa306 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 15:34:44 +0100 Subject: [PATCH 37/53] feat: allow tying a newly CLI-created data source to an account Signed-off-by: F.N. Claessen --- flexmeasures/cli/data_add.py | 12 +++++++++++- flexmeasures/data/services/data_sources.py | 6 +++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/flexmeasures/cli/data_add.py b/flexmeasures/cli/data_add.py index 066c85e49d..42a753c080 100755 --- a/flexmeasures/cli/data_add.py +++ b/flexmeasures/cli/data_add.py @@ -516,13 +516,23 @@ def add_initial_structure(): type=str, help=f"Type of source (free, but FlexMeasures has support for {DEFAULT_DATASOURCE_TYPES}).", ) -def add_source(name: str, model: str, version: str, source_type: str): +@click.option( + "--account", + "account", + required=False, + type=AccountIdField(), + help=f"Organisation account associated with the source.", +) +def add_source( + name: str, model: str, version: str, source_type: str, account: Account | None +): """Add a data source.""" source = get_or_create_source( source=name, model=model, version=version, source_type=source_type, + account=account, ) db.session.commit() click.secho(f"Added source {source.__repr__()}", **MsgStyle.SUCCESS) diff --git a/flexmeasures/data/services/data_sources.py b/flexmeasures/data/services/data_sources.py index c2f260806b..d8260b022e 100644 --- a/flexmeasures/data/services/data_sources.py +++ b/flexmeasures/data/services/data_sources.py @@ -6,7 +6,7 @@ from sqlalchemy import select from typing import Type, TypeVar -from flexmeasures import User, Source +from flexmeasures import Account, Source, User from flexmeasures.data import db from flexmeasures.data.models.data_sources import DataSource, DataGenerator from flexmeasures.data.models.user import is_user @@ -22,6 +22,7 @@ def get_or_create_source( model: str | None = None, version: str | None = None, attributes: dict | None = None, + account: Account | None = None, flush: bool = True, ) -> DataSource: if is_user(source): @@ -35,6 +36,8 @@ def get_or_create_source( query = query.filter( DataSource.attributes_hash == DataSource.hash_attributes(attributes) ) + if account is not None: + query = query.filter(DataSource.account == account) if is_user(source): query = query.filter(DataSource.user == source) elif isinstance(source, str): @@ -54,6 +57,7 @@ def get_or_create_source( version=version, type=source_type, attributes=attributes, + account=account, ) current_app.logger.info(f"Setting up {_source} as new data source...") db.session.add(_source) From faa876f69ea8eac978ceba3aa04bb2450da60d0c Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 15:43:11 +0100 Subject: [PATCH 38/53] fix: add missing drop_constraints Signed-off-by: F.N. Claessen --- ...onstraint_from_audit_log_active_user_id.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py b/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py index 66216bee5e..749219c46f 100644 --- a/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py +++ b/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py @@ -25,10 +25,30 @@ def upgrade(): batch_op.drop_constraint( op.f("audit_log_active_user_id_fm_user_fkey"), type_="foreignkey" ) + batch_op.drop_constraint( + op.f("audit_log_affected_user_id_fm_user_fkey"), type_="foreignkey" + ) + batch_op.drop_constraint( + op.f("audit_log_affected_account_id_account_fkey"), type_="foreignkey" + ) def downgrade(): with op.batch_alter_table("audit_log", schema=None) as batch_op: + batch_op.create_foreign_key( + op.f("audit_log_affected_account_id_account_fkey"), + "account", + ["affected_account_id"], + ["id"], + ondelete="SET NULL", + ) + batch_op.create_foreign_key( + op.f("audit_log_affected_user_id_fm_user_fkey"), + "fm_user", + ["affected_user_id"], + ["id"], + ondelete="SET NULL", + ) batch_op.create_foreign_key( op.f("audit_log_active_user_id_fm_user_fkey"), "fm_user", From 317c687f8dc06338df797db52b6cad4008e37756 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 15:44:24 +0100 Subject: [PATCH 39/53] chore: minimize diff Signed-off-by: F.N. Claessen --- flexmeasures/data/services/users.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/flexmeasures/data/services/users.py b/flexmeasures/data/services/users.py index d802919999..ce96ba13b6 100644 --- a/flexmeasures/data/services/users.py +++ b/flexmeasures/data/services/users.py @@ -222,6 +222,11 @@ def delete_user(user: User): if hasattr(current_user, "id") and user.id == current_user.id: raise Exception("You cannot delete yourself.") + user_datastore = SQLAlchemySessionUserDatastore(db.session, User, Role) + user_datastore.delete_user(user) + db.session.execute(delete(User).filter_by(id=user.id)) + current_app.logger.info("Deleted %s." % user) + active_user_id, active_user_name = None, None if hasattr(current_user, "id"): active_user_id, active_user_name = current_user.id, current_user.username @@ -234,8 +239,3 @@ def delete_user(user: User): affected_account_id=user.account_id, ) db.session.add(user_audit_log) - - user_datastore = SQLAlchemySessionUserDatastore(db.session, User, Role) - user_datastore.delete_user(user) - db.session.execute(delete(User).filter_by(id=user.id)) - current_app.logger.info("Deleted %s." % user) From 4401355998d2bfdbf2b8ca93210c57b26e6b7ab3 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 15:46:45 +0100 Subject: [PATCH 40/53] style: flake8 Signed-off-by: F.N. Claessen --- flexmeasures/cli/data_add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/cli/data_add.py b/flexmeasures/cli/data_add.py index 42a753c080..011cc114c8 100755 --- a/flexmeasures/cli/data_add.py +++ b/flexmeasures/cli/data_add.py @@ -521,7 +521,7 @@ def add_initial_structure(): "account", required=False, type=AccountIdField(), - help=f"Organisation account associated with the source.", + help="Organisation account associated with the source.", ) def add_source( name: str, model: str, version: str, source_type: str, account: Account | None From d7a4ec1dd2237537257a9b325495db111db51fcd Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Fri, 27 Mar 2026 16:25:22 +0100 Subject: [PATCH 41/53] fix: test Signed-off-by: F.N. Claessen --- flexmeasures/data/tests/test_user_services.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flexmeasures/data/tests/test_user_services.py b/flexmeasures/data/tests/test_user_services.py index 41715d362d..8d90fae4dd 100644 --- a/flexmeasures/data/tests/test_user_services.py +++ b/flexmeasures/data/tests/test_user_services.py @@ -198,7 +198,10 @@ def test_delete_user(fresh_db, setup_roles_users_fresh_db, setup_assets_fresh_db .filter_by(event="User Test Prosumer User deleted") .one_or_none() ) - assert user_deletion_audit_log.affected_user_id is None + assert user_deletion_audit_log.affected_user_id == prosumer_id, ( + "Audit log affected_user_id should be preserved (not nullified) " + "after user deletion for lineage purposes." + ) assert user_deletion_audit_log.affected_account_id == prosumer_account_id assert user_deletion_audit_log.active_user_id is None From 8bb5ebeba24f8ee4f02f11d0497994e96ac2cb95 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Thu, 2 Apr 2026 15:17:11 +0200 Subject: [PATCH 42/53] agents: rename Review Lead to Lead across all agent instructions Update all agent documentation to use 'Lead' instead of 'Review Lead', reflecting the consolidated governance model where lead.md has been removed and its responsibilities are now documented in AGENTS.md. Changes: - AGENTS.md: All 31 'Review Lead' references updated to 'Lead' - coordinator.md: Updated monitoring patterns to reference Lead - test-specialist.md: Updated coordination examples to reference Lead - tooling-ci-specialist.md: Updated coordination examples to reference Lead This ensures consistent terminology across all agent instructions. --- .github/agents/coordinator.md | 88 +++++++++--------- .github/agents/test-specialist.md | 12 +-- .github/agents/tooling-ci-specialist.md | 12 +-- AGENTS.md | 118 ++++++++++++------------ 4 files changed, 115 insertions(+), 115 deletions(-) diff --git a/.github/agents/coordinator.md b/.github/agents/coordinator.md index 58a3b95698..b3e0bbc871 100644 --- a/.github/agents/coordinator.md +++ b/.github/agents/coordinator.md @@ -292,30 +292,30 @@ Agents should escalate to the Coordinator when: - Encourage agent autonomy and expertise - Provide actionable feedback via review comments -### Review Lead Delegation Pattern Monitoring +### Lead Delegation Pattern Monitoring -**The Coordinator MUST verify Review Lead delegation patterns during governance reviews.** +**The Coordinator MUST verify Lead delegation patterns during governance reviews.** -**Context:** Review Lead has a recurring failure mode of working solo instead of delegating to specialists (observed in session 2026-02-08). +**Context:** Lead has a recurring failure mode of working solo instead of delegating to specialists (observed in session 2026-02-08). **What to check:** -When reviewing a session where Review Lead was involved: +When reviewing a session where Lead was involved: -- [ ] **Delegation occurred**: Did Review Lead invoke appropriate specialists? -- [ ] **No solo execution**: Did Review Lead make code/API/docs changes itself? -- [ ] **Git commit author check**: Are there Review Lead commits with production code? -- [ ] **Request interpretation**: Did Review Lead parse user intent correctly? +- [ ] **Delegation occurred**: Did Lead invoke appropriate specialists? +- [ ] **No solo execution**: Did Lead make code/API/docs changes itself? +- [ ] **Git commit author check**: Are there Lead commits with production code? +- [ ] **Request interpretation**: Did Lead parse user intent correctly? - [ ] **Regression indicators**: Any signs of "too simple to delegate" thinking? **Red flags (immediate governance concern):** -- 🚩 Review Lead commits containing code changes (should be specialist commits) -- 🚩 Review Lead commits containing test changes (should be Test Specialist) -- 🚩 Review Lead commits containing doc changes (should be Documentation Specialist) +- 🚩 Lead commits containing code changes (should be specialist commits) +- 🚩 Lead commits containing test changes (should be Test Specialist) +- 🚩 Lead commits containing doc changes (should be Documentation Specialist) - 🚩 User says "You are regressing" or "You must handle requests as a team" - 🚩 Session closed without specialist involvement on implementation tasks -- 🚩 Review Lead justifies solo work with "too simple to delegate" +- 🚩 Lead justifies solo work with "too simple to delegate" **Verification commands:** @@ -323,27 +323,27 @@ When reviewing a session where Review Lead was involved: # Check who made commits git log --oneline --all --since="1 day ago" --format="%h %an %s" -# Check Review Lead commit types -git log --author="Review Lead" --oneline -10 +# Check Lead commit types +git log --author="Lead" --oneline -10 -# Look for code changes by Review Lead (should be empty or synthesis only) -git log --author="Review Lead" --stat -5 +# Look for code changes by Lead (should be empty or synthesis only) +git log --author="Lead" --stat -5 ``` **When delegation failure detected:** 1. **Document in session review** - What was the failure? -2. **Check Review Lead instructions** - Were they followed? +2. **Check Lead instructions** - Were they followed? 3. **Identify gap** - What prevented proper delegation? 4. **Recommend fix** - How to prevent recurrence? -5. **Update Review Lead instructions** - Add enforcement mechanism +5. **Update Lead instructions** - Add enforcement mechanism 6. **Verify fix works** - Test with hypothetical scenario **Escalation pattern:** -If Review Lead repeatedly violates delegation requirements: +If Lead repeatedly violates delegation requirements: - This is a systemic issue requiring Coordinator intervention -- Review Lead instructions need stronger enforcement +- Lead instructions need stronger enforcement - Consider adding mandatory checkpoints before work execution - May need explicit blockers to prevent solo execution @@ -351,20 +351,20 @@ If Review Lead repeatedly violates delegation requirements: | Pattern | Indicator | Action | |---------|-----------|--------| -| Solo execution | Review Lead makes code commits | Flag as regression | -| "Too simple" trap | Review Lead justifies not delegating | Update instructions with example | -| Request misinterpretation | Review Lead confirms instead of implements | Strengthen request parsing guidance | +| Solo execution | Lead makes code commits | Flag as regression | +| "Too simple" trap | Lead justifies not delegating | Update instructions with example | +| Request misinterpretation | Lead confirms instead of implements | Strengthen request parsing guidance | | Delegation omission | Specialists not invoked on implementation | Verify Session Close Checklist followed | **Success indicators:** -- ✅ Review Lead invoked appropriate specialists +- ✅ Lead invoked appropriate specialists - ✅ Specialists made the actual changes -- ✅ Review Lead synthesized findings +- ✅ Lead synthesized findings - ✅ Team-based execution pattern maintained - ✅ Session Close Checklist verified delegation -**This monitoring ensures Review Lead maintains its orchestration role and doesn't regress to solo execution.** +**This monitoring ensures Lead maintains its orchestration role and doesn't regress to solo execution.** ## Self-Improvement Notes @@ -551,25 +551,25 @@ Lead should now invoke Coordinator as subagent. - Governance process shown to be optional (dangerous precedent) **Solution implemented**: -1. ✅ Added mandatory "Session Close Checklist" to Review Lead (commit 3ad8908) +1. ✅ Added mandatory "Session Close Checklist" to Lead (commit 3ad8908) 2. ✅ Added "Full Test Suite Requirement" to Test Specialist (commit 8d67f3c) 3. ✅ Added "Pre-commit Hook Enforcement" to Tooling & CI Specialist (commit dfe67e8) 4. ✅ Added "Session Close Verification" pattern to Coordinator (this commit) **Structural changes**: -- Review Lead now has comprehensive checklist before closing any session +- Lead now has comprehensive checklist before closing any session - Test Specialist must execute full suite, not just feature-specific tests - Tooling & CI Specialist must verify pre-commit execution -- Coordinator enforces Review Lead checklist completion +- Coordinator enforces Lead checklist completion **New Coordinator pattern (Pattern #7)**: When invoked for governance review, Coordinator must verify: -- [ ] Review Lead followed session close checklist +- [ ] Lead followed session close checklist - [ ] No checklist items were skipped without justification - [ ] Evidence provided for each checklist item **Enforcement escalation**: -If Review Lead repeatedly closes sessions without completing checklist: +If Lead repeatedly closes sessions without completing checklist: 1. First occurrence: Document and update instructions (this session) 2. Second occurrence: Require explicit justification for skips 3. Third occurrence: Escalate to architectural solution (automated checks) @@ -592,26 +592,26 @@ These patterns must not repeat. Agent instructions have been updated to prevent **Session**: PR #2058 — Add `account_id` to DataSource table **Observation**: After three sessions now, the same two failures recur: -1. Coordinator is not invoked at end of session (despite MUST requirement in Review Lead instructions) +1. Coordinator is not invoked at end of session (despite MUST requirement in Lead instructions) 2. No agent updates its own instructions (despite MUST requirement in all agents) **Root cause analysis**: - "Coordinator invocation" and "self-improvement" are both documented as mandatory last steps - But the session ends before they are reached — they are treated as optional epilogue, not gating requirements -- The Review Lead agent selection is ad-hoc, with no explicit checklist forcing API Specialist engagement when endpoints change +- The Lead agent selection is ad-hoc, with no explicit checklist forcing API Specialist engagement when endpoints change **What was missed in PR #2058**: - API Specialist not engaged: POST sensor data now sets `account_id` on the resulting data source — this is an endpoint behavior change that should be reviewed for backward compatibility -- Zero agent instruction updates across all three participating agents (Architecture Specialist, Test Specialist, Review Lead) +- Zero agent instruction updates across all three participating agents (Architecture Specialist, Test Specialist, Lead) - No Coordinator invocation despite explicit user request in the original prompt **Solutions implemented**: - Architecture Specialist: Added Alembic migration checklist + DataSource domain invariants - Test Specialist: Added DataSource property testing pattern + lessons learned -- Review Lead: Added Agent Selection Checklist mapping code change types to required agents; documented 3rd recurrence of these failures +- Lead: Added Agent Selection Checklist mapping code change types to required agents; documented 3rd recurrence of these failures - Coordinator (this file): Documented case study -**Governance escalation**: The Review Lead's "Must Always Run Coordinator" requirement has now been documented in three sessions without being followed. If it fails a fourth time, consider structural changes — e.g., making Coordinator invocation the FIRST step of a session rather than the last, so it sets context rather than being a forgotten epilogue. +**Governance escalation**: The Lead's "Must Always Run Coordinator" requirement has now been documented in three sessions without being followed. If it fails a fourth time, consider structural changes — e.g., making Coordinator invocation the FIRST step of a session rather than the last, so it sets context rather than being a forgotten epilogue. **Code observation from PR #2058 worth tracking**: - An early draft used `user.account_id or (user.account.id if user.account else None)` — the `or` pattern is fragile for `account_id=0` (unrealistic but worth noting). The final implementation correctly uses `if user.account_id is not None` (see `data_sources.py` lines 340-343) — this is the right pattern to follow. @@ -641,7 +641,7 @@ When reviewing schema changes that affect FK constraints: **Pattern**: Systemic self-improvement failure across all agents -**Observation**: Five agents completed substantial work (Architecture, API, Test, Documentation, Review Lead): +**Observation**: Five agents completed substantial work (Architecture, API, Test, Documentation, Lead): - Created new API endpoints (3 POST endpoints) - Wrote 17 comprehensive test functions - Created 494-line feature guide documentation @@ -658,17 +658,17 @@ When reviewing schema changes that affect FK constraints: **Root causes identified**: 1. **Self-improvement not enforced**: No blocking requirement, agents treat as optional 2. **Unclear triggers**: Agents don't know when to update instructions ("after completing work" too vague) -3. **No verification**: Review Lead doesn't check if agents self-improved +3. **No verification**: Lead doesn't check if agents self-improved 4. **Invisible requirement**: Self-improvement not in task completion checklist **Secondary violations observed**: - Temporary file committed (`API_REVIEW_ANNOTATIONS.md`, 575 lines) then removed - Non-atomic commits mixing multiple concerns - Test claims without execution evidence -- Review Lead didn't invoke Coordinator despite governance request +- Lead didn't invoke Coordinator despite governance request **Solution implemented**: -1. Added self-improvement enforcement to Review Lead checklist (see below) +1. Added self-improvement enforcement to Lead checklist (see below) 2. Documented temporary file prevention patterns 3. Added test execution evidence requirement 4. Strengthened Coordinator invocation triggers @@ -681,14 +681,14 @@ When reviewing schema changes that affect FK constraints: **Future sessions**: Monitor whether self-improvement enforcement works. If pattern recurs 3+ times, escalate to architectural solution (e.g., automated checks, mandatory prompts). -**Session 2026-02-10 (Annotation API Tests)**: Pattern recurred despite Review Lead update. Test Specialist fixed 32 annotation API tests (100% passing), made excellent technical commits, but did NOT update instructions with learned lessons (permission semantics, fixture selection, error code expectations). Review Lead enforcement unclear—may not have been involved in session. **Status**: Pattern persists. Approaching threshold for architectural solution. +**Session 2026-02-10 (Annotation API Tests)**: Pattern recurred despite Lead update. Test Specialist fixed 32 annotation API tests (100% passing), made excellent technical commits, but did NOT update instructions with learned lessons (permission semantics, fixture selection, error code expectations). Lead enforcement unclear—may not have been involved in session. **Status**: Pattern persists. Approaching threshold for architectural solution. ### Enforcement Mechanism Added -**New requirement for Review Lead**: Before marking task complete, verify: +**New requirement for Lead**: Before marking task complete, verify: ```markdown -## Task Completion Checklist (Review Lead) +## Task Completion Checklist (Lead) - [ ] Code review completed and feedback addressed - [ ] Security scan completed and alerts investigated @@ -698,7 +698,7 @@ When reviewing schema changes that affect FK constraints: - [ ] No temporary analysis files committed ``` -If any agent hasn't self-improved, Review Lead must: +If any agent hasn't self-improved, Lead must: 1. Request agent update their instructions 2. Wait for update 3. Review update for quality diff --git a/.github/agents/test-specialist.md b/.github/agents/test-specialist.md index 4e638cc154..9862364645 100644 --- a/.github/agents/test-specialist.md +++ b/.github/agents/test-specialist.md @@ -118,11 +118,11 @@ If ANY test fails during full suite execution: **Click context errors**: - Check IdField decorators (`@with_appcontext` vs `@with_appcontext_if_needed()`) - Compare against SensorIdField pattern -- See Review Lead's Click context error pattern +- See Lead's Click context error pattern -### Integration with Review Lead +### Integration with Lead -The Test Specialist MUST provide evidence of full test suite execution to Review Lead. +The Test Specialist MUST provide evidence of full test suite execution to Lead. **Required evidence format:** ``` @@ -134,14 +134,14 @@ Full test suite execution: - Coverage: 87.2% (unchanged) ``` -**Review Lead verification:** -Review Lead's session close checklist includes: +**Lead verification:** +Lead's session close checklist includes: - [ ] Test Specialist confirmed full test suite execution - [ ] All tests pass (100%) - [ ] Test output captured and reviewed **Enforcement:** -Review Lead cannot close session until Test Specialist provides evidence of full test suite execution with 100% pass rate. +Lead cannot close session until Test Specialist provides evidence of full test suite execution with 100% pass rate. ## Testing Patterns for flexmeasures diff --git a/.github/agents/tooling-ci-specialist.md b/.github/agents/tooling-ci-specialist.md index fb9d178a2f..b30995e71c 100644 --- a/.github/agents/tooling-ci-specialist.md +++ b/.github/agents/tooling-ci-specialist.md @@ -163,12 +163,12 @@ Code that bypasses pre-commit: **Who runs pre-commit:** - **During code changes**: Agent making changes runs pre-commit before committing -- **Before PR close**: Review Lead verifies pre-commit execution +- **Before PR close**: Lead verifies pre-commit execution - **In PR review**: Tooling & CI Specialist validates config matches CI **Enforcement:** -- Review Lead's session close checklist includes pre-commit verification -- Review Lead cannot close session without pre-commit evidence +- Lead's session close checklist includes pre-commit verification +- Lead cannot close session without pre-commit evidence - If pre-commit fails, agent must fix all issues before proceeding #### Common Failures and Fixes @@ -206,9 +206,9 @@ black . ci/run_mypy.sh ``` -#### Integration with Review Lead +#### Integration with Lead -**Review Lead checklist items:** +**Lead checklist items:** - [ ] Pre-commit hooks installed - [ ] All hooks pass: `pre-commit run --all-files` - [ ] Zero failures from flake8, black, mypy @@ -219,7 +219,7 @@ ci/run_mypy.sh - Or confirm: "Pre-commit verified: all hooks passed" **Enforcement:** -Review Lead MUST verify pre-commit execution before closing session. +Lead MUST verify pre-commit execution before closing session. ### Agent Environment Setup diff --git a/AGENTS.md b/AGENTS.md index 108b0832c3..a752091904 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -69,7 +69,7 @@ This avoids "agent spam" and ensures unified results. ## Quick Navigation for Critical Sections -**Before starting ANY session, Review Lead MUST consult:** +**Before starting ANY session, Lead MUST consult:** 1. **Parse user intent** → Section 1.1 (Request Interpretation) 2. **Check delegation requirements** → Section 2.1 (Mandatory Delegation Triggers) @@ -110,7 +110,7 @@ The Lead: ### 1.1. Parse User Intent (FIRST STEP - ALWAYS DO THIS) -**Before selecting agents or doing ANY work, Review Lead MUST verify understanding.** +**Before selecting agents or doing ANY work, Lead MUST verify understanding.** This prevents misinterpreting requests and working on the wrong thing. @@ -162,11 +162,11 @@ Or do you want me to [Y] instead?" User: "migrate endpoints to /api/v3_0/accounts//annotations" ❌ **Wrong interpretation:** User wants confirmation of their migration -→ Review Lead confirms work, doesn't do migration +→ Lead confirms work, doesn't do migration → User: "That was rather useless... you basically ignored my request" ✅ **Correct interpretation:** "migrate" = implementation verb = action request -→ Review Lead delegates to specialists to DO the migration +→ Lead delegates to specialists to DO the migration → Test Specialist, API Specialist, Documentation Specialist all participate * * * @@ -197,9 +197,9 @@ Notably: ### 2.1. Delegation Requirements (NON-NEGOTIABLE) -**The Review Lead MUST NEVER work alone on implementation tasks.** +**The Lead MUST NEVER work alone on implementation tasks.** -This is the most critical anti-pattern to avoid: Review Lead working solo instead of delegating. +This is the most critical anti-pattern to avoid: Lead working solo instead of delegating. **Mandatory Delegation Triggers:** @@ -230,9 +230,9 @@ This is the most critical anti-pattern to avoid: Review Lead working solo instea - ✅ ALL endpoint changes → Test + API + Documentation Specialists - ✅ ALL agent/process changes → Coordinator governance -**Review Lead's role in implementation:** +**Lead's role in implementation:** -The Review Lead: +The Lead: - ✅ Orchestrates specialists - ✅ Synthesizes their findings - ✅ Manages coordination @@ -255,13 +255,13 @@ Correct pattern: - [ ] Test Specialist made code changes and verified tests ✅ - [ ] API Specialist reviewed backward compatibility ✅ - [ ] Documentation Specialist updated docs ✅ -- [ ] Review Lead synthesized findings ✅ +- [ ] Lead synthesized findings ✅ **Example from session 2026-02-08 (failure):** User: "migrate endpoints to /api/v3_0/accounts//annotations" -❌ **What Review Lead did:** +❌ **What Lead did:** - Migrated AccountAPI, AssetAPI, SensorAPI endpoints ALONE - Updated test URLs ALONE - Ran pre-commit hooks ALONE @@ -270,7 +270,7 @@ User: "migrate endpoints to /api/v3_0/accounts//annotations" ❌ **Result:** User: "You are regressing. You must handle my requests as a team" -✅ **What Review Lead should have done:** +✅ **What Lead should have done:** ```python # Delegate to Test Specialist task(agent_type="test-specialist", @@ -424,9 +424,9 @@ The coordinator will: ### Must Enforce Agent Self-Improvement (CRITICAL) -**The Review Lead MUST ensure all participating agents update their instructions.** +**The Lead MUST ensure all participating agents update their instructions.** -This is the Review Lead's responsibility from the coordinator's governance review. When agents complete work, the Review Lead must: +This is the Lead's responsibility from the coordinator's governance review. When agents complete work, the Lead must: #### 1. Identify Participating Agents After work is complete, identify which agents contributed: @@ -487,11 +487,11 @@ If an agent doesn't update or provides insufficient update: - Instructions stay current and relevant **Example from Session 2026-02-10:** -- 5 agents participated (Architecture, API, Test, Documentation, Review Lead) -- Only Review Lead updated instructions initially +- 5 agents participated (Architecture, API, Test, Documentation, Lead) +- Only Lead updated instructions initially - Coordinator flagged 100% failure rate -- Review Lead should have prompted all 4 other agents -- Review Lead should have verified updates before closing +- Lead should have prompted all 4 other agents +- Lead should have verified updates before closing ### Must Not Create PRs Prematurely @@ -944,13 +944,13 @@ Before completing an assignment and closing the session: ### Regression Prevention (CRITICAL) -**The Review Lead can backslide to solo execution mode.** +**The Lead can backslide to solo execution mode.** This is the primary failure pattern observed in session 2026-02-08. **What regression looks like:** -When Review Lead starts working alone instead of delegating to specialists: +When Lead starts working alone instead of delegating to specialists: - Writing code directly - Updating tests without Test Specialist - Modifying docs without Documentation Specialist @@ -967,9 +967,9 @@ When Review Lead starts working alone instead of delegating to specialists: **Regression indicators (how to detect):** -- 🚩 Review Lead making code commits (should be specialist commits) -- 🚩 Review Lead updating tests (should be Test Specialist) -- 🚩 Review Lead modifying docs (should be Documentation Specialist) +- 🚩 Lead making code commits (should be specialist commits) +- 🚩 Lead updating tests (should be Test Specialist) +- 🚩 Lead modifying docs (should be Documentation Specialist) - 🚩 User says "You are regressing" - 🚩 User says "You must handle my requests as a team" - 🚩 Session closes without specialist involvement @@ -1013,40 +1013,40 @@ Ask these questions before ANY work execution: **The correct workflow:** 1. User requests implementation -2. Review Lead parses intent (section 1.1) -3. Review Lead identifies required specialists (section 2.1) -4. **Review Lead delegates to specialists** ← THIS IS THE JOB +2. Lead parses intent (section 1.1) +3. Lead identifies required specialists (section 2.1) +4. **Lead delegates to specialists** ← THIS IS THE JOB 5. Specialists do the actual work -6. Review Lead synthesizes findings -7. Review Lead runs session close checklist +6. Lead synthesizes findings +7. Lead runs session close checklist **Example from session 2026-02-08 (regression case study):** **Request:** "migrate endpoints to /api/v3_0/accounts//annotations" -**What Review Lead did (WRONG):** +**What Lead did (WRONG):** ``` -✗ Review Lead migrated AccountAPI endpoints -✗ Review Lead updated AssetAPI endpoints -✗ Review Lead modified SensorAPI endpoints -✗ Review Lead changed test URLs -✗ Review Lead ran pre-commit hooks +✗ Lead migrated AccountAPI endpoints +✗ Lead updated AssetAPI endpoints +✗ Lead modified SensorAPI endpoints +✗ Lead changed test URLs +✗ Lead ran pre-commit hooks ✗ NO specialist involvement ``` **User response:** "You are regressing. You must handle my requests as a team" -**What Review Lead should have done (CORRECT):** +**What Lead should have done (CORRECT):** ``` -✓ Review Lead parsed intent: Implementation request -✓ Review Lead identified specialists needed: +✓ Lead parsed intent: Implementation request +✓ Lead identified specialists needed: - Test Specialist (test URL updates) - API Specialist (backward compatibility) - Documentation Specialist (doc updates) -✓ Review Lead delegated to each specialist +✓ Lead delegated to each specialist ✓ Specialists did the actual work -✓ Review Lead synthesized findings +✓ Lead synthesized findings ✓ Team-based execution ``` @@ -1054,7 +1054,7 @@ Ask these questions before ANY work execution: "Simple task" is a cognitive trap. **NO task is too simple to delegate.** -The Review Lead's job is orchestration, not execution. +The Lead's job is orchestration, not execution. ### Learning from Failures @@ -1105,7 +1105,7 @@ Track and document when the Lead: **Specific lesson learned (2026-02-10 follow-up)**: - **Session**: Implementing coordinator's governance review recommendations -- **Failure**: Review Lead updated own instructions but didn't ensure other agents did the same +- **Failure**: Lead updated own instructions but didn't ensure other agents did the same - **What went wrong**: Didn't take ownership of follow-through on coordinator recommendations - **Impact**: 4 out of 5 participating agents didn't update their instructions (80% failure rate) - **Root cause**: No enforcement mechanism; assumed agents would self-update without prompting @@ -1116,7 +1116,7 @@ Track and document when the Lead: 3. Verify updates are substantive and committed 4. Re-prompt if necessary 5. Don't close session until all agents have updated -- **Key insight**: "Review Lead owns follow-through on coordinator recommendations" +- **Key insight**: "Lead owns follow-through on coordinator recommendations" - **Test execution learning**: Test specialist couldn't run tests because PostgreSQL setup was skipped; must follow copilot-setup-steps.yml workflow **Specific lesson learned (2026-02-10 test fixes)**: @@ -1152,24 +1152,24 @@ Track and document when the Lead: **Specific lesson learned (2026-02-08 endpoint migration)**: - **Session**: Annotation API endpoint migration (flat to nested RESTful pattern) -- **Failures identified**: Review Lead worked solo instead of delegating to specialists +- **Failures identified**: Lead worked solo instead of delegating to specialists - **Root cause**: Treated "simple" endpoint URL changes as not requiring delegation - **Impact**: User intervention required ("You are regressing. You must handle my requests as a team") - **Failure pattern**: 1. User: "migrate endpoints to /api/v3_0/accounts//annotations" - 2. Review Lead misunderstood as confirmation request (Failure #1) + 2. Lead misunderstood as confirmation request (Failure #1) 3. User corrected: "That was rather useless... you basically ignored my request" - 4. Review Lead did entire migration alone without delegation (Failure #2): + 4. Lead did entire migration alone without delegation (Failure #2): - Migrated AccountAPI, AssetAPI, SensorAPI endpoints - Updated test URLs - Ran pre-commit hooks - NO delegation to Test/API/Documentation specialists 5. User: "You are regressing. You must handle my requests as a team" - 6. Review Lead then properly delegated after explicit user checklist + 6. Lead then properly delegated after explicit user checklist - **Key insights**: - "Simple task" is a cognitive trap that triggers solo execution mode - - NO task is too simple to delegate - delegation is the Review Lead's core job - - Regression pattern: Review Lead forgets team-based model under time pressure + - NO task is too simple to delegate - delegation is the Lead's core job + - Regression pattern: Lead forgets team-based model under time pressure - Request interpretation MUST happen before work starts - **Prevention**: Added sections to this file: 1. **Request Interpretation** (Section 1.1) - Parse intent before work @@ -1177,25 +1177,25 @@ Track and document when the Lead: 3. **Regression Prevention** - How to detect and correct backsliding 4. **Delegation Verification** - Session close checklist item 5. **Quick Navigation** - Prominent links to critical sections -- **Verification**: Review Lead must now answer "Am I working solo?" before ANY execution +- **Verification**: Lead must now answer "Am I working solo?" before ANY execution Update this file to prevent repeating the same mistakes. ## Session Close Checklist (MANDATORY) -**Before closing ANY session, the Review Lead MUST verify ALL items in this checklist.** +**Before closing ANY session, the Lead MUST verify ALL items in this checklist.** This is non-negotiable. Skipping items without explicit justification and user approval is a governance failure. ### Delegation Verification (CRITICAL - NEW) -**Before closing session, verify Review Lead did NOT work solo:** +**Before closing session, verify Lead did NOT work solo:** - [ ] **Task type identified**: Code/API/docs/time/performance/governance changes -- [ ] **Specialists involved**: Appropriate specialists were invoked (not Review Lead alone) +- [ ] **Specialists involved**: Appropriate specialists were invoked (not Lead alone) - [ ] **Evidence of delegation**: Show task() calls that invoked specialists -- [ ] **No solo execution**: Review Lead did NOT make code/API/docs changes itself +- [ ] **No solo execution**: Lead did NOT make code/API/docs changes itself - [ ] **Synthesis provided**: Combined specialist findings into unified output **Evidence required:** @@ -1205,7 +1205,7 @@ List which specialists were invoked and what each did: ✓ Test Specialist - Updated test URLs, verified 32 tests pass ✓ API Specialist - Verified backward compatibility ✓ Documentation Specialist - Updated API docs with new structure -✓ Review Lead - Synthesized findings, managed coordination +✓ Lead - Synthesized findings, managed coordination ``` **FORBIDDEN patterns (immediate governance failure):** @@ -1213,14 +1213,14 @@ List which specialists were invoked and what each did: - ❌ "I handled it myself" (regression to solo mode) - ❌ "Too simple to delegate" (invalid justification) - ❌ "No specialists needed" (delegation always needed for code/API/docs) -- ❌ Review Lead commits containing code changes (should be specialist commits) -- ❌ Review Lead commits containing test changes (should be Test Specialist) -- ❌ Review Lead commits containing doc changes (should be Documentation Specialist) +- ❌ Lead commits containing code changes (should be specialist commits) +- ❌ Lead commits containing test changes (should be Test Specialist) +- ❌ Lead commits containing doc changes (should be Documentation Specialist) **Git commit check:** ```bash -git log --oneline -10 --author="Review Lead" +git log --oneline -10 --author="Lead" ``` Should show ONLY: @@ -1299,7 +1299,7 @@ This is a regression (see Regression Prevention section). You MUST: ### Enforcement -**The Review Lead MUST NOT close a session until ALL checklist items are verified.** +**The Lead MUST NOT close a session until ALL checklist items are verified.** If you cannot complete an item: 1. Document why in session notes @@ -1309,7 +1309,7 @@ If you cannot complete an item: If you close without completing checklist: - This is a governance failure - Coordinator will document it -- Review Lead instructions will be updated to prevent recurrence +- Lead instructions will be updated to prevent recurrence ### Continuous Improvement From 86d3ebb0e77e734d65a2564b85c23fd3a6f52b01 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Thu, 2 Apr 2026 15:26:44 +0200 Subject: [PATCH 43/53] db: fix merge migration to properly resolve conflicting heads Update b2e07f0dafa1_merge.py migration to specify both parent revisions that are being merged: - 3f4a6f9d2b11 (increase_audit_event_length_to_500) - b2c3d4e5f6a7 (Drop FK constraint from audit_log.active_user_id) This resolves the multiple heads issue and creates a proper mergepoint in the migration history. The migration now correctly shows as a (mergepoint) in the history output. --- .../migrations/versions/b2e07f0dafa1_merge.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py diff --git a/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py b/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py new file mode 100644 index 0000000000..8875f38e5c --- /dev/null +++ b/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py @@ -0,0 +1,25 @@ +"""merge + +Revision ID: b2e07f0dafa1 +Revises: +Create Date: 2026-04-02 15:21:56.334943 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "b2e07f0dafa1" +down_revision = ("3f4a6f9d2b11", "b2c3d4e5f6a7") +branch_labels = None +depends_on = None + + +def upgrade(): + pass + + +def downgrade(): + pass From 7d5fa60947479640f125dec846d6b870c9353bc5 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Thu, 2 Apr 2026 15:33:26 +0200 Subject: [PATCH 44/53] agents: remove merge conflict markers from coordinator.md Clean up leftover git merge conflict markers (<<<<<<< HEAD, =======, >>>>>>> origin/main) that were not properly resolved. Both pattern sections (2026-03-24 and 2026-02-10) have been preserved as they document complementary learning from different sessions. --- .github/agents/coordinator.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/agents/coordinator.md b/.github/agents/coordinator.md index b3e0bbc871..2731a33bfa 100644 --- a/.github/agents/coordinator.md +++ b/.github/agents/coordinator.md @@ -584,7 +584,6 @@ If Lead repeatedly closes sessions without completing checklist: These patterns must not repeat. Agent instructions have been updated to prevent recurrence. -<<<<<<< HEAD ### Additional Pattern Discovered (2026-03-24) **Pattern**: Persistent self-improvement failure and missing API Specialist agent selection @@ -705,4 +704,3 @@ If any agent hasn't self-improved, Lead must: 4. Then mark task complete **This makes self-improvement blocking, not optional.** ->>>>>>> origin/main From 4724959678efc2a38ef3508bfeb57271c133f7f9 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Thu, 2 Apr 2026 15:36:52 +0200 Subject: [PATCH 45/53] style: flake8 Signed-off-by: F.N. Claessen --- flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py b/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py index 8875f38e5c..7daab17abf 100644 --- a/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py +++ b/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py @@ -1,15 +1,11 @@ """merge Revision ID: b2e07f0dafa1 -Revises: +Revises: 3f4a6f9d2b11, b2c3d4e5f6a7 Create Date: 2026-04-02 15:21:56.334943 """ -from alembic import op -import sqlalchemy as sa - - # revision identifiers, used by Alembic. revision = "b2e07f0dafa1" down_revision = ("3f4a6f9d2b11", "b2c3d4e5f6a7") From 60a4b502d196fb2c998081c6244e3a95c81a03c3 Mon Sep 17 00:00:00 2001 From: Felix Claessen <30658763+Flix6x@users.noreply.github.com> Date: Sun, 5 Apr 2026 20:24:35 +0200 Subject: [PATCH 46/53] docs: clarify why we add a creation audit log record in a test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nicolas Höning Signed-off-by: Felix Claessen <30658763+Flix6x@users.noreply.github.com> --- flexmeasures/cli/tests/test_data_delete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/cli/tests/test_data_delete.py b/flexmeasures/cli/tests/test_data_delete.py index 9b15d13a30..7fd9a2b558 100644 --- a/flexmeasures/cli/tests/test_data_delete.py +++ b/flexmeasures/cli/tests/test_data_delete.py @@ -31,7 +31,7 @@ def test_delete_account( (ds.id, ds.user_id, ds.account_id) for ds in data_sources_before ] - # Add creation audit log record + # Add creation audit log record, as that has not automatically been done when setting up test data user_creation_audit_log = AuditLog( event="User Test Prosumer User created test", affected_user_id=prosumer.id, From bf5bf7571be379d8e5fffc7227002ca64e1b029a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 10:58:40 +0000 Subject: [PATCH 47/53] migration: merge drop-FK migration into add-account-id migration (9877450113f6) Remove the intermediate a1b2c3d4e5f6 migration that was dropping the account_id FK (only created within this PR) and the user_id FK. Instead: - Don't create the account_id FK in 9877450113f6 in the first place - Absorb the user_id FK drop into 9877450113f6 - Update b2c3d4e5f6a7 to chain off 9877450113f6 directly - Delete a1b2c3d4e5f6 Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/4d9d8a92-d4fd-4635-bebf-1965c35667b3 Co-authored-by: nhoening <1042336+nhoening@users.noreply.github.com> --- ...7450113f6_add_account_id_to_data_source.py | 44 +++++++++----- ...f6_drop_fk_constraints_from_data_source.py | 58 ------------------- ...onstraint_from_audit_log_active_user_id.py | 2 +- 3 files changed, 32 insertions(+), 72 deletions(-) delete mode 100644 flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py diff --git a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py index dde9f1c0bf..227f0e751e 100644 --- a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py +++ b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py @@ -1,4 +1,9 @@ -"""Add account_id to data_source table +"""Add account_id to data_source table and drop FK constraints for lineage preservation + +Adds account_id to data_source (without a DB-level FK constraint, so that referenced +accounts can be deleted while the historical account_id value is preserved for lineage). +Also drops the existing user_id FK constraint for the same reason: when a user is deleted, +the data_source.user_id should remain intact rather than being cascaded or nullified. Revision ID: 9877450113f6 Revises: 8b62f8129f34 @@ -8,6 +13,7 @@ from alembic import op import sqlalchemy as sa +from sqlalchemy import inspect # revision identifiers, used by Alembic. @@ -34,15 +40,9 @@ def upgrade(): - # 1. Add the account_id column (nullable) + # 1. Add the account_id column (nullable, no DB-level FK so lineage is preserved) with op.batch_alter_table("data_source", schema=None) as batch_op: batch_op.add_column(sa.Column("account_id", sa.Integer(), nullable=True)) - batch_op.create_foreign_key( - op.f("data_source_account_id_account_fkey"), - "account", - ["account_id"], - ["id"], - ) # 2. Data migration: populate account_id from the related user's account. # Use a correlated subquery to avoid N+1 queries and ensure portability. @@ -65,9 +65,30 @@ def upgrade(): ["name", "user_id", "account_id", "model", "version", "attributes_hash"], ) + # 4. Drop the user_id FK constraint so that deleting a user preserves the lineage + # reference in data_source rows (no cascade, no SET NULL). + bind = op.get_bind() + inspector = inspect(bind) + existing_fks = inspector.get_foreign_keys("data_source") + existing_fk_names = [fk["name"] for fk in existing_fks] + with op.batch_alter_table("data_source", schema=None) as batch_op: + for fk_name in existing_fk_names: + if "user_id" in fk_name: + batch_op.drop_constraint(fk_name, type_="foreignkey") + break + def downgrade(): - # 1. Restore the original UniqueConstraint without account_id + # 1. Re-add the user_id FK constraint + with op.batch_alter_table("data_source", schema=None) as batch_op: + batch_op.create_foreign_key( + "data_source_user_id_fkey", + "fm_user", + ["user_id"], + ["id"], + ) + + # 2. Restore the original UniqueConstraint without account_id with op.batch_alter_table("data_source", schema=None) as batch_op: batch_op.drop_constraint("data_source_name_key", type_="unique") batch_op.create_unique_constraint( @@ -75,9 +96,6 @@ def downgrade(): ["name", "user_id", "model", "version", "attributes_hash"], ) - # 2. Drop the account_id column and its FK + # 3. Drop the account_id column with op.batch_alter_table("data_source", schema=None) as batch_op: - batch_op.drop_constraint( - op.f("data_source_account_id_account_fkey"), type_="foreignkey" - ) batch_op.drop_column("account_id") diff --git a/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py b/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py deleted file mode 100644 index 79070d5498..0000000000 --- a/flexmeasures/data/migrations/versions/a1b2c3d4e5f6_drop_fk_constraints_from_data_source.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Drop FK constraints from data_source for data lineage preservation - -When users or accounts are deleted, we want to preserve the historical -user_id and account_id values in data_source rows for lineage purposes, -rather than cascade-deleting or nullifying them. - -Revision ID: a1b2c3d4e5f6 -Revises: 9877450113f6 -Create Date: 2026-03-25 00:00:00.000000 - -""" - -from alembic import op -from sqlalchemy import inspect - - -# revision identifiers, used by Alembic. -revision = "a1b2c3d4e5f6" -down_revision = "9877450113f6" -branch_labels = None -depends_on = None - - -def upgrade(): - # Inspect existing FK constraints to handle different database states gracefully - bind = op.get_bind() - inspector = inspect(bind) - existing_fks = inspector.get_foreign_keys("data_source") - existing_fk_names = [fk["name"] for fk in existing_fks] - - with op.batch_alter_table("data_source", schema=None) as batch_op: - # Drop the account_id FK if it exists - if "data_source_account_id_account_fkey" in existing_fk_names: - batch_op.drop_constraint( - "data_source_account_id_account_fkey", type_="foreignkey" - ) - - # Drop the user_id FK if it exists (may have auto-generated name) - for fk_name in existing_fk_names: - if "user_id" in fk_name: - batch_op.drop_constraint(fk_name, type_="foreignkey") - break - - -def downgrade(): - with op.batch_alter_table("data_source", schema=None) as batch_op: - batch_op.create_foreign_key( - "data_source_user_id_fkey", - "fm_user", - ["user_id"], - ["id"], - ) - batch_op.create_foreign_key( - "data_source_account_id_account_fkey", - "account", - ["account_id"], - ["id"], - ) diff --git a/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py b/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py index 749219c46f..6eac719b54 100644 --- a/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py +++ b/flexmeasures/data/migrations/versions/b2c3d4e5f6a7_drop_fk_constraint_from_audit_log_active_user_id.py @@ -15,7 +15,7 @@ # revision identifiers, used by Alembic. revision = "b2c3d4e5f6a7" -down_revision = "a1b2c3d4e5f6" +down_revision = "9877450113f6" branch_labels = None depends_on = None From 1d39eae5ce10cfc195ec1e478f4e3afc25d824ac Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Sun, 5 Apr 2026 20:29:12 +0200 Subject: [PATCH 48/53] docs: make this lengthy comment only once Signed-off-by: F.N. Claessen --- flexmeasures/data/models/audit_log.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/flexmeasures/data/models/audit_log.py b/flexmeasures/data/models/audit_log.py index 84c9c45fd2..587e0df77f 100644 --- a/flexmeasures/data/models/audit_log.py +++ b/flexmeasures/data/models/audit_log.py @@ -30,11 +30,9 @@ class AuditLog(db.Model, AuthModelMixin): event_datetime = Column(DateTime()) event = Column(String(500)) active_user_name = Column(String(255)) - # No DB-level FK with cascade for active_user_id so that deleting a user preserves the lineage reference in this column. + # No DB-level FK with cascade for any user_id or account_id so that deleting a user preserves the lineage reference in this column. active_user_id = Column("active_user_id", Integer(), nullable=True) - # No DB-level FK with cascade for affected_user_id so that deleting a user preserves the lineage reference in this column. affected_user_id = Column("affected_user_id", Integer(), nullable=True) - # No DB-level FK with cascade for affected_account_id so that deleting an account preserves the lineage reference in this column. affected_account_id = Column("affected_account_id", Integer(), nullable=True) # Relationships to navigate to User and Account without database-level FK constraints From 2503d95bf1d0243ea1c76cbb0b04c64d50b1d4d9 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Sun, 5 Apr 2026 20:34:07 +0200 Subject: [PATCH 49/53] docs: add explanation for defining foreign keys in the db.relationship Signed-off-by: F.N. Claessen --- flexmeasures/data/models/audit_log.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flexmeasures/data/models/audit_log.py b/flexmeasures/data/models/audit_log.py index 587e0df77f..86c0b250f6 100644 --- a/flexmeasures/data/models/audit_log.py +++ b/flexmeasures/data/models/audit_log.py @@ -37,6 +37,8 @@ class AuditLog(db.Model, AuthModelMixin): # Relationships to navigate to User and Account without database-level FK constraints # This allows audit logs to maintain references to deleted users/accounts for lineage purposes + # The foreign_keys= parameter inside db.relationship(...) is a SQLAlchemy ORM hint only — it has zero effect on the database schema. + # It's needed here because SQLAlchemy can't automatically infer which column is the "FK side" of the join when there's no actual ForeignKey() in the column definition active_user = db.relationship( "User", primaryjoin="AuditLog.active_user_id == User.id", From d43b41d914e7b0bd7cf8b30a9ad54621ba5da542 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 13 Apr 2026 12:12:02 +0200 Subject: [PATCH 50/53] fix: merge db revisions Signed-off-by: F.N. Claessen --- .../migrations/versions/2c6a4c8127c1_merge.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py diff --git a/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py b/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py new file mode 100644 index 0000000000..c40b018e63 --- /dev/null +++ b/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py @@ -0,0 +1,21 @@ +"""merge + +Revision ID: 2c6a4c8127c1 +Revises: +Create Date: 2026-04-13 12:10:52.322376 + +""" + +# revision identifiers, used by Alembic. +revision = "2c6a4c8127c1" +down_revision = ("b2e07f0dafa1", "e26d02ed1621") +branch_labels = None +depends_on = None + + +def upgrade(): + pass + + +def downgrade(): + pass From 74ed3dc63b63a38b429372a5a5a07b9448f43f6c Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 13 Apr 2026 12:24:35 +0200 Subject: [PATCH 51/53] style: flake8 Signed-off-by: F.N. Claessen --- flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py b/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py index c40b018e63..0e3e6356dd 100644 --- a/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py +++ b/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py @@ -1,7 +1,7 @@ """merge Revision ID: 2c6a4c8127c1 -Revises: +Revises: b2e07f0dafa1, e26d02ed1621 Create Date: 2026-04-13 12:10:52.322376 """ From 16655e4313e5133d2aad28272461b116687a9da5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:52:20 +0000 Subject: [PATCH 52/53] migration: rebase add-account-id migration onto e26d02ed1621 (tip of main) Change 9877450113f6.down_revision from 8b62f8129f34 to e26d02ed1621 so the PR's two real migrations (9877450113f6, b2c3d4e5f6a7) chain linearly after the tip of the main branch, making the two merge-only files (b2e07f0dafa1 and 2c6a4c8127c1) completely obsolete. Delete b2e07f0dafa1_merge.py and 2c6a4c8127c1_merge.py. Agent-Logs-Url: https://github.com/FlexMeasures/flexmeasures/sessions/94c3070e-a701-4a97-a895-9ea1064e3c20 Co-authored-by: Flix6x <30658763+Flix6x@users.noreply.github.com> --- .../migrations/versions/2c6a4c8127c1_merge.py | 21 ------------------- ...7450113f6_add_account_id_to_data_source.py | 2 +- .../migrations/versions/b2e07f0dafa1_merge.py | 21 ------------------- 3 files changed, 1 insertion(+), 43 deletions(-) delete mode 100644 flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py delete mode 100644 flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py diff --git a/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py b/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py deleted file mode 100644 index 0e3e6356dd..0000000000 --- a/flexmeasures/data/migrations/versions/2c6a4c8127c1_merge.py +++ /dev/null @@ -1,21 +0,0 @@ -"""merge - -Revision ID: 2c6a4c8127c1 -Revises: b2e07f0dafa1, e26d02ed1621 -Create Date: 2026-04-13 12:10:52.322376 - -""" - -# revision identifiers, used by Alembic. -revision = "2c6a4c8127c1" -down_revision = ("b2e07f0dafa1", "e26d02ed1621") -branch_labels = None -depends_on = None - - -def upgrade(): - pass - - -def downgrade(): - pass diff --git a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py index 227f0e751e..f0e046d120 100644 --- a/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py +++ b/flexmeasures/data/migrations/versions/9877450113f6_add_account_id_to_data_source.py @@ -18,7 +18,7 @@ # revision identifiers, used by Alembic. revision = "9877450113f6" -down_revision = "8b62f8129f34" +down_revision = "e26d02ed1621" branch_labels = None depends_on = None diff --git a/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py b/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py deleted file mode 100644 index 7daab17abf..0000000000 --- a/flexmeasures/data/migrations/versions/b2e07f0dafa1_merge.py +++ /dev/null @@ -1,21 +0,0 @@ -"""merge - -Revision ID: b2e07f0dafa1 -Revises: 3f4a6f9d2b11, b2c3d4e5f6a7 -Create Date: 2026-04-02 15:21:56.334943 - -""" - -# revision identifiers, used by Alembic. -revision = "b2e07f0dafa1" -down_revision = ("3f4a6f9d2b11", "b2c3d4e5f6a7") -branch_labels = None -depends_on = None - - -def upgrade(): - pass - - -def downgrade(): - pass From e7139da3ffc3db37844931cca03428ccf54f1073 Mon Sep 17 00:00:00 2001 From: "F.N. Claessen" Date: Mon, 13 Apr 2026 12:44:23 +0200 Subject: [PATCH 53/53] style: punctuation / double spaces Signed-off-by: F.N. Claessen --- ...1_recompute_attributes_hash_with_sort_keys.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/flexmeasures/data/migrations/versions/a5b26c3f8e91_recompute_attributes_hash_with_sort_keys.py b/flexmeasures/data/migrations/versions/a5b26c3f8e91_recompute_attributes_hash_with_sort_keys.py index 7d06e7df75..4a708932e4 100644 --- a/flexmeasures/data/migrations/versions/a5b26c3f8e91_recompute_attributes_hash_with_sort_keys.py +++ b/flexmeasures/data/migrations/versions/a5b26c3f8e91_recompute_attributes_hash_with_sort_keys.py @@ -2,22 +2,22 @@ Previously the hash was computed without sorting JSON object keys, so a PostgreSQL JSONB round-trip (which always returns keys in alphabetical order) -produced a different hash than the one stored in the database. This caused +produced a different hash than the one stored in the database. This caused get_or_create_source() to silently create duplicate DataSource rows when it was called with attributes that had been loaded back from the database. The upgrade also handles the case where the bug already produced duplicate rows (same logical content, but saved with different key-insertion-order hashes). -For each group of duplicates the newest row (highest ID) is kept as-is. Older +For each group of duplicates the newest row (highest ID) is kept as-is. Older duplicates receive a synthetic ``{"flexmeasures-hash-conflict": N}`` attribute so that their hashes remain unique without touching the timed_belief table. Downgrade note: since PostgreSQL JSONB already serialises all object keys in alphabetical order when storing, ``json.dumps(attrs)`` and ``json.dumps(attrs, sort_keys=True)`` produce identical strings for any data -that has gone through JSONB. Therefore recomputing the hash without +that has gone through JSONB. Therefore, recomputing the hash without ``sort_keys`` would yield the same bytes as the upgrade, making a downgrade -data-migration a no-op. The downgrade function is intentionally left empty. +data-migration a no-op. The downgrade function is intentionally left empty. Revision ID: a5b26c3f8e91 Revises: 8b62f8129f34 @@ -51,7 +51,7 @@ def upgrade(): Duplicate rows (same logical content, different key-order hashes) are resolved by tagging older rows with a synthetic conflict-marker attribute so - that each row still gets a unique hash. The newest row (highest id) is + that each row still gets a unique hash. The newest row (highest id) is always kept clean. """ bind = op.get_bind() @@ -64,7 +64,7 @@ def upgrade(): ).fetchall() # Group rows by their normalised unique key (name, user_id, model, version, - # sorted-attributes hash). Any group with >1 member means the bug created + # sorted-attributes hash). Any group with >1 member means the bug created # duplicate rows. groups: dict = defaultdict(list) attrs_by_id: dict = {} @@ -112,7 +112,7 @@ def downgrade(): """No data migration needed on downgrade. PostgreSQL JSONB always serialises object keys in alphabetical order when - storing. This means ``json.dumps(attrs)`` and + storing. This means ``json.dumps(attrs)`` and ``json.dumps(attrs, sort_keys=True)`` produce identical strings for any attributes that have been round-tripped through the database, so recomputing hashes without ``sort_keys`` would yield the same bytes. @@ -120,7 +120,7 @@ def downgrade(): Note: rows that were tagged with ``flexmeasures-hash-conflict`` during upgrade are NOT cleaned up here, because doing so would require knowing which rows were duplicates and which one was the "canonical" row -- that - information is not reliably recoverable. After a downgrade, those rows + information is not reliably recoverable. After a downgrade, those rows will have a slightly different ``attributes`` dict than before the upgrade, but ``get_or_create_source`` will still find them correctly via their hash. """