diff --git a/alembic/versions/4dbf24ed1857_add_unique_constraint_on_replaces_id.py b/alembic/versions/4dbf24ed1857_add_unique_constraint_on_replaces_id.py new file mode 100644 index 00000000..b3f2e590 --- /dev/null +++ b/alembic/versions/4dbf24ed1857_add_unique_constraint_on_replaces_id.py @@ -0,0 +1,28 @@ +"""add unique constraint on replaces_id + +Revision ID: 4dbf24ed1857 +Revises: 398067c53257 +Create Date: 2026-05-13 11:22:59.646605 + +""" +from alembic import op + +# revision identifiers, used by Alembic. +revision = '4dbf24ed1857' +down_revision = '398067c53257' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index('ix_scoresets_replaces_id', table_name='scoresets') + op.create_index(op.f('ix_scoresets_replaces_id'), 'scoresets', ['replaces_id'], unique=True) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_scoresets_replaces_id'), table_name='scoresets') + op.create_index('ix_scoresets_replaces_id', 'scoresets', ['replaces_id'], unique=False) + # ### end Alembic commands ### diff --git a/src/mavedb/models/score_set.py b/src/mavedb/models/score_set.py index 5cf638df..ccdcb73d 100644 --- a/src/mavedb/models/score_set.py +++ b/src/mavedb/models/score_set.py @@ -124,7 +124,7 @@ class ScoreSet(Base): # TODO Standardize on US or GB spelling for licenc/se. 
licence_id = Column(Integer, ForeignKey("licenses.id"), index=True, nullable=False) license: Mapped["License"] = relationship("License") - superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True) + superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True, unique=True,) superseded_score_set: Mapped[Optional["ScoreSet"]] = relationship( "ScoreSet", uselist=False, diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 74f2a5c0..b386d309 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -54,6 +54,7 @@ csv_data_to_df, fetch_score_set_search_filter_options, find_meta_analyses_for_experiment_sets, + find_superseded_score_set_tail, get_score_set_variants_as_csv, refresh_variant_urns, variants_to_csv_rows, @@ -1600,11 +1601,11 @@ async def create_score_set( save_to_logging_context({"requested_superseded_score_set": item_create.superseded_score_set_urn}) if item_create.superseded_score_set_urn is not None: - superseded_score_set = await fetch_score_set_by_urn( + current_superseded = await fetch_score_set_by_urn( db, item_create.superseded_score_set_urn, user_data, user_data, True ) - if superseded_score_set is None: + if current_superseded is None: logger.info( msg="Failed to create score set; The requested superseded score set does not exist.", extra=logging_context(), @@ -1613,6 +1614,17 @@ async def create_score_set( status_code=404, detail="The requested superseded score set does not exist", ) + superseded_score_set: Optional[ScoreSet] = find_superseded_score_set_tail(current_superseded, Action.READ, user_data) + if superseded_score_set is None or superseded_score_set.private: + logger.info( + msg="Failed to create score set; The newest version of the requested superseded score set is not accessible.", + extra=logging_context(), + ) + raise HTTPException( + status_code=404, + detail="The newest 
version of the requested superseded score set is not accessible. " + "It may be private and multiple score sets supersede the same score set.", + ) else: superseded_score_set = None diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 17ab2ab9..2b6be3b5 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -1796,46 +1796,6 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments( assert response.json()[0]["urn"] == published_superseding_score_set["urn"] -def test_non_owner_searches_multiple_unpublished_superseding_score_sets_no_duplicates( - session, client, setup_router_db, data_files, data_provider -): - """When multiple private score sets supersede the same published score set, - a non-owner should see the published score set exactly once (no duplicates).""" - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - - with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: - published_score_set = publish_score_set(client, score_set["urn"]) - worker_queue.assert_called_once() - - experiment_urn = published_score_set["experiment"]["urn"] - - # Create two unpublished score sets that both supersede the same published score set. 
- superseding_1_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - superseding_1_payload["experimentUrn"] = experiment_urn - superseding_1_payload["supersededScoreSetUrn"] = published_score_set["urn"] - superseding_1_response = client.post("/api/v1/score-sets/", json=superseding_1_payload) - assert superseding_1_response.status_code == 200 - - superseding_2_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - superseding_2_payload["experimentUrn"] = experiment_urn - superseding_2_payload["supersededScoreSetUrn"] = published_score_set["urn"] - superseding_2_response = client.post("/api/v1/score-sets/", json=superseding_2_payload) - assert superseding_2_response.status_code == 200 - - # Transfer ownership so the requesting user is a non-owner of the superseding score sets. - change_ownership(session, superseding_1_response.json()["urn"], ScoreSetDbModel) - change_ownership(session, superseding_2_response.json()["urn"], ScoreSetDbModel) - change_ownership(session, published_score_set["urn"], ScoreSetDbModel) - - response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") - assert response.status_code == 200 - response_urns = [ss["urn"] for ss in response.json()] - assert len(response_urns) == 1 - assert response_urns[0] == published_score_set["urn"] - - def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 8a05cc39..e719f02b 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -2890,16 +2890,46 @@ def test_search_score_sets_not_affected_by_experiment_metadata( assert response.json()["numScoreSets"] == num_score_sets -def test_search_score_sets_not_affected_by_multiple_superseding_versions( +def test_cannot_create_multiple_superseding_versions( + session, data_provider, client, 
setup_router_db, data_files +): +    """Attempting to create multiple superseding versions should fail.""" +    experiment = create_experiment(client, {"title": "Original Experiment"}) +    score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Original Score Set"}) +    score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + +    with patch.object(arq.ArqRedis, "enqueue_job", return_value=None): +        published = publish_score_set(client, score_set["urn"]) + +    # Create the first private superseding score set successfully +    first_superseding = create_seq_score_set( +        client, +        create_experiment(client, {"title": "First Superseding Experiment"})["urn"], +        update={"title": "First Superseding", "supersededScoreSetUrn": published["urn"]}, +    ) +    assert first_superseding is not None + +    # Attempt to create the second private superseding score set for the same published score set +    # This should fail due to the unique constraint on replaces_id +    experiment2 = create_experiment(client) +    score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) +    score_set_post_payload["experimentUrn"] = experiment2["urn"] +    score_set_post_payload["supersededScoreSetUrn"] = published["urn"] + +    response = client.post("/api/v1/score-sets/", json=score_set_post_payload) +    assert response.status_code == 404 +    assert ("The newest version of the requested superseded score set is not accessible. " +    "It may be private and multiple score sets supersede the same score set.") in response.json()["detail"] + + +def test_search_score_sets_not_affected_by_an_unpublishing_superseding_versions( + session, data_provider, client, setup_router_db, data_files ): -    """Multiple unpublished superseding versions of the same score set should not reduce search page size. +    """One unpublished superseding version of the same score set should not reduce search page size. 
Regression test for a bug where the superseding score set filter used a LEFT OUTER JOIN -    (scoresets LEFT JOIN scoresets AS s ON scoresets.id = s.replaces_id). Since replaces_id has -    no uniqueness constraint, a score set with N superseding versions produces N rows, all inside -    the LIMIT boundary. This consumed extra row budget and caused paginated searches to return -    fewer unique score sets than the requested limit. +    (scoresets LEFT JOIN scoresets AS s ON scoresets.id = s.replaces_id). replaces_id now has +    a uniqueness constraint. """ num_published = 3 published_urns = [] @@ -2912,14 +2942,12 @@ def test_search_score_sets_not_affected_by_multiple_superseding_versions( published = publish_score_set(client, score_set["urn"]) published_urns.append(published["urn"]) -    # Create multiple unpublished superseding versions for the first score set. -    # These share the same replaces_id, which caused row multiplication with the old LEFT JOIN filter. -    for j in range(3): -        create_seq_score_set( -            client, -            create_experiment(client, {"title": f"Superseding Experiment {j}"})["urn"], -            update={"title": f"Superseding {j}", "supersededScoreSetUrn": published_urns[0]}, -        ) +    # Create an unpublished superseding version for the first score set. +    create_seq_score_set( +        client, +        create_experiment(client, {"title": f"Superseding Experiment {1}"})["urn"], +        update={"title": f"Superseding {1}", "supersededScoreSetUrn": published_urns[0]}, +    ) search_payload = {"limit": 2} response = client.post("/api/v1/score-sets/search", json=search_payload)