@@ -0,0 +1,28 @@
"""add unique constraint on replaces_id

Revision ID: 4dbf24ed1857
Revises: 398067c53257
Create Date: 2026-05-13 11:22:59.646605

"""
from alembic import op

# revision identifiers, used by Alembic.
revision = '4dbf24ed1857'
down_revision = '398067c53257'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
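# Replace the plain index on replaces_id with a unique index so that at most one
# score set can supersede any given score set.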
op.drop_index('ix_scoresets_replaces_id', table_name='scoresets')
op.create_index(op.f('ix_scoresets_replaces_id'), 'scoresets', ['replaces_id'], unique=True)
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
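# Restore the original non-unique index on replaces_id.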
op.drop_index(op.f('ix_scoresets_replaces_id'), table_name='scoresets')
op.create_index('ix_scoresets_replaces_id', 'scoresets', ['replaces_id'], unique=False)
# ### end Alembic commands ###
2 changes: 1 addition & 1 deletion src/mavedb/models/score_set.py
@@ -124,7 +124,7 @@ class ScoreSet(Base):
# TODO Standardize on US or GB spelling for licenc/se.
licence_id = Column(Integer, ForeignKey("licenses.id"), index=True, nullable=False)
license: Mapped["License"] = relationship("License")
superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True)
superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True, unique=True)
superseded_score_set: Mapped[Optional["ScoreSet"]] = relationship(
"ScoreSet",
uselist=False,
16 changes: 14 additions & 2 deletions src/mavedb/routers/score_sets.py
@@ -54,6 +54,7 @@
csv_data_to_df,
fetch_score_set_search_filter_options,
find_meta_analyses_for_experiment_sets,
find_superseded_score_set_tail,
get_score_set_variants_as_csv,
refresh_variant_urns,
variants_to_csv_rows,
@@ -1600,11 +1601,11 @@ async def create_score_set(

save_to_logging_context({"requested_superseded_score_set": item_create.superseded_score_set_urn})
if item_create.superseded_score_set_urn is not None:
superseded_score_set = await fetch_score_set_by_urn(
current_superseded = await fetch_score_set_by_urn(
db, item_create.superseded_score_set_urn, user_data, user_data, True
)

if superseded_score_set is None:
if current_superseded is None:
logger.info(
msg="Failed to create score set; The requested superseded score set does not exist.",
extra=logging_context(),
@@ -1613,6 +1614,17 @@
status_code=404,
detail="The requested superseded score set does not exist",
)
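# Resolve the newest score set (the tail) of the supersession chain that this user can read;
# creation only proceeds when that tail exists and is public.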
superseded_score_set: Optional[ScoreSet] = find_superseded_score_set_tail(current_superseded, Action.READ, user_data)
if superseded_score_set is None or superseded_score_set.private:
logger.info(
msg="Failed to create score set; The newest version of the requested superseded score set is not accessible.",
extra=logging_context(),
)
raise HTTPException(
status_code=404,
detail="The newest version of the requested superseded score set is not accessible. "
"It may be private and multiple score sets supersede the same score set.",
)
else:
superseded_score_set = None

40 changes: 0 additions & 40 deletions tests/routers/test_experiments.py
@@ -1796,46 +1796,6 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments(
assert response.json()[0]["urn"] == published_superseding_score_set["urn"]


def test_non_owner_searches_multiple_unpublished_superseding_score_sets_no_duplicates(
session, client, setup_router_db, data_files, data_provider
):
"""When multiple private score sets supersede the same published score set,
a non-owner should see the published score set exactly once (no duplicates)."""
experiment = create_experiment(client)
score_set = create_seq_score_set(client, experiment["urn"])
score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv")

with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue:
published_score_set = publish_score_set(client, score_set["urn"])
worker_queue.assert_called_once()

experiment_urn = published_score_set["experiment"]["urn"]

# Create two unpublished score sets that both supersede the same published score set.
superseding_1_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
superseding_1_payload["experimentUrn"] = experiment_urn
superseding_1_payload["supersededScoreSetUrn"] = published_score_set["urn"]
superseding_1_response = client.post("/api/v1/score-sets/", json=superseding_1_payload)
assert superseding_1_response.status_code == 200

superseding_2_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
superseding_2_payload["experimentUrn"] = experiment_urn
superseding_2_payload["supersededScoreSetUrn"] = published_score_set["urn"]
superseding_2_response = client.post("/api/v1/score-sets/", json=superseding_2_payload)
assert superseding_2_response.status_code == 200

# Transfer ownership so the requesting user is a non-owner of the superseding score sets.
change_ownership(session, superseding_1_response.json()["urn"], ScoreSetDbModel)
change_ownership(session, superseding_2_response.json()["urn"], ScoreSetDbModel)
change_ownership(session, published_score_set["urn"], ScoreSetDbModel)

response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets")
assert response.status_code == 200
response_urns = [ss["urn"] for ss in response.json()]
assert len(response_urns) == 1
assert response_urns[0] == published_score_set["urn"]


def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider):
experiment = create_experiment(client)
score_set = create_seq_score_set(client, experiment["urn"])
56 changes: 42 additions & 14 deletions tests/routers/test_score_set.py
@@ -2890,16 +2890,46 @@ def test_search_score_sets_not_affected_by_experiment_metadata(
assert response.json()["numScoreSets"] == num_score_sets


def test_search_score_sets_not_affected_by_multiple_superseding_versions(
def test_cannot_create_multiple_superseding_versions(
session, data_provider, client, setup_router_db, data_files
):
"""Attempting to create multiple superseding versions should fail."""
experiment = create_experiment(client, {"title": "Original Experiment"})
score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Original Score Set"})
score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv")

with patch.object(arq.ArqRedis, "enqueue_job", return_value=None):
published = publish_score_set(client, score_set["urn"])

# Create the first private superseding score set successfully
first_superseding = create_seq_score_set(
client,
create_experiment(client, {"title": "First Superseding Experiment"})["urn"],
update={"title": "First Superseding", "supersededScoreSetUrn": published["urn"]},
)
assert first_superseding is not None

# Attempt to create the second private superseding score set for the same published score set
# This should fail due to the unique constraint on replaces_id
experiment2 = create_experiment(client)
score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
score_set_post_payload["experimentUrn"] = experiment2["urn"]
score_set_post_payload["supersededScoreSetUrn"] = published["urn"]

response = client.post("/api/v1/score-sets/", json=score_set_post_payload)
assert response.status_code == 404
assert ("The newest version of the requested superseded score set is not accessible. "
"It may be private and multiple score sets supersede the same score set.") in response.json()["detail"]


def test_search_score_sets_not_affected_by_an_unpublished_superseding_version(
session, data_provider, client, setup_router_db, data_files
):
"""Multiple unpublished superseding versions of the same score set should not reduce search page size.
"""One unpublished superseding versions of the same score set should not reduce search page size.

Regression test for a bug where the superseding score set filter used a LEFT OUTER JOIN
(scoresets LEFT JOIN scoresets AS s ON scoresets.id = s.replaces_id). Since replaces_id has
no uniqueness constraint, a score set with N superseding versions produces N rows, all inside
the LIMIT boundary. This consumed extra row budget and caused paginated searches to return
fewer unique score sets than the requested limit.
(scoresets LEFT JOIN scoresets AS s ON scoresets.id = s.replaces_id). replaces_id now has a
uniqueness constraint, so that join can no longer multiply rows.
"""
num_published = 3
published_urns = []
Expand All @@ -2912,14 +2942,12 @@ def test_search_score_sets_not_affected_by_multiple_superseding_versions(
published = publish_score_set(client, score_set["urn"])
published_urns.append(published["urn"])

# Create multiple unpublished superseding versions for the first score set.
# These share the same replaces_id, which caused row multiplication with the old LEFT JOIN filter.
for j in range(3):
create_seq_score_set(
client,
create_experiment(client, {"title": f"Superseding Experiment {j}"})["urn"],
update={"title": f"Superseding {j}", "supersededScoreSetUrn": published_urns[0]},
)
# Create an unpublished superseding version for the first score set.
create_seq_score_set(
client,
create_experiment(client, {"title": f"Superseding Experiment {1}"})["urn"],
update={"title": f"Superseding {1}", "supersededScoreSetUrn": published_urns[0]},
)

search_payload = {"limit": 2}
response = client.post("/api/v1/score-sets/search", json=search_payload)