From 1c08d1b40c38631edcf41ce7321c4dce2e0f3bd8 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 29 May 2026 22:15:13 +0000 Subject: [PATCH] perf: use dict.fromkeys for faster order-preserving deduplication Co-authored-by: SatoryKono <13055362+SatoryKono@users.noreply.github.com> --- .../infrastructure/adapters/common/deduplication.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/bioetl/infrastructure/adapters/common/deduplication.py b/src/bioetl/infrastructure/adapters/common/deduplication.py index 18e1a8f9b9..3fc438c996 100644 --- a/src/bioetl/infrastructure/adapters/common/deduplication.py +++ b/src/bioetl/infrastructure/adapters/common/deduplication.py @@ -35,14 +35,7 @@ def deduplicate_preserving_order(values: Iterable[str]) -> list[str]: """Return unique values while preserving the original order.""" - unique_values: list[str] = [] - seen_values: set[str] = set() - for value in values: - if value in seen_values: - continue - seen_values.add(value) - unique_values.append(value) - return unique_values + return list(dict.fromkeys(values)) def iter_deduplicated_records(