diff --git a/src/bioetl/infrastructure/adapters/common/deduplication.py b/src/bioetl/infrastructure/adapters/common/deduplication.py index 18e1a8f9b9..e6dd308fb5 100644 --- a/src/bioetl/infrastructure/adapters/common/deduplication.py +++ b/src/bioetl/infrastructure/adapters/common/deduplication.py @@ -35,14 +35,9 @@ def deduplicate_preserving_order(values: Iterable[str]) -> list[str]: """Return unique values while preserving the original order.""" - unique_values: list[str] = [] - seen_values: set[str] = set() - for value in values: - if value in seen_values: - continue - seen_values.add(value) - unique_values.append(value) - return unique_values + # Optimization: dict.fromkeys leverages C-level iteration and insertion order + # preservation to deduplicate faster than a pure-Python seen-set loop. + return list(dict.fromkeys(values)) def iter_deduplicated_records(