From 0f95d725c02d26646d9d3ce5346f8f55ca89c5ed Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 30 May 2026 22:02:33 +0000
Subject: [PATCH] perf: optimize list deduplication

Co-authored-by: SatoryKono <13055362+SatoryKono@users.noreply.github.com>
---
 .../infrastructure/adapters/common/deduplication.py   | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/bioetl/infrastructure/adapters/common/deduplication.py b/src/bioetl/infrastructure/adapters/common/deduplication.py
index 18e1a8f9b9..e6dd308fb5 100644
--- a/src/bioetl/infrastructure/adapters/common/deduplication.py
+++ b/src/bioetl/infrastructure/adapters/common/deduplication.py
@@ -35,14 +35,9 @@
 
 def deduplicate_preserving_order(values: Iterable[str]) -> list[str]:
     """Return unique values while preserving the original order."""
-    unique_values: list[str] = []
-    seen_values: set[str] = set()
-    for value in values:
-        if value in seen_values:
-            continue
-        seen_values.add(value)
-        unique_values.append(value)
-    return unique_values
+    # dict.fromkeys keeps the first occurrence of each key in insertion order
+    # (guaranteed since Python 3.7) and runs the dedup loop in C under CPython.
+    return list(dict.fromkeys(values))
 
 
 def iter_deduplicated_records(