From 65d2096fbffec824cc0cb727754e584d6b4ffb7d Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 23 May 2026 22:13:00 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20order-preserving?= =?UTF-8?q?=20deduplication=20with=20dict.fromkeys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced pure-Python `seen` loop mechanisms with the C-optimized `dict.fromkeys()` pattern for order-preserving deduplication of lists and dictionary keys in infrastructure config loaders and batch writer schemas. Co-authored-by: SatoryKono <13055362+SatoryKono@users.noreply.github.com> --- .../application/core/batch_writer_columns_mixin.py | 11 +++-------- .../infrastructure/config/base_config_loader.py | 9 ++------- .../infrastructure/config/filter_config_loader.py | 11 ++--------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/bioetl/application/core/batch_writer_columns_mixin.py b/src/bioetl/application/core/batch_writer_columns_mixin.py index 718c8b5696..2b663bd586 100644 --- a/src/bioetl/application/core/batch_writer_columns_mixin.py +++ b/src/bioetl/application/core/batch_writer_columns_mixin.py @@ -3,6 +3,7 @@ from __future__ import annotations +import itertools from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: @@ -86,14 +87,8 @@ def _get_schema_columns( def _collect_record_columns(self, records: list[GoldRecord]) -> list[str]: """Collect columns in stable first-seen order.""" - columns: list[str] = [] - seen: set[str] = set() - for record in records: - for key in record: - if key not in seen: - seen.add(key) - columns.append(key) - return columns + # Optimized: C-level iteration and insertion-order preservation via dict.fromkeys + return list(dict.fromkeys(itertools.chain.from_iterable(records))) def _get_column_order(self, columns: Sequence[str]) -> list[str] | None: """Resolve explicit column order from configured column groups.""" diff --git a/src/bioetl/infrastructure/config/base_config_loader.py b/src/bioetl/infrastructure/config/base_config_loader.py index 35b6b8eac0..aed1e5f3c5 100644 --- a/src/bioetl/infrastructure/config/base_config_loader.py +++ b/src/bioetl/infrastructure/config/base_config_loader.py @@ -141,13 +141,8 @@ def _merge_lists( """ # Default: simple concatenation with deduplication for string lists if base and isinstance(base[0], str): - seen: set[str] = set() - result: list[str] = [] - for item in base + override: - if item not in seen: - seen.add(item) - result.append(item) - return result + # Optimized: C-level iteration and insertion-order preservation via dict.fromkeys + return list(dict.fromkeys(base + override)) # Non-string lists: just concatenate return base + override diff --git a/src/bioetl/infrastructure/config/filter_config_loader.py b/src/bioetl/infrastructure/config/filter_config_loader.py index a3e073a641..8d0f601643 100644 --- a/src/bioetl/infrastructure/config/filter_config_loader.py +++ b/src/bioetl/infrastructure/config/filter_config_loader.py @@ -304,15 +304,8 @@ def _merge_string_lists( Returns: Merged list with unique values, base items first. """ - seen: set[str] = set() - result: list[str] = [] - - for item in base + override: - if item not in seen: - seen.add(item) - result.append(item) - - return result + # Optimized: C-level iteration and insertion-order preservation via dict.fromkeys + return list(dict.fromkeys(base + override)) __all__ = ["FilterConfigLoader"]