diff --git a/src/bioetl/application/core/batch_writer_columns_mixin.py b/src/bioetl/application/core/batch_writer_columns_mixin.py index 718c8b5696..7d49af0ecd 100644 --- a/src/bioetl/application/core/batch_writer_columns_mixin.py +++ b/src/bioetl/application/core/batch_writer_columns_mixin.py @@ -3,6 +3,7 @@ from __future__ import annotations +import itertools from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: @@ -86,14 +87,7 @@ def _get_schema_columns( def _collect_record_columns(self, records: list[GoldRecord]) -> list[str]: """Collect columns in stable first-seen order.""" - columns: list[str] = [] - seen: set[str] = set() - for record in records: - for key in record: - if key not in seen: - seen.add(key) - columns.append(key) - return columns + return list(dict.fromkeys(itertools.chain.from_iterable(records))) def _get_column_order(self, columns: Sequence[str]) -> list[str] | None: """Resolve explicit column order from configured column groups.""" diff --git a/src/bioetl/infrastructure/adapters/common/deduplication.py b/src/bioetl/infrastructure/adapters/common/deduplication.py index 18e1a8f9b9..3fc438c996 100644 --- a/src/bioetl/infrastructure/adapters/common/deduplication.py +++ b/src/bioetl/infrastructure/adapters/common/deduplication.py @@ -35,14 +35,7 @@ def deduplicate_preserving_order(values: Iterable[str]) -> list[str]: """Return unique values while preserving the original order.""" - unique_values: list[str] = [] - seen_values: set[str] = set() - for value in values: - if value in seen_values: - continue - seen_values.add(value) - unique_values.append(value) - return unique_values + return list(dict.fromkeys(values)) def iter_deduplicated_records( diff --git a/src/bioetl/infrastructure/config/base_config_loader.py b/src/bioetl/infrastructure/config/base_config_loader.py index 48057368dd..cd7a1d1ab8 100644 --- a/src/bioetl/infrastructure/config/base_config_loader.py +++ b/src/bioetl/infrastructure/config/base_config_loader.py @@ -144,13 +144,7 @@ def _merge_lists( """ # Default: simple concatenation with deduplication for string lists if base and isinstance(base[0], str): - seen: set[str] = set() - result: list[str] = [] - for item in base + override: - if item not in seen: - seen.add(item) - result.append(item) - return result + return list(dict.fromkeys(base + override)) # Non-string lists: just concatenate return base + override diff --git a/src/bioetl/infrastructure/config/filter_config_loader.py b/src/bioetl/infrastructure/config/filter_config_loader.py index a3e073a641..41f64e3c03 100644 --- a/src/bioetl/infrastructure/config/filter_config_loader.py +++ b/src/bioetl/infrastructure/config/filter_config_loader.py @@ -304,15 +304,7 @@ def _merge_string_lists( Returns: Merged list with unique values, base items first. """ - seen: set[str] = set() - result: list[str] = [] - - for item in base + override: - if item not in seen: - seen.add(item) - result.append(item) - - return result + return list(dict.fromkeys(base + override)) __all__ = ["FilterConfigLoader"] diff --git a/src/bioetl/infrastructure/config_merge.py b/src/bioetl/infrastructure/config_merge.py index 5bbffbaab6..4d337c3369 100644 --- a/src/bioetl/infrastructure/config_merge.py +++ b/src/bioetl/infrastructure/config_merge.py @@ -35,14 +35,8 @@ def _default_concat_list_merger( if all(isinstance(item, str) for item in base) and all( isinstance(item, str) for item in override ): - seen: set[str] = set() - merged: list[Any] = [] # Any: YAML config values are heterogeneous - for item in base + override: - item_str = str(item) - if item_str not in seen: - seen.add(item_str) - merged.append(item) - return merged + # Items are already strings, so list(dict.fromkeys(...)) is safe and preserves order. + return list(dict.fromkeys(base + override)) return [*base, *override]