Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions src/bioetl/application/core/batch_writer_columns_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import itertools
from typing import TYPE_CHECKING, Literal

if TYPE_CHECKING:
Expand Down Expand Up @@ -86,14 +87,7 @@ def _get_schema_columns(

def _collect_record_columns(self, records: list[GoldRecord]) -> list[str]:
"""Collect columns in stable first-seen order."""
columns: list[str] = []
seen: set[str] = set()
for record in records:
for key in record:
if key not in seen:
seen.add(key)
columns.append(key)
return columns
return list(dict.fromkeys(itertools.chain.from_iterable(records)))

def _get_column_order(self, columns: Sequence[str]) -> list[str] | None:
"""Resolve explicit column order from configured column groups."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,7 @@

def deduplicate_preserving_order(values: Iterable[str]) -> list[str]:
"""Return unique values while preserving the original order."""
unique_values: list[str] = []
seen_values: set[str] = set()
for value in values:
if value in seen_values:
continue
seen_values.add(value)
unique_values.append(value)
return unique_values
return list(dict.fromkeys(values))


def iter_deduplicated_records(
Expand Down
8 changes: 1 addition & 7 deletions src/bioetl/infrastructure/config/base_config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,7 @@ def _merge_lists(
"""
# Default: simple concatenation with deduplication for string lists
if base and isinstance(base[0], str):
seen: set[str] = set()
result: list[str] = []
for item in base + override:
if item not in seen:
seen.add(item)
result.append(item)
return result
return list(dict.fromkeys(base + override))
# Non-string lists: just concatenate
return base + override

Expand Down
10 changes: 1 addition & 9 deletions src/bioetl/infrastructure/config/filter_config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,15 +304,7 @@ def _merge_string_lists(
Returns:
Merged list with unique values, base items first.
"""
seen: set[str] = set()
result: list[str] = []

for item in base + override:
if item not in seen:
seen.add(item)
result.append(item)

return result
return list(dict.fromkeys(base + override))


__all__ = ["FilterConfigLoader"]
10 changes: 2 additions & 8 deletions src/bioetl/infrastructure/config_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,8 @@ def _default_concat_list_merger(
if all(isinstance(item, str) for item in base) and all(
isinstance(item, str) for item in override
):
seen: set[str] = set()
merged: list[Any] = [] # Any: YAML config values are heterogeneous
for item in base + override:
item_str = str(item)
if item_str not in seen:
seen.add(item_str)
merged.append(item)
return merged
# Items are already strings, so list(dict.fromkeys(...)) is safe and preserves order.
return list(dict.fromkeys(base + override))

return [*base, *override]

Expand Down
Loading