diff --git a/.github/workflows/trivy.yaml b/.github/workflows/trivy.yaml index aa6735f4..94b60a64 100644 --- a/.github/workflows/trivy.yaml +++ b/.github/workflows/trivy.yaml @@ -49,6 +49,7 @@ jobs: template: "@/contrib/sarif.tpl" output: "trivy-results.sarif" severity: "CRITICAL,HIGH" + timeout: 15m - name: Upload Trivy scan results to GitHub Security tab uses: github/codeql-action/upload-sarif@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..2afd8536 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,52 @@ +# CDM Data Loaders Changelog + +- [CDM Data Loaders Changelog](#cdm-data-loaders-changelog) + - [v0.1.8](#v018) + - [v0.1.7](#v017) + - [v0.1.6](#v016) + - [v0.1.5](#v015) + - [v0.1.4](#v014) + - [v0.1.3](#v013) + - [v0.1.2](#v012) + - [v0.1.1](#v011) + - [v0.1.0](#v010) + + +### v0.1.8 + +- Add rotating file log handler for easier debugging. + +### v0.1.7 + +- Add in AllTheBacteria file download client. + +### v0.1.6 + +- Make NCBI REST API client more resilient to errors and ensure existing imports are not lost. + +### v0.1.5 + +- Add batch size parameter to the NCBI REST API interface. + + +### v0.1.4 + +- Add in NCBI REST API interface. + + +### v0.1.3 + +- Add in file batcher for use with file-based importers. + + +### v0.1.2 + +- Update XML File Splitter to use the latest version, which includes the `gzip` parameter. + +### v0.1.1 + +- Add [XML File Splitter](https://github.com/ialarmedalien/xml_file_splitter) to the container. + +### v0.1.0 + +- Initial release. diff --git a/Dockerfile b/Dockerfile index 35a2085c..a218a8bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,7 +54,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Place executables in the environment at the front of the path ENV PATH="/app/.venv/bin:$PATH" -COPY --chmod=+x ./scripts/entrypoint.sh /app/ +RUN chmod +x ./scripts/entrypoint.sh # Use the non-root user to run our application USER nonroot -ENTRYPOINT ["./entrypoint.sh"] +ENTRYPOINT ["./scripts/entrypoint.sh"] diff --git a/README.md b/README.md index 4ecc8b96..322ad2ed 100644 --- a/README.md +++ b/README.md @@ -11,15 +11,6 @@ Repo for CDM input data loading and wrangling - [Tests](#tests) - [Loading genomes, contigs, and features](#loading-genomes-contigs-and-features) - [Running bbmap stats and checkm2 on genome or contigset files](#running-bbmap-stats-and-checkm2-on-genome-or-contigset-files) - - [Changelog](#changelog) - - [v0.1.7](#v017) - - [v0.1.6](#v016) - - [v0.1.5](#v015) - - [v0.1.4](#v014) - - [v0.1.3](#v013) - - [v0.1.2](#v012) - - [v0.1.1](#v011) - - [v0.1.0](#v010) @@ -168,41 +159,3 @@ Run the stats and checkm2 tools with the following command: bash scripts/run_tools.sh path/to/genome_paths_file.json output_dir ``` where `path/to/genome_paths_file.json` specifies the path to the genome paths file (format specified above) and `output_dir` is the directory for the results. - - -## Changelog - -### v0.1.7 - -- Add in AllTheBacteria file download client. - -### v0.1.6 - -- Make NCBI REST API client more resilient to errors and ensure existing imports are not lost. - -### v0.1.5 - -- Add batch size parameter to the NCBI REST API interface. - - -### v0.1.4 - -- Add in NCBI REST API interface. - - -### v0.1.3 - -- Add in file batcher for use with file-based importers. - - -### v0.1.2 - -- Update XML File Splitter to use the latest version, which includes the `gzip` parameter. - -### v0.1.1 - -- Add [XML File Splitter](https://github.com/ialarmedalien/xml_file_splitter) to the container. - -### v0.1.0 - -- Initial release. 
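The v0.1.8 changelog entry above adds a rotating file log handler; a minimal sketch of how it is expected to be enabled, based on init_logger() in src/cdm_data_loaders/utils/cdm_logger.py later in this diff (the log path and level shown are illustrative assumptions, not repo defaults):

```python
# Sketch only: the log path and level are hypothetical examples.
from cdm_data_loaders.utils.cdm_logger import init_logger

# Attaches a RotatingFileHandler capped at 1 GiB with 5 rotated backups.
logger = init_logger(
    log_level="DEBUG",
    enable_file_logging=True,
    log_file="logs/cdm_data_loader.log",  # hypothetical path; defaults to cdm_data_loader.log in the CWD
)
logger.info("rotating file logging enabled")
```

Setting ENABLE_FILE_LOGGING=true and LOG_LEVEL in the environment achieves the same opt-in without code changes.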
diff --git a/pyproject.toml b/pyproject.toml index b5cce288..c1642088 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cdm-data-loaders" -version = "0.1.7" +version = "0.1.8" description = "Data loaders and wranglers for the CDM." requires-python = ">= 3.13" readme = "README.md" @@ -17,7 +17,7 @@ dependencies = [ "dlt[deltalake,duckdb,filesystem,parquet]>=1.22.2", "frictionless[aws]>=5.18.1", "frozendict>=2.4.7", - "lxml>=6.0.2", + "lxml>=6.1.0", "pydantic>=2.12.5", "pydantic-settings>=2.12.0", "tqdm>=4.67.3", diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 2be0475f..f9edcbfd 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -1,9 +1,16 @@ #!/usr/bin/env bash set -euo pipefail -# Ensure at least one argument is provided +VALID_COMMANDS=(all_the_bacteria ncbi_rest_api uniprot uniref xml_split test bash) + +usage() { + local joined + joined=$(IFS='|'; echo "${VALID_COMMANDS[*]}") + echo "Usage: $0 {${joined}} [args...]" >&2 +} + if [ "$#" -eq 0 ]; then - echo "Usage: $0 {all_the_bacteria|ncbi_rest_api|uniprot|uniref|xml_split|test} [args...]" + usage exit 1 fi @@ -12,34 +19,29 @@ shift case "$cmd" in all_the_bacteria) - # All the Bacteria file importer exec /usr/bin/tini -- uv run --no-sync all_the_bacteria "$@" ;; ncbi_rest_api) - # Run the NCBI datasets API importer exec /usr/bin/tini -- uv run --no-sync ncbi_rest_api "$@" ;; uniprot) - # Run the uniprot pipeline with any additional arguments exec /usr/bin/tini -- uv run --no-sync uniprot "$@" ;; uniref) - # Run the uniref pipeline with any additional arguments exec /usr/bin/tini -- uv run --no-sync uniref "$@" ;; xml_split) - # Run the xml_file_splitter app exec /usr/bin/tini -- xml_file_splitter "$@" ;; test) - # run the tests exec /usr/bin/tini -- uv run --no-sync pytest -m "not requires_spark" ;; bash) exec /usr/bin/tini -- /bin/bash ;; *) - echo "Error: unknown command '$cmd'; valid commands are 'all_the_bacteria', 'ncbi_rest_api', 'uniprot', 'uniref', or 'xml_split'." >&2 + echo "Error: unknown command '$cmd'." 
>&2 + usage exit 1 ;; esac diff --git a/src/cdm_data_loaders/pipelines/all_the_bacteria.py b/src/cdm_data_loaders/pipelines/all_the_bacteria.py index bb32836a..d874b8b8 100644 --- a/src/cdm_data_loaders/pipelines/all_the_bacteria.py +++ b/src/cdm_data_loaders/pipelines/all_the_bacteria.py @@ -17,9 +17,10 @@ import dlt from dlt.extract.items import DataItemWithMeta +from dlt.sources.helpers import requests from dlt.sources.helpers.rest_client.client import RESTClient from frozendict import frozendict -from pydantic import AliasChoices, Field +from pydantic import AliasChoices, Field, computed_field from pydantic_settings import SettingsConfigDict from cdm_data_loaders.pipelines.core import ( @@ -28,20 +29,31 @@ ) from cdm_data_loaders.pipelines.cts_defaults import DEFAULT_SETTINGS_CONFIG_DICT, CtsSettings from cdm_data_loaders.utils.download.sync_client import FileDownloader +from cdm_data_loaders.utils.s3 import stream_to_s3 logger = logging.getLogger("dlt") DATASET_NAME = "all_the_bacteria" ALL_FILES_TSV_FILE_ID = "R6gcp" +ALL_ATB_FILE_NAME = "all_atb_files.tsv" +REGEX_FILE = "filters.txt" ATB_VERSION = "2025-05" ARG_ALIASES = frozendict( { "version": ["-v", "--version"], - } + "pattern_file": ["-f", "--pattern-file", "--pattern_file"], }, ) +# project parts needed: +PROJECT_PARTS = ["Annotation/Bakta", "Assembly", "Metadata"] +PROJECT_PART_REGEX = re.compile(f"^AllTheBacteria/({'|'.join(PROJECT_PARTS)})") + +EXPECTED_ATB_FIELDNAMES = ["project", "project_id", "filename", "url", "md5", "size(MB)"] +REQUIRED_ATB_FIELDNAMES = {"project", "filename", "url", "md5"} + class AtbSettings(CtsSettings): """Configuration for running the AllTheBacteria import pipeline.""" @@ -54,37 +66,71 @@ class AtbSettings(CtsSettings): validation_alias=AliasChoices(*[alias.strip("-") for alias in ARG_ALIASES["version"]]), ) + pattern_file: str | None = Field( + default=None, + description="Path, relative to the input dir, of a file containing patterns to match when downloading ATB files", + validation_alias=AliasChoices(*[alias.strip("-") for alias in ARG_ALIASES["pattern_file"]]), + ) + + @computed_field @property def raw_data_dir(self) -> str: """Directory in which to save the raw data files that are downloaded. Set to the output directory / "raw_data" / version. """ - return str(Path(self.output) / "raw_data" / self.version) - - -# project parts needed: -PROJECT_PARTS = ["Annotation/Bakta", "Assembly", "Metadata"] + if self.use_destination == "local_fs": + return str(Path(self.output) / "raw_data" / self.version) + return f"{self.output}/raw_data/{self.version}" -PROJECT_PART_REGEX = re.compile(f"^AllTheBacteria/({'|'.join(PROJECT_PARTS)})") + @computed_field + @property + def pattern_matches(self) -> re.Pattern: + """The regular expression pattern to be used to select files for download. -EXPECTED_ATB_FIELDNAMES = ["project", "project_id", "filename", "url", "md5", "size(MB)"] -REQUIRED_ATB_FIELDNAMES = {"project", "filename", "url", "md5"} + If a pattern_file is supplied, the file at {input_dir}/{pattern_file} is read in + and its contents converted into a regular expression. If no file is supplied, or the file + is empty or contains no usable patterns, the default PROJECT_PART_REGEX will be used instead.
+ """ + if self.pattern_file: + pattern_file = Path(self.input_dir) / self.pattern_file + regex = load_patterns(pattern_file) + if regex is not None: + return regex + # return the default + return PROJECT_PART_REGEX + + +def load_patterns(pattern_file: Path) -> re.Pattern | None: + """Load the pattern file and convert it into a set of regexes.""" + patterns = [] + try: + for line in pattern_file.read_text(encoding="utf-8").splitlines(): + trimmed_line = line.strip() + # skip blank lines + if not trimmed_line: + continue + patterns.append( + re.escape(trimmed_line[:-1]) + ".*" if trimmed_line.endswith("*") else re.escape(trimmed_line) + ) + + if patterns: + return re.compile("^(" + "|".join(patterns) + ")$") + except Exception: + logger.exception("Could not load patterns from %s", str(pattern_file)) + + return None def download_atb_index_tsv(settings: AtbSettings) -> Path: """Download the ATB file index TSV file from the OSF and save it to disk. :param settings: pipeline config - :type settings: Settings + :type settings: AtbSettings :raises RuntimeError: if the download URL cannot be found :return: path to the downloaded file :rtype: Path """ - # make sure that the directory structure to save the file in can be written to - raw_data_dir = Path(settings.raw_data_dir) - raw_data_dir.mkdir(parents=True, exist_ok=True) - # get the all_atb_files.tsv file info from the OSF API and retrieve the download link osf_client = RESTClient( base_url="https://api.osf.io/v2/", @@ -102,20 +148,40 @@ def download_atb_index_tsv(settings: AtbSettings) -> Path: err_msg = f"Could not find download URL in response from 'https://api.osf.io/v2/files/{ALL_FILES_TSV_FILE_ID}/'" raise RuntimeError(err_msg) - atb_files_tsv = raw_data_dir / "all_atb_files.tsv" + if settings.use_destination == "s3": + # download to a local temp file and also copy the file to s3 + save_path = f"{settings.raw_data_dir}/{ALL_ATB_FILE_NAME}" + try: + stream_to_s3(url=all_files_tsv_download, s3_path=save_path, requests=requests) + except Exception: + logger.exception("Could not transfer %s to s3", ALL_ATB_FILE_NAME) + raise + # TODO: save as a temporary file, delete after pipeline has completed + # save a local copy of the file to current dir + atb_files_tsv = Path(ALL_ATB_FILE_NAME) + else: + # make sure that the directory structure to save the file in can be written to + raw_data_dir = Path(settings.raw_data_dir) + raw_data_dir.mkdir(parents=True, exist_ok=True) + atb_files_tsv = raw_data_dir / ALL_ATB_FILE_NAME + # download the file listing and save it - FileDownloader().download(all_files_tsv_download, atb_files_tsv) + FileDownloader().download(url=all_files_tsv_download, destination=atb_files_tsv) + logger.info("Downloaded TSV index file.") return atb_files_tsv -def get_file_download_links(atb_files_tsv: Path) -> Generator[list[dict[str, Any]], Any]: +def get_file_download_links(settings: AtbSettings, atb_files_tsv: Path) -> Generator[list[dict[str, Any]], Any]: """Parse the ATB file index TSV and to yield a list of files to download. 
+ :param settings: pipeline config + :type settings: AtbSettings :param atb_files_tsv: path to the ATB file index TSV file :type atb_files_tsv: Path :yield: list of files to download :rtype: Generator[list[dict[str, Any]], Any] """ + pattern_to_match = settings.pattern_matches with atb_files_tsv.open() as index_file: reader = csv.DictReader(index_file, delimiter="\t") all_lines = list(reader) @@ -134,7 +200,7 @@ logger.error(err_msg) raise RuntimeError(err_msg) - files_to_download = [row for row in all_lines if PROJECT_PART_REGEX.match(row["project"])] + files_to_download = [row for row in all_lines if pattern_to_match.match(row["project"])] yield files_to_download @@ -144,16 +210,24 @@ :param settings: pipeline config :type settings: AtbSettings - :param atb_file_list: list of dictionaries + :param atb_file_list: info about files to transfer, as a list of dictionaries :type atb_file_list: list[dict[str, Any]] """ client = FileDownloader() - raw_data_dir = Path(settings.raw_data_dir) successful_downloads = [] for f in atb_file_list: try: - save_path = raw_data_dir / f["filename"] - client.download(f["url"], save_path, expected_checksum=f["md5"], checksum_fn="md5") + project_part = f["project"].removeprefix("AllTheBacteria").removeprefix("/").rstrip("/") + if settings.use_destination == "s3": + if project_part: + project_part = f"{project_part}/" + save_path = f"{settings.raw_data_dir}/{project_part}{f['filename']}" + stream_to_s3(url=f["url"], s3_path=save_path, requests=requests) + logger.debug("Successfully transferred file from %s to %s", f["url"], save_path) + else: + save_path = Path(settings.raw_data_dir) / project_part / f["filename"] + client.download(url=f["url"], destination=save_path, expected_checksum=f["md5"], checksum_fn="md5") + f["path"] = str(save_path) successful_downloads.append(f) except Exception as e: @@ -169,7 +243,7 @@ def atb_file_list(settings: AtbSettings) -> Generator[list[dict[str, Any]], Any, Any]: """Generate a list of files to download from the list of all ATB files.""" atb_files_tsv = download_atb_index_tsv(settings) - return get_file_download_links(atb_files_tsv) + return get_file_download_links(settings, atb_files_tsv) @dlt.transformer(name="file_downloader", data_from=atb_file_list, parallelized=True) diff --git a/src/cdm_data_loaders/pipelines/cts_defaults.py b/src/cdm_data_loaders/pipelines/cts_defaults.py index 5f59ff91..2ea95128 100644 --- a/src/cdm_data_loaders/pipelines/cts_defaults.py +++ b/src/cdm_data_loaders/pipelines/cts_defaults.py @@ -1,6 +1,5 @@ """Common defaults for running pipelines on the KBase CTS.""" -from pathlib import Path from typing import Any, Self import dlt.common.configuration.accessors @@ -86,7 +85,7 @@ class CtsSettings(BaseSettings): ) use_destination: str = Field( default=DEFAULT_CTS_SETTINGS["use_destination"], - description=f"DLT destination configuration to use for data output. Choices: {VALID_DESTINATIONS}", + description=f"DLT destination configuration to use for data output. Data to be saved to s3 should use the destination 's3'; to save data locally, use the destination 'local_fs'. The output directory can be specified using the 'output' field. 
Choices: {VALID_DESTINATIONS}", validation_alias=AliasChoices(*[alias.strip("-") for alias in ARG_ALIASES["use_destination"]]), ) use_output_dir_for_pipeline_metadata: bool = Field( @@ -120,6 +119,27 @@ def reconcile_with_dlt_config(self) -> Self: raise ValueError(err_msg) self.output = self.dlt_config[f"destination.{self.use_destination}.bucket_url"] + if self.output != "/": + self.output.rstrip("/") + + # TODO: this should never happen + if not self.output: + err_msg = "No output specified!" + raise ValueError(err_msg) + + # ensure that the use_destination value does not conflict with whether or not pipeline data should be saved + destination_is_s3 = False + if self.output.startswith("s3://") or self.output.startswith("s3a://"): + destination_is_s3 = True + + # self.use_destination should be "s3" if the output is an s3 url and vice versa + if bool(self.use_destination == "s3") != destination_is_s3: + err_msg = "Mismatch between output location and use_destination. To ensure internal settings functions work correctly, set use_destination to 's3' for writing files to s3, and 'local_fs' for writing files locally. The output directory can be configured using the 'output' parameter." + raise ValueError(err_msg) + + if self.use_output_dir_for_pipeline_metadata and destination_is_s3: + err_msg = "It is not currently possible to have the pipeline directory on s3." + raise ValueError(err_msg) return self @@ -148,7 +168,7 @@ def raw_data_dir(self) -> str: If not set, defaults to a 'raw_data' directory within the output directory after reconciling with dlt config. """ - return str(Path(self.output or "") / "raw_data") + return f"{self.output}{'' if self.output in ('', '/') else '/'}raw_data" @computed_field @property @@ -158,7 +178,7 @@ def pipeline_dir(self) -> str | None: If use_output_dir_for_pipeline_metadata is true, this defaults to a `.dlt_conf` directory within the output directory. """ if self.use_output_dir_for_pipeline_metadata: - return str(Path(self.output or "") / ".dlt_conf") + return f"{self.output}{'' if self.output in ('', '/') else '/'}.dlt_conf" return None diff --git a/src/cdm_data_loaders/utils/cdm_logger.py b/src/cdm_data_loaders/utils/cdm_logger.py index 25c341b7..cb91cae4 100644 --- a/src/cdm_data_loaders/utils/cdm_logger.py +++ b/src/cdm_data_loaders/utils/cdm_logger.py @@ -2,138 +2,224 @@ Provides structured logging with contextual metadata for CDM data import pipelines. """ +import json import logging +import logging.config import logging.handlers import os -import sys from pathlib import Path -DEFAULT_LOGGER_NAME = "cdm_data_loader" +from frozendict import frozendict +DEFAULT_LOGGER_NAME = "cdm_data_loader" GENERIC_ERROR_MESSAGE = "An error of unknown origin occurred." 
LOG_FILENAME = "cdm_data_loader.log" MAX_LOG_FILE_SIZE = 2**30 # 1 GiB MAX_LOG_BACKUPS = 5 -__LOGGER = None - -# TODO: adopt logging config, set just once -LOGGING_CONFIG = { - "root": {"name": "cdm_data_loader", "level": "INFO", "handlers": ["console", "file"]}, - "version": 1, - "handlers": { - "console": { - "class": "logging.StreamHandler", - "formatter": "json", - "level": "INFO", - "stream": "ext://sys.stdout", - }, - "file": { - "class": "logging.handlers.RotatingFileHandler", - "formatter": "json", - "filename": LOG_FILENAME, - "maxBytes": MAX_LOG_FILE_SIZE, - "backupCount": MAX_LOG_BACKUPS, +VALID_LOG_LEVELS = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"} +LOGGING_CONFIG_FILENAME = "logging_config.json" + +JSON_LOG_CONFIG = '{"time": "%(asctime)s", "level": "%(levelname)s", "module": "%(module)s", "msg": "%(message)s"}' + +_module_logger = logging.getLogger(__name__) + +# Immutable fallback config used when no external config file can be found. +# Note: disable_existing_loggers is intentionally absent — it is always +# injected at runtime by _load_logging_config() to guarantee it is False. +LOGGING_CONFIG = frozendict( + { + "version": 1, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "json", + "level": "INFO", + "stream": "ext://sys.stdout", + }, }, - }, - "formatters": { - "json": { - "format": '{"time": "%(asctime)s", "level": "%(levelname)s", "module": "%(module)s", "msg": "%(message)s"}' - } - }, -} + "formatters": {"json": {"format": JSON_LOG_CONFIG}}, + "loggers": {DEFAULT_LOGGER_NAME: {"level": "INFO", "handlers": ["console"]}}, + } +) -def get_cdm_logger( - logger_name: str | None = None, log_level: str | None = None, log_dir: str | None = None -) -> logging.Logger: - """Retrieve the logger, initialising it if necessary. +def _load_config_from_path(path: Path) -> dict: + """Attempt to load and parse a JSON logging config from the given path. + + :param path: path to a JSON logging config file + :type path: Path + :return: parsed config dict + :rtype: dict + :raises FileNotFoundError: if no file exists at the given path + :raises ValueError: if the file content is not valid JSON + """ + with path.open() as f: + return json.load(f) - If the logger name is not set, the default name "cdm_data_loader" will be used. - :param logger_name: name for the logger, defaults to None - :type logger_name: str | None, optional - :param log_level: logger level, defaults to None - :type log_level: str | None, optional - :param log_dir: directory to save log files to, optional. If no directory is specified, logs will just be emitted to the console. - :type log_dir: str | None - :return: initialised logger - :rtype: logging.Logger +def _load_logging_config(config_file: str | Path | None = None) -> dict: + """Resolve and load a logging config, working through a priority chain until a source succeeds. + + Resolution order: + 1. ``config_file`` argument (if provided) + 2. ``LOG_CONFIG_FILE`` environment variable (if set) + 3. ``logging_config.json`` in the current working directory + 4. Built-in ``LOGGING_CONFIG`` frozendict + + If a file is found but sets ``disable_existing_loggers`` to ``True``, + a warning is emitted and the value is overridden. The key is always + set to ``False`` in the returned dict, regardless of source, to prevent + ``dictConfig`` from silently disabling pre-existing loggers. 
+ + :param config_file: explicit path to a logging config file, defaults to None + :type config_file: str | Path | None, optional + :return: logging config dict ready to pass to dictConfig + :rtype: dict """ - global __LOGGER - if not __LOGGER: - __LOGGER = init_logger(logger_name, log_level, log_dir) - return __LOGGER + candidates: list[tuple[Path, str]] = [] + + if config_file is not None: + candidates.append((Path(config_file), "config_file argument")) + + env_path = os.getenv("LOG_CONFIG_FILE") + if env_path: + candidates.append((Path(env_path), "LOG_CONFIG_FILE env var")) + + candidates.append((Path.cwd() / LOGGING_CONFIG_FILENAME, "current working directory")) + + for path, source in candidates: + try: + config = _load_config_from_path(path) + if config.get("disable_existing_loggers", True): + _module_logger.warning( + "Logging config loaded from %s (%s) sets disable_existing_loggers " + "to True. Overriding to prevent existing loggers being silently disabled.", + path, + source, + ) + _module_logger.info("Loaded logging config from %s (%s).", path, source) + break + except FileNotFoundError: + _module_logger.warning("Logging config not found at %s (%s). Trying next source.", path, source) + except ValueError: + _module_logger.warning("Logging config at %s (%s) is not valid JSON. Trying next source.", path, source) + else: + _module_logger.warning("No logging config file found. Falling back to built-in config.") + config = dict(LOGGING_CONFIG) + + config["disable_existing_loggers"] = False + return config + + +def _json_formatter() -> logging.Formatter: + """Construct the standard CDM JSON log formatter. + + :return: configured Formatter instance + :rtype: logging.Formatter + """ + return logging.Formatter(JSON_LOG_CONFIG) -def init_logger( - logger_name: str | None = None, log_level: str | None = None, log_dir: str | None = None -) -> logging.Logger: - """Initialise the logger for the module. +def _set_level_safe(logger: logging.Logger, level: str) -> None: + """Set the log level on a logger, raising a descriptive error for invalid values. - If the logger name is not set, the default name "cdm_data_loader" will be used. + :param logger: the logger to configure + :type logger: logging.Logger + :param level: log level string + :type level: str + :raises ValueError: if the level string is not a recognised logging level + """ + normalised = level.upper() + if normalised not in VALID_LOG_LEVELS: + msg = f"Invalid log level {level!r}. Must be one of: {', '.join(sorted(VALID_LOG_LEVELS))}" + raise ValueError(msg) + logger.setLevel(normalised) + + +def get_cdm_logger() -> logging.Logger: + """Retrieve the default CDM logger, initialising it if necessary. + + Prefers the 'dlt' logger if it has already been configured, otherwise + falls back to the CDM logger, creating it if needed. - :param logger_name: name for the logger, defaults to None - :type logger_name: str | None, optional - :param log_level: logger level, defaults to None - :type log_level: str | None, optional - :param log_dir: directory to save log files to, optional. If no directory is specified, logs will just be emitted to the console. 
- :type log_dir: str | None :return: initialised logger :rtype: logging.Logger """ - if not logger_name: - logger_name = DEFAULT_LOGGER_NAME + all_loggers = logging.root.manager.loggerDict + if "dlt" in all_loggers: + return logging.getLogger("dlt") + if DEFAULT_LOGGER_NAME in all_loggers: + return logging.getLogger(DEFAULT_LOGGER_NAME) + return init_logger() + - # Always get the same logger by name - logger = logging.getLogger(logger_name) +def _attach_file_handler(logger: logging.Logger, log_file: str | Path) -> None: + """Attach a file handler to the given logger if no file handler is already present. - # Determine log level (argument > env var > default) - effective_log_level = (log_level or os.getenv("LOG_LEVEL", "INFO")).upper() - logger.setLevel(getattr(logging, effective_log_level, logging.DEBUG)) + Checks for any existing handler whose class name contains 'FileHandler' + to avoid attaching duplicate file handlers of any type. Creates the parent + directory of log_file if it does not already exist. - # JSON-style structured formatter - formatter = logging.Formatter( - '{"time": "%(asctime)s", "level": "%(levelname)s", "module": "%(module)s", "msg": "%(message)s"}' + :param logger: the logger to attach the handler to + :type logger: logging.Logger + :param log_file: path to the log file + :type log_file: str | Path + """ + if any("FileHandler" in type(h).__name__ for h in logger.handlers): + return + + log_path = Path(log_file) + log_path.parent.mkdir(parents=True, exist_ok=True) + + handler = logging.handlers.RotatingFileHandler( + log_path, + maxBytes=MAX_LOG_FILE_SIZE, + backupCount=MAX_LOG_BACKUPS, ) + handler.setFormatter(_json_formatter()) + logger.addHandler(handler) - # Console handler - ch = logging.StreamHandler(sys.stdout) - ch.setFormatter(formatter) - logger.addHandler(ch) - - if log_dir: - log_dir_path = Path(log_dir) - if not log_dir_path.exists() and log_dir_path.is_dir(): - msg = f"{log_dir} does not exist or is not a directory." - raise FileNotFoundError(msg) - # Add the log message handler to the logger - file_handler = logging.handlers.RotatingFileHandler( - LOG_FILENAME, maxBytes=MAX_LOG_FILE_SIZE, backupCount=MAX_LOG_BACKUPS - ) - logger.addHandler(file_handler) - return logger +def init_logger( + log_level: str | None = None, + config_file: str | Path | None = None, + enable_file_logging: bool = False, # noqa: FBT001, FBT002 + log_file: str | Path | None = None, +) -> logging.Logger: + """Initialise the logger for the module. + + Loads config by working through the following priority chain: + 1. ``config_file`` argument (if provided) + 2. ``LOG_CONFIG_FILE`` environment variable (if set) + 3. ``logging_config.json`` in the current working directory + 4. Built-in ``LOGGING_CONFIG`` frozendict -def log_and_die(error_msg: str, error_class: type[Exception], logger_name: str | None = None) -> None: - """Log an error message and then raise the error. + If ``log_level`` is specified or the ``LOG_LEVEL`` env var is set, the + logger is set to that level. File logging is opt-in via + ``enable_file_logging`` or the ``ENABLE_FILE_LOGGING`` env var. 
- :param error_msg: error message string - :type error_msg: str - :param error_class: class of error to throw - :type error_class: type[Exception] - :param logger_name: name of the logger to use, defaults to None - :type logger_name: str | None, optional + :param log_level: logger level, defaults to None + :type log_level: str | None, optional + :param config_file: explicit path to a logging config file, defaults to None + :type config_file: str | Path | None, optional + :param enable_file_logging: attach a file handler, defaults to False + :type enable_file_logging: bool, optional + :param log_file: path to the log file, defaults to LOG_FILENAME in the CWD + :type log_file: str | Path, optional + :return: initialised logger + :rtype: logging.Logger """ - logger = get_cdm_logger(logger_name) + logging.config.dictConfig(_load_logging_config(config_file)) - if not error_msg: - logger.warning("No error supplied to log_and_die. Using generic error message.") - error_msg = GENERIC_ERROR_MESSAGE + logger = logging.getLogger(DEFAULT_LOGGER_NAME) - if not isinstance(error_class, type) or not issubclass(error_class, BaseException): - error_class = RuntimeError + new_log_level = log_level or os.getenv("LOG_LEVEL") + if new_log_level: + _set_level_safe(logger, new_log_level) - logger.error(error_msg) - raise error_class(error_msg) + if enable_file_logging or os.getenv("ENABLE_FILE_LOGGING", "").lower() == "true": + _attach_file_handler(logger, log_file or LOG_FILENAME) + + return logger diff --git a/src/cdm_data_loaders/utils/s3.py b/src/cdm_data_loaders/utils/s3.py index 8253e2cd..fd7e8849 100644 --- a/src/cdm_data_loaders/utils/s3.py +++ b/src/cdm_data_loaders/utils/s3.py @@ -1,12 +1,14 @@ """Utilities for s3 interaction.""" from pathlib import Path +from types import ModuleType from typing import Any import boto3 import botocore import botocore.client import tqdm +from botocore.config import Config CDM_LAKE_BUCKET = "cdm-lake" DEFAULT_EXTRA_ARGS = {"ChecksumAlgorithm": "CRC64NVME"} @@ -14,6 +16,12 @@ VALID_S3_PREFIXES = ["s3://", "s3a://"] VALID_BUCKETS = [CDM_LAKE_BUCKET, "cts"] +# "legacy", "standard", "adaptive" +AWS_CLIENT_RETRY_MODE = "adaptive" +# how many times to retry, including the initial attempt +AWS_CLIENT_TOTAL_MAX_ATTEMPTS = 10 + + _s3_client: botocore.client.BaseClient | None = None @@ -33,9 +41,19 @@ def get_s3_client(args: dict[str, str] | None = None) -> botocore.client.BaseCli if _s3_client is not None: return _s3_client + config = Config(retries={"total_max_attempts": AWS_CLIENT_TOTAL_MAX_ATTEMPTS, "mode": AWS_CLIENT_RETRY_MODE}) + if not args: + # try using env vars and skip manual configuration + client = boto3.client("s3", config=config) + # check for credentials and endpoint_url + credentials = client._request_signer._credentials # noqa: SLF001 + if credentials.access_key and credentials.secret_key and client.meta.endpoint_url: + _s3_client = client + return _s3_client + try: - from berdl_notebook_utils.berdl_settings import get_settings + from berdl_notebook_utils.berdl_settings import get_settings # noqa: PLC0415 settings = get_settings() args = { @@ -56,7 +74,7 @@ def get_s3_client(args: dict[str, str] | None = None) -> botocore.client.BaseCli msg = "Cannot initialise s3 client: missing arguments: " + ", ".join(missing) raise ValueError(msg) - _s3_client = boto3.client("s3", **keyword_args) + _s3_client = boto3.client("s3", config=config, **keyword_args) return _s3_client @@ -126,6 +144,19 @@ def list_matching_objects(s3_path: str) -> list[dict[str, Any]]: return 
contents +def head_object(s3_path: str) -> dict[str, Any]: + """Retrieve the metadata for an object on s3. + + :param s3_path: path to the object on s3, INCLUDING the bucket name + :type s3_path: str + :return: response from the head_object request + :rtype: dict[str, Any] + """ + s3 = get_s3_client() + (bucket, key) = split_s3_path(s3_path) + return s3.head_object(Bucket=bucket, Key=key) + + def object_exists(s3_path: str) -> bool: """Check whether an object exists on s3. @@ -134,11 +165,8 @@ :return: True if the object exists, False otherwise :rtype: bool """ - s3 = get_s3_client() - - (bucket, key) = split_s3_path(s3_path) try: - s3.head_object(Bucket=bucket, Key=key) + head_object(s3_path) except Exception as e: error_string = str(e) if not error_string.startswith("An error occurred (404) when calling the HeadObject operation: Not Found"): @@ -199,6 +227,35 @@ def upload_file( return True +def stream_to_s3(url: str, s3_path: str, requests: ModuleType) -> str: + """Stream directly from an HTTP download to s3. + + :param url: address of the object to transfer to s3 + :type url: str + :param s3_path: save path on s3 + :type s3_path: str + :param requests: module implementing requests.get and returning a response + :type requests: ModuleType + :return: path of the file on s3, in the form bucket/key + :rtype: str + """ + s3_client = get_s3_client() + (bucket, key) = split_s3_path(s3_path) + with requests.get(url, stream=True) as response: + response.raise_for_status() + s3_client.upload_fileobj( + # raw stream from urllib3 + response.raw, + bucket, + key, + ExtraArgs={ + **DEFAULT_EXTRA_ARGS, + "ContentType": response.headers.get("content-type", "application/octet-stream"), + }, + ) + return f"{bucket}/{key}" + + def download_file(s3_path: str, local_file_path: str | Path, version_id: str | None = None) -> None: """Download an object from s3. @@ -335,6 +392,9 @@ def copy_object(current_s3_path: str, new_s3_path: str) -> dict[str, Any]: A successful copy operation will return a response where resp["ResponseMetadata"]["HTTPStatusCode"] == 200 + Errors (e.g., buckets or keys not existing, wrong credentials, etc.) are propagated + directly to the caller without being caught. + :param current_path: path to the file on s3, INCLUDING the bucket name :type current_path: str :param new_path: the desired new file path on s3, INCLUDING the bucket name @@ -360,6 +420,9 @@ def delete_object(s3_path: str) -> dict[str, Any]: A successful deletion will return a response where resp["ResponseMetadata"]["HTTPStatusCode"] == 204. + Errors (e.g., buckets or keys not existing, wrong credentials, etc.) are propagated + directly to the caller without being caught. + :param s3_path: path to the file on s3, INCLUDING the bucket name :type s3_path: str :return: dictionary containing response diff --git a/tests/conftest.py b/tests/conftest.py index 37d704b9..fc3b3909 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ """Global configuration settings for tests.""" import datetime +import logging import shutil from collections.abc import Generator from copy import deepcopy @@ -41,6 +42,21 @@ ALT_PIPELINE_RUN = {RUN_ID: "9876-5432-10", PIPELINE: "KeystoneXXXL", SOURCE: "/path/to/dir"} +@pytest.fixture(autouse=True) +def logging_setup(caplog: pytest.LogCaptureFixture) -> None: + """Fiddle with the loggers used in the tests for a better experience. + + N.b. this is overridden by the conftest in the pipelines directory, which uses the dlt logger. 
+ """ + vcr_logger = logging.getLogger("vcr") + vcr_logger.setLevel("ERROR") + # turn on log propagation for the dlt logger + dlt_logger = logging.getLogger("dlt") + dlt_logger.propagate = True + caplog.set_level(logging.INFO) + caplog.clear() + + @pytest.fixture def spark(tmp_path: Path) -> Generator[SparkSession, Any]: """Generate a spark session with spark.sql.warehouse.dir set to the pytest temporary directory.""" diff --git a/tests/data/atb/assembly_bakta_metadata_exact.tsv b/tests/data/atb/assembly_bakta_metadata_exact.tsv new file mode 100644 index 00000000..8b8c6ae8 --- /dev/null +++ b/tests/data/atb/assembly_bakta_metadata_exact.tsv @@ -0,0 +1,7 @@ +project project_id filename url md5 size(MB) +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.incr_release.202408.status.tsv.gz https://osf.io/download/2skzy/ 7da48fce8e310916a072786872f475b1 14.19 +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.r0.2.status.tsv.gz https://osf.io/download/rxfks/ b7255867206dc66f2c26db31921a67a6 53.87 +AllTheBacteria/Assembly zxfmy File_Lists/file_list.all.20240805.tsv.gz https://osf.io/download/dw3h7/ 5fc5c88a0593785341e466143a97f126 17.04 +AllTheBacteria/Assembly zxfmy File_Lists/file_list.all.202505.tsv.gz https://osf.io/download/69a03f582574717cb3643d62/ 0a9ee0b1efaf42b3ea9e89ce91d2b9e1 22.13 +AllTheBacteria/Metadata h7wzy 0.1/HQ_set_0.1/hq_dataset.accessions.txt.gz https://osf.io/download/3znhb/ 6f001b72101779b5ff6a556f46c9ddab 4.11 +AllTheBacteria/Metadata h7wzy 0.1/HQ_set_0.1/hq_dataset.counts_bar_plot.pdf https://osf.io/download/p5vkt/ a147e807f6a661b6f7f6dc856aa0465c 0.02 diff --git a/tests/data/atb/assembly_bakta_star_metadata.tsv b/tests/data/atb/assembly_bakta_star_metadata.tsv new file mode 100644 index 00000000..06098902 --- /dev/null +++ b/tests/data/atb/assembly_bakta_star_metadata.tsv @@ -0,0 +1,15 @@ +project project_id filename url md5 size(MB) +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.incr_release.202408.status.tsv.gz https://osf.io/download/2skzy/ 7da48fce8e310916a072786872f475b1 14.19 +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.r0.2.status.tsv.gz https://osf.io/download/rxfks/ b7255867206dc66f2c26db31921a67a6 53.87 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_1 kdnwp atb.bakta.incr_release.202408.batch.1.tar.xz https://osf.io/download/r84xg/ a3ba0148c435f31b4de3f0b72e01075d 1378.51 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_1 kdnwp atb.bakta.incr_release.202408.batch.10.tar.xz https://osf.io/download/qxye9/ e0724b0a605da7298b802f01145d8b49 388.76 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_2 p4kvy atb.bakta.incr_release.202408.batch.49.tar.xz https://osf.io/download/nyua7/ c8871eab5ee9071b101ba7bbafa7983b 2320.01 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_2 p4kvy atb.bakta.incr_release.202408.batch.50.tar.xz https://osf.io/download/tqrs2/ 887db770a7e176e644a2b413de4b07e5 2169.25 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_1 tyw72 atb.bakta.r0.2.batch.1.tar.xz https://osf.io/download/vpg3d/ 5e6ccd68f5c6a69a14dafaf599f9377e 293.86 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_1 tyw72 atb.bakta.r0.2.batch.10.tar.xz https://osf.io/download/gbzev/ 8fab65bbeadc99201a1df83f32b8c779 132.76 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_9 evurw atb.bakta.r0.2.batch.228.tar.xz https://osf.io/download/679b9dd3bc1fdea8290eb046/ dc061bcc2e1bb9c30887a4dfefb1b6c2 3841.85 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_9 evurw atb.bakta.r0.2.batch.229.tar.xz 
https://osf.io/download/679ba14cae64ecefd50eaea0/ db0081059366d08e8c3cd8c19857ef42 3902.73 +AllTheBacteria/Assembly zxfmy File_Lists/file_list.all.20240805.tsv.gz https://osf.io/download/dw3h7/ 5fc5c88a0593785341e466143a97f126 17.04 +AllTheBacteria/Assembly zxfmy File_Lists/file_list.all.202505.tsv.gz https://osf.io/download/69a03f582574717cb3643d62/ 0a9ee0b1efaf42b3ea9e89ce91d2b9e1 22.13 +AllTheBacteria/Metadata h7wzy 0.1/HQ_set_0.1/hq_dataset.accessions.txt.gz https://osf.io/download/3znhb/ 6f001b72101779b5ff6a556f46c9ddab 4.11 +AllTheBacteria/Metadata h7wzy 0.1/HQ_set_0.1/hq_dataset.counts_bar_plot.pdf https://osf.io/download/p5vkt/ a147e807f6a661b6f7f6dc856aa0465c 0.02 diff --git a/tests/data/atb/bakta_exact.tsv b/tests/data/atb/bakta_exact.tsv new file mode 100644 index 00000000..5ca71a80 --- /dev/null +++ b/tests/data/atb/bakta_exact.tsv @@ -0,0 +1,3 @@ +project project_id filename url md5 size(MB) +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.incr_release.202408.status.tsv.gz https://osf.io/download/2skzy/ 7da48fce8e310916a072786872f475b1 14.19 +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.r0.2.status.tsv.gz https://osf.io/download/rxfks/ b7255867206dc66f2c26db31921a67a6 53.87 diff --git a/tests/data/atb/bakta_star.tsv b/tests/data/atb/bakta_star.tsv new file mode 100644 index 00000000..4f5d5517 --- /dev/null +++ b/tests/data/atb/bakta_star.tsv @@ -0,0 +1,11 @@ +project project_id filename url md5 size(MB) +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.incr_release.202408.status.tsv.gz https://osf.io/download/2skzy/ 7da48fce8e310916a072786872f475b1 14.19 +AllTheBacteria/Annotation/Bakta zt57s File_Lists/atb.bakta.r0.2.status.tsv.gz https://osf.io/download/rxfks/ b7255867206dc66f2c26db31921a67a6 53.87 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_1 kdnwp atb.bakta.incr_release.202408.batch.1.tar.xz https://osf.io/download/r84xg/ a3ba0148c435f31b4de3f0b72e01075d 1378.51 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_1 kdnwp atb.bakta.incr_release.202408.batch.10.tar.xz https://osf.io/download/qxye9/ e0724b0a605da7298b802f01145d8b49 388.76 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_2 p4kvy atb.bakta.incr_release.202408.batch.49.tar.xz https://osf.io/download/nyua7/ c8871eab5ee9071b101ba7bbafa7983b 2320.01 +AllTheBacteria/Annotation/Bakta/Incr_release.202408_Set_2 p4kvy atb.bakta.incr_release.202408.batch.50.tar.xz https://osf.io/download/tqrs2/ 887db770a7e176e644a2b413de4b07e5 2169.25 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_1 tyw72 atb.bakta.r0.2.batch.1.tar.xz https://osf.io/download/vpg3d/ 5e6ccd68f5c6a69a14dafaf599f9377e 293.86 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_1 tyw72 atb.bakta.r0.2.batch.10.tar.xz https://osf.io/download/gbzev/ 8fab65bbeadc99201a1df83f32b8c779 132.76 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_9 evurw atb.bakta.r0.2.batch.228.tar.xz https://osf.io/download/679b9dd3bc1fdea8290eb046/ dc061bcc2e1bb9c30887a4dfefb1b6c2 3841.85 +AllTheBacteria/Annotation/Bakta/Release_0.2_Set_9 evurw atb.bakta.r0.2.batch.229.tar.xz https://osf.io/download/679ba14cae64ecefd50eaea0/ db0081059366d08e8c3cd8c19857ef42 3902.73 diff --git a/tests/pipelines/conftest.py b/tests/pipelines/conftest.py index 4fefeae0..203038a1 100644 --- a/tests/pipelines/conftest.py +++ b/tests/pipelines/conftest.py @@ -16,7 +16,6 @@ from cdm_data_loaders.pipelines.cts_defaults import ( DEFAULT_CTS_SETTINGS, DEFAULT_START_AT, - VALID_DESTINATIONS, BatchedFileInputSettings, CtsSettings, ) @@ -29,18 +28,38 @@ 
START_AT_VALUE = 50 START_AT_STRING = "50" +CONFIG_BUCKET = {"local_fs": "/output_dir", "s3": "s3://some/s3/bucket"} + TEST_DLT_CONFIG = frozendict( { - "destination.local_fs.bucket_url": "/output_dir", + "destination.local_fs.bucket_url": CONFIG_BUCKET["local_fs"], "destination.local_fs.destination_type": "filesystem", - "destination.s3.bucket_url": "s3://some/s3/bucket", + "destination.s3.bucket_url": CONFIG_BUCKET["s3"], "destination.s3.destination_type": "filesystem", "normalize.data_writer.disable_compression": False, } ) -DESTINATION_OUTPUT = TEST_DLT_CONFIG[f"destination.{DEFAULT_CTS_SETTINGS['use_destination']}.bucket_url"] +def _generate_dlt_config() -> dict[str, Any]: + """Return a fresh DLT config dict (same shape as the conftest fixture).""" + return { + "destination": { + "local_fs": {"bucket_url": CONFIG_BUCKET["local_fs"]}, + "s3": {"bucket_url": CONFIG_BUCKET["s3"]}, + }, + "destination.local_fs.bucket_url": CONFIG_BUCKET["local_fs"], + "destination.s3.bucket_url": CONFIG_BUCKET["s3"], + "normalize.data_writer.disable_compression": False, + } + + +DESTINATION_TO_OUTPUT = { + "local_fs": TEST_DLT_CONFIG["destination.local_fs.bucket_url"], + "s3": TEST_DLT_CONFIG["destination.s3.bucket_url"], +} + +DESTINATION_OUTPUT = DESTINATION_TO_OUTPUT[DEFAULT_CTS_SETTINGS["use_destination"]] DEFAULT_CTS_SETTINGS_RECONCILED = frozendict( { @@ -58,34 +77,33 @@ "dev_mode": "false", "input_dir": "/dir/path", "output": "/some/dir", - "use_destination": VALID_DESTINATIONS[1], + "use_destination": "local_fs", "use_output_dir_for_pipeline_metadata": "true", } ) -TEST_CTS_SETTINGS_EXPECTED = frozendict( +TEST_CTS_SETTINGS_RECONCILED = frozendict( { **TEST_CTS_SETTINGS, "dev_mode": False, "use_output_dir_for_pipeline_metadata": True, + "pipeline_dir": "/some/dir/.dlt_conf", + "raw_data_dir": "/some/dir/raw_data", } ) -TEST_CTS_SETTINGS_RECONCILED = frozendict( - {**TEST_CTS_SETTINGS_EXPECTED, "pipeline_dir": "/some/dir/.dlt_conf", "raw_data_dir": "/some/dir/raw_data"} -) - TEST_BATCH_FILE_SETTINGS = frozendict( **TEST_CTS_SETTINGS, start_at=START_AT_STRING, ) -TEST_BATCH_FILE_SETTINGS_EXPECTED = frozendict( - **TEST_CTS_SETTINGS_EXPECTED, - start_at=START_AT_VALUE, -) TEST_BATCH_FILE_SETTINGS_RECONCILED = frozendict( - {**TEST_BATCH_FILE_SETTINGS_EXPECTED, "pipeline_dir": "/some/dir/.dlt_conf", "raw_data_dir": "/some/dir/raw_data"} + { + **TEST_CTS_SETTINGS_RECONCILED, + "start_at": START_AT_VALUE, + "pipeline_dir": "/some/dir/.dlt_conf", + "raw_data_dir": "/some/dir/raw_data", + } ) @@ -121,16 +139,6 @@ def make_batcher(files: list[Path], batch_size: int = 5) -> MagicMock: return mock_batcher -def _generate_dlt_config() -> dict[str, Any]: - """Return a fresh DLT config dict (same shape as the conftest fixture).""" - return { - "destination": {"local_fs": {"bucket_url": "/output_dir"}, "s3": {"bucket_url": "s3://my-bucket/output"}}, - "destination.local_fs.bucket_url": "/output_dir", - "destination.s3.bucket_url": "s3://my-bucket/output", - "normalize.data_writer.disable_compression": False, - } - - def make_settings( settings_cls: type[CtsSettings], dlt_config: dict[str, Any] | None = None, diff --git a/tests/pipelines/test_all_the_bacteria.py b/tests/pipelines/test_all_the_bacteria.py index cec1f4e2..06c5f458 100644 --- a/tests/pipelines/test_all_the_bacteria.py +++ b/tests/pipelines/test_all_the_bacteria.py @@ -2,21 +2,24 @@ import csv import logging -from collections.abc import Callable +import re from pathlib import Path from typing import Any from unittest.mock import MagicMock, call, patch 
import pytest +from frozendict import frozendict from requests.exceptions import HTTPError from cdm_data_loaders.pipelines import all_the_bacteria, core from cdm_data_loaders.pipelines.all_the_bacteria import ( + ALL_ATB_FILE_NAME, DATASET_NAME, AtbSettings, cli, download_atb_index_tsv, get_file_download_links, + load_patterns, osf_file_downloader, run_atb_pipeline, ) @@ -50,8 +53,25 @@ def test_settings(tmp_path: Path, dlt_config: dict[str, Any]) -> AtbSettings: return AtbSettings(dlt_config=dlt_config, output=str(tmp_path)) -def test_cli_calls_run_ncbi_pipeline(monkeypatch: pytest.MonkeyPatch, dlt_config: dict[str, Any]) -> None: - """Ensure that cli() calls run_ncbi_pipeline with the settings.""" +@pytest.fixture +def test_s3_settings(dlt_config: dict[str, Any]) -> AtbSettings: + """Generate fake settings that use s3.""" + return AtbSettings(dlt_config=dlt_config, use_destination="s3") + + +@pytest.fixture +def pattern_file(tmp_path: Path) -> Path: + """Pattern file for testing load_patterns.""" + p = tmp_path / "patterns.txt" + p.write_text( + "hello world\nfoo.bar\nstarts with*\n2+2=4\n", + encoding="utf-8", + ) + return p + + +def test_cli_calls_run_atb_pipeline(monkeypatch: pytest.MonkeyPatch, dlt_config: dict[str, Any]) -> None: + """Ensure that cli() calls run_atb_pipeline with the settings.""" mock_settings_instance = MagicMock() mock_settings_cls = MagicMock(return_value=mock_settings_instance) mock_run_atb_pipeline = MagicMock() @@ -65,6 +85,92 @@ def test_cli_calls_run_ncbi_pipeline(monkeypatch: pytest.MonkeyPatch, dlt_config mock_run_atb_pipeline.assert_called_once_with(mock_settings_instance) +def test_load_patterns_returns_compiled_pattern(pattern_file: Path) -> None: + """load_patterns() should return a compiled re.Pattern object.""" + assert isinstance(load_patterns(pattern_file), re.Pattern) + + +def test_load_patterns_exact_match(pattern_file: Path) -> None: + """Plain text lines should match exactly against their literal content.""" + pattern = load_patterns(pattern_file) + assert isinstance(pattern, re.Pattern) + assert pattern.match("hello world") + assert pattern.match("foo.bar") + assert pattern.match("2+2=4") + + # patterns are anchored with ^ and $, so partial matches should not succeed + assert not pattern.match("hello world!!!") + assert not pattern.match("well, hello world") + + # dot is literal, not a wildcard + assert not pattern.match("fooXbar") + # + is literal, not a quantifier + assert not pattern.match("22=4") + + # line ending with * should match the prefix followed by any suffix + assert pattern.match("starts with") + assert pattern.match("starts with anything") + assert pattern.match("starts with 123!@#") + + # line ending with * should not match strings with a different prefix + assert not pattern.match("ends with") + + +def test_load_patterns_blank_lines_are_ignored(tmp_path: Path) -> None: + """Blank lines in the file should be silently skipped.""" + p = tmp_path / "patterns.txt" + p.write_text("\nhello\n\nworld\n", encoding="utf-8") + pattern = load_patterns(p) + assert isinstance(pattern, re.Pattern) + assert pattern.match("hello") + assert pattern.match("world") + assert not pattern.match("") + + +def test_load_patterns_only_wildcard_matches_anything(tmp_path: Path) -> None: + """A file containing only '*' should produce a pattern that matches any string.""" + p = tmp_path / "patterns.txt" + p.write_text("*\n", encoding="utf-8") + pattern = load_patterns(p) + assert isinstance(pattern, re.Pattern) + assert pattern.match("") + assert 
pattern.match("anything at all") + + +def test_load_patterns_alternation(tmp_path: Path) -> None: + """Each line in the file should become an alternative in the combined pattern.""" + p = tmp_path / "patterns.txt" + p.write_text("cat\ndog\nbird\n", encoding="utf-8") + pattern = load_patterns(p) + assert isinstance(pattern, re.Pattern) + assert pattern.match("cat") + assert pattern.match("dog") + assert pattern.match("bird") + assert not pattern.match("fish") + + +def test_load_patterns_no_file_returns_none(tmp_path: Path) -> None: + """Ensure that loading a non-existent file returns None.""" + pattern = load_patterns(tmp_path / "some" / "path") + assert pattern is None + + +def test_load_patterns_touched_file_returns_none(tmp_path: Path) -> None: + """Ensure that loading an empty file returns None.""" + p = tmp_path / "patterns.txt" + p.touch() + pattern = load_patterns(p) + assert pattern is None + + +def test_load_patterns_empty_file_returns_none(tmp_path: Path) -> None: + """Ensure that loading an empty file returns None.""" + p = tmp_path / "patterns.txt" + p.write_text("\n\n \t\n \n \n\t\t\n", encoding="utf-8") + pattern = load_patterns(p) + assert pattern is None + + @pytest.mark.vcr def test_download_atb_index_tsv_vcr(test_settings: AtbSettings) -> None: """Ensure that the download_atb_index function fetches the correct file.""" @@ -75,6 +181,27 @@ def test_download_atb_index_tsv_vcr(test_settings: AtbSettings) -> None: assert output_file.parent == raw_data_dir +@pytest.mark.default_cassette("test_download_atb_index_tsv_vcr.yaml") +def test_download_atb_index_tsv_vcr_destination_s3(test_s3_settings: AtbSettings) -> None: + """Ensure that the download_atb_index function fetches the correct file.""" + mock_download_client = MagicMock() + mock_stream_to_s3 = MagicMock() + mock_requests = MagicMock() + with ( + patch("cdm_data_loaders.pipelines.all_the_bacteria.FileDownloader", return_value=mock_download_client), + patch("cdm_data_loaders.pipelines.all_the_bacteria.stream_to_s3", mock_stream_to_s3), + patch("cdm_data_loaders.pipelines.all_the_bacteria.requests", mock_requests), + ): + output_file = download_atb_index_tsv(test_s3_settings) + + download_url = "https://osf.io/download/r6gcp/" + mock_stream_to_s3.assert_called_once_with( + url=download_url, s3_path=f"{test_s3_settings.raw_data_dir}/{ALL_ATB_FILE_NAME}", requests=mock_requests + ) + mock_download_client.download.assert_called_once_with(url=download_url, destination=Path(ALL_ATB_FILE_NAME)) + assert output_file == Path(ALL_ATB_FILE_NAME) + + @pytest.mark.vcr def test_download_atb_index_tsv_error_404(test_settings: AtbSettings) -> None: """Ensure that a 404 response causes an error and the function to die.""" @@ -82,6 +209,31 @@ def test_download_atb_index_tsv_error_404(test_settings: AtbSettings) -> None: download_atb_index_tsv(test_settings) +@pytest.mark.default_cassette("test_download_atb_index_tsv_vcr.yaml") +def test_download_atb_index_s3_error_boom(test_s3_settings: AtbSettings, caplog: pytest.LogCaptureFixture) -> None: + """Ensure that an error in the s3 upload causes things to die unpleasantly.""" + mock_download_client = MagicMock() + mock_stream_to_s3 = MagicMock(side_effect=ValueError("ZOMG!")) + mock_requests = MagicMock() + with ( + patch("cdm_data_loaders.pipelines.all_the_bacteria.FileDownloader", return_value=mock_download_client), + patch("cdm_data_loaders.pipelines.all_the_bacteria.stream_to_s3", mock_stream_to_s3), + patch("cdm_data_loaders.pipelines.all_the_bacteria.requests", mock_requests), + 
pytest.raises(ValueError, match="ZOMG!"), + ): + download_atb_index_tsv(test_s3_settings) + + download_url = "https://osf.io/download/r6gcp/" + mock_stream_to_s3.assert_called_once_with( + url=download_url, s3_path=f"{test_s3_settings.raw_data_dir}/{ALL_ATB_FILE_NAME}", requests=mock_requests + ) + mock_download_client.assert_not_called() + mock_download_client.download.assert_not_called() + last_log_record = caplog.records.pop() + assert last_log_record.levelno == logging.ERROR + assert last_log_record.message == f"Could not transfer {ALL_ATB_FILE_NAME} to s3" + + @pytest.mark.vcr def test_download_atv_index_tsv_error_missing_key(test_settings: AtbSettings) -> None: """Ensure that the lack of a download link in the response throws an error.""" @@ -102,10 +254,10 @@ def test_download_atv_index_tsv_error_cannot_download_tsv(test_settings: AtbSett download_atb_index_tsv(test_settings) -def test_get_file_download_links() -> None: - """Ensure that the appropriate files are picked out of the ATB file index TSV file.""" +def test_get_file_download_links(test_settings: AtbSettings) -> None: + """Ensure that the appropriate files are picked out of the ATB file index TSV file using the default matcher.""" file_path = Path("tests") / "data" / "atb" / "all_atb_files.tsv" - filtered_files = list(get_file_download_links(file_path)) + filtered_files = list(get_file_download_links(test_settings, file_path)) # load the expected results expected = Path("tests") / "data" / "atb" / "filtered_files.tsv" with expected.open() as fh: @@ -115,11 +267,41 @@ def test_get_file_download_links() -> None: assert filtered_files[0] == expected_files -def test_get_file_download_links_invalid_file(caplog: pytest.LogCaptureFixture) -> None: +EXPECTED_LINES = { + "*": "all_atb_files.tsv", + "AllTheBacteria/Annotation/Bakta\nAllTheBacteria/Assembly\nAllTheBacteria/Metadata\n": "assembly_bakta_metadata_exact.tsv", + "AllTheBacteria/Annotation/Bakta*\nAllTheBacteria/Assembly\nAllTheBacteria/Metadata\n": "assembly_bakta_star_metadata.tsv", + "AllTheBacteria/Annotation/Bakta": "bakta_exact.tsv", + "AllTheBacteria/Annotation/Bakta*": "bakta_star.tsv", +} + + +@pytest.mark.parametrize("pattern_lines", EXPECTED_LINES) +def test_get_file_download_links_use_pattern_file( + tmp_path: Path, dlt_config: dict[str, Any], pattern_lines: str +) -> None: + """Generate a pattern file from EXPECTED_LINES and check that the output from get_file_download_links is correct.""" + # create the pattern file + p = tmp_path / "patterns.txt" + p.write_text(f"{pattern_lines}\n", encoding="utf-8") + + settings = AtbSettings(dlt_config=dlt_config, input_dir=str(tmp_path), pattern_file="patterns.txt") + file_path = Path("tests") / "data" / "atb" / "all_atb_files.tsv" + filtered_files = list(get_file_download_links(settings, file_path)) + # load the expected results + expected = Path("tests") / "data" / "atb" / EXPECTED_LINES[pattern_lines] + with expected.open() as fh: + reader = csv.DictReader(fh, delimiter="\t") + expected_files = list(reader) + assert len(filtered_files[0]) > 1 + assert filtered_files[0] == expected_files + + +def test_get_file_download_links_invalid_file(test_settings: AtbSettings, caplog: pytest.LogCaptureFixture) -> None: """Ensure that the correct fields are present in the ATB TSV file and throw an error if not.""" file_path = Path("tests") / "data" / "atb" / "invalid_atb_files.tsv" with pytest.raises(RuntimeError, match="Missing required ATB file index TSV headers"): - list(get_file_download_links(file_path)) + 
list(get_file_download_links(test_settings, file_path)) records = caplog.records assert records[-1].levelno == logging.ERROR assert records[-1].message.startswith( @@ -129,143 +311,244 @@ def test_get_file_download_links_invalid_file(caplog: pytest.LogCaptureFixture) assert records[-2].message.startswith("ATB file index TSV headers have changed.") -def test_get_file_download_links_empty_file(caplog: pytest.LogCaptureFixture, tmp_path: Path) -> None: +def test_get_file_download_links_empty_file( + test_settings: AtbSettings, caplog: pytest.LogCaptureFixture, tmp_path: Path +) -> None: """Ensure that an empty file causes a runtime error.""" file_path = tmp_path / "fake_file.tsv" file_path.touch() with pytest.raises(RuntimeError, match=f"No valid TSV data found in {file_path!s}"): - list(get_file_download_links(file_path)) + list(get_file_download_links(test_settings, file_path)) records = caplog.records assert records[-1].levelno == logging.ERROR assert records[-1].message == f"No valid TSV data found in {file_path!s}" -def test_get_file_download_links_no_file() -> None: +def test_get_file_download_links_no_file(test_settings: AtbSettings) -> None: """Ensure an error is thrown if the file cannot be found.""" file_path = Path("/path") / "to" / "file" with pytest.raises(FileNotFoundError, match="No such file or directory"): - list(get_file_download_links(file_path)) + list(get_file_download_links(test_settings, file_path)) + + +FILE_DOWNLOADER_OUTPUT = [ + frozendict( + { + "filename": "file1.txt", + "url": "https://osf.io/file1", + "md5": "md5sum1", + "project": "AllTheBacteria/Annotation/Project", + "path": "Annotation/Project/file1.txt", + } + ), + frozendict( + { + "filename": "file2.txt", + "url": "https://osf.io/file2", + "md5": "md5sum2", + "project": "AllTheBacteria/Side/Project/", + "path": "Side/Project/file2.txt", + } + ), + frozendict( + { + "filename": "some/path/to/file3.txt", + "url": "https://osf.io/file3", + "md5": "md5sum3", + "project": "AllTheBacteria", + "path": "some/path/to/file3.txt", + } + ), + frozendict( + { + "filename": "not/least/file4.txt", + "url": "https://osf.io/file4", + "md5": "md5sum4", + "project": "AllTheBacteria/last/but", + "path": "last/but/not/least/file4.txt", + } + ), +] # osf_file_downloader tests @pytest.mark.parametrize( - ("atb_file_list", "expected_calls", "expected_paths"), + "atb_input", [ - ( - [ - {"filename": "file1.txt", "url": "https://osf.io/file1", "md5": "md5sum1"}, - {"filename": "file2.txt", "url": "https://osf.io/file2", "md5": "md5sum2"}, - ], - [ - ( - "https://osf.io/file1", - "file1.txt", - "md5sum1", - ), - ( - "https://osf.io/file2", - "file2.txt", - "md5sum2", - ), - ], - ["file1.txt", "file2.txt"], - ), + # each of the files singly and then the whole lot as a batch + *[[f] for f in FILE_DOWNLOADER_OUTPUT], + FILE_DOWNLOADER_OUTPUT, ], ) +@pytest.mark.parametrize("use_destination", VALID_DESTINATIONS) def test_osf_file_downloader_success( - test_settings: AtbSettings, - atb_file_list: list[dict[str, Any]], - expected_calls: list[tuple[str, str, str]], - expected_paths: list[str], + request: pytest.FixtureRequest, + atb_input: list[dict[str, Any]], + use_destination: str, caplog: pytest.LogCaptureFixture, ) -> None: """Ensure that the osf_file_downloader function correctly calls the download client for each file.""" + settings = request.getfixturevalue("test_s3_settings" if use_destination == "s3" else "test_settings") + + atb_file_list = [{k: v for k, v in f.items() if k != "path"} for f in atb_input] + + # expected_output 
path needs the raw data dir adding to it + expected_output = [dict(f.items()) for f in atb_input] + for f in expected_output: + f["path"] = f"{settings.raw_data_dir}/{f['path']}" + mock_download_client = MagicMock() - mock_logger = MagicMock() + mock_stream_to_s3 = MagicMock() + mock_requests = MagicMock() with ( patch("cdm_data_loaders.pipelines.all_the_bacteria.FileDownloader", return_value=mock_download_client), - patch("cdm_data_loaders.pipelines.all_the_bacteria.logger", mock_logger), + patch("cdm_data_loaders.pipelines.all_the_bacteria.stream_to_s3", mock_stream_to_s3), + patch("cdm_data_loaders.pipelines.all_the_bacteria.requests", mock_requests), ): - output = list(osf_file_downloader(test_settings, atb_file_list)) - - for item, filename in zip(atb_file_list, expected_paths, strict=True): - assert item["path"] == str(Path(test_settings.raw_data_dir) / filename) - - assert output[0].data == [ - {**f, "path": str(Path(test_settings.raw_data_dir) / f["filename"])} for f in atb_file_list - ] - - assert mock_download_client.download.call_count == len(expected_calls) - - for url, filename, checksum in expected_calls: - mock_download_client.download.assert_any_call( - url, - Path(test_settings.raw_data_dir) / filename, - expected_checksum=checksum, - checksum_fn="md5", - ) + # get the output from the generator + output = list(osf_file_downloader(settings, atb_file_list)) + + # should have a separate call for each file downloaded + if use_destination == "s3": + mock_download_client.download.assert_not_called() + call_args = [c.kwargs for c in mock_stream_to_s3.call_args_list] + assert call_args == [ + { + "url": f["url"], + "s3_path": f["path"], + "requests": mock_requests, + } + for f in atb_file_list + ] + else: + assert mock_download_client.download.call_count == len(atb_file_list) + + call_list = [c.kwargs for c in mock_download_client.download.call_args_list] + expected_calls = [ + { + "url": f["url"], + "destination": Path(settings.raw_data_dir) / f["path"], + "expected_checksum": f["md5"], + "checksum_fn": "md5", + } + for f in atb_input + ] + assert call_list == expected_calls + + # output from dlt.mark.with_table_name + assert output[0].data == expected_output + # the input args are mutated in place + assert atb_file_list == expected_output - mock_logger.assert_not_called() # no logs should be emitted for successful downloads assert caplog.records == [] @pytest.mark.parametrize( - ("atb_file_list", "download_side_effect", "expected_exceptions", "expected_paths"), + ("atb_file_list", "expected_exceptions", "expected_paths"), [ ( [ - {"filename": "good_file.txt", "url": "https://osf.io/good", "md5": "md5sum1"}, - {"filename": "great_file.txt", "url": "https://osf.io/great", "md5": "md5sum2"}, - {"filename": "bad_file.txt", "url": "https://osf.io/bad", "md5": "badmd5"}, + { + "project": "AllTheBacteria/One", + "filename": "Two/good_file.txt", + "url": "https://osf.io/good", + "md5": "md5sum1", + }, + { + "project": "AllTheBacteria/One/Two", + "filename": "great_file.txt", + "url": "https://osf.io/great", + "md5": "md5sum2", + }, + { + "project": "AllTheBacteria/One", + "filename": "fail.txt", + "url": "https://osf.io/fail", + "md5": "badmd5", + }, ], - lambda url, _save_path, **_kwargs: ( - (_ for _ in ()).throw(RuntimeError("download failed")) if url == "https://osf.io/bad" else None - ), - ["Could not download file from https://osf.io/bad: download failed"], - {"good_file.txt": True, "great_file.txt": True, "bad_file.txt": False}, + ["Could not download file from 
https://osf.io/fail: Loser!"], + {"Two/good_file.txt": True, "great_file.txt": True, "fail.txt": False}, ), ( [ - {"filename": "bad_file.txt", "url": "https://osf.io/bad", "md5": "badmd5"}, - {"filename": "even_worse.txt", "url": "https://osf.io/even_worse", "md5": "badmd5"}, + {"project": "Dud", "filename": "bad_file.txt", "url": "https://osf.io/bad", "md5": "badmd5"}, + { + "project": "Dud", + "filename": "also_very_bad.txt", + "url": "https://osf.io/also_very_bad", + "md5": "badmd5", + }, ], - lambda _url, _save_path, **_kwargs: (_ for _ in ()).throw(Exception("Boom!")), [ - "Could not download file from https://osf.io/bad: Boom!", - "Could not download file from https://osf.io/even_worse: Boom!", + "Could not download file from https://osf.io/bad: BOOM!", + "Could not download file from https://osf.io/also_very_bad: BOOM!", ], - {"bad_file.txt": False, "even_worse.txt": False}, + {"bad_file.txt": False, "also_very_bad.txt": False}, ), ], ) -def test_osf_file_downloader_error_handling( - test_settings: AtbSettings, +@pytest.mark.parametrize("use_destination", VALID_DESTINATIONS) +def test_osf_file_downloader_error_handling( # noqa: PLR0913 atb_file_list: list[dict[str, Any]], - download_side_effect: Callable, expected_exceptions: list[str], expected_paths: dict[str, bool], + use_destination: str, + caplog: pytest.LogCaptureFixture, + request: pytest.FixtureRequest, ) -> None: """Ensure that errors during file download are handled correctly.""" + settings = request.getfixturevalue("test_s3_settings" if use_destination == "s3" else "test_settings") + + def file_downloader_boom(**args) -> None: # noqa: ANN003 + if "url" in args: + if "bad" in args["url"]: + msg = "BOOM!" + raise ValueError(msg) + if "fail" in args["url"]: + msg_0 = "Loser!" + raise RuntimeError(msg_0) + + mock_requests = MagicMock() mock_download_client = MagicMock() - mock_download_client.download.side_effect = download_side_effect - mock_logger = MagicMock() + mock_download_client.download.side_effect = file_downloader_boom + mock_stream_to_s3 = MagicMock(side_effect=file_downloader_boom) with ( patch("cdm_data_loaders.pipelines.all_the_bacteria.FileDownloader", return_value=mock_download_client), - patch("cdm_data_loaders.pipelines.all_the_bacteria.logger", mock_logger), + patch("cdm_data_loaders.pipelines.all_the_bacteria.stream_to_s3", mock_stream_to_s3), + patch("cdm_data_loaders.pipelines.all_the_bacteria.requests", mock_requests), ): - list(osf_file_downloader(test_settings, atb_file_list)) + list(osf_file_downloader(settings, atb_file_list)) + + if use_destination == "s3": + mock_download_client.download.assert_not_called() + call_args = [c.kwargs for c in mock_stream_to_s3.call_args_list] + assert call_args == [ + { + "url": f["url"], + "s3_path": f"{settings.raw_data_dir}/{f['project'].replace('AllTheBacteria/', '')}/{f['filename']}", + "requests": mock_requests, + } + for f in atb_file_list + ] + else: + mock_stream_to_s3.assert_not_called() + + expected_file_names = { + "Two/good_file.txt": f"{settings.raw_data_dir}/One/Two/good_file.txt", + "great_file.txt": f"{settings.raw_data_dir}/One/Two/great_file.txt", + } for item in atb_file_list: if item["filename"] in expected_paths and expected_paths[item["filename"]]: - assert item["path"] == str(Path(test_settings.raw_data_dir) / item["filename"]) + assert item["path"] == expected_file_names[item["filename"]] else: assert "path" not in item - # FIXME: why is caplog not working here? Ideally this should use caplog instead of a mock logger. 
- exception_call_args = [call.args[0] for call in mock_logger.exception.call_args_list] - assert exception_call_args == expected_exceptions + log_messages = [r.message for r in caplog.records] + assert expected_exceptions == log_messages def test_run_atb_pipeline( diff --git a/tests/pipelines/test_core.py b/tests/pipelines/test_core.py index c6d57a75..0cc15b48 100644 --- a/tests/pipelines/test_core.py +++ b/tests/pipelines/test_core.py @@ -66,7 +66,7 @@ def test_cts_settings() -> CtsSettings: params=[ pytest.param({"input_dir": "/fake/input"}, id="default"), pytest.param( - {"input_dir": "/path/to/dir", "use_destination": "s3", "start_at": 15, "output": "/some/dir"}, + {"input_dir": "/path/to/dir", "use_destination": "local_fs", "start_at": 15, "output": "/some/dir"}, id="alt", ), ] @@ -372,9 +372,8 @@ def test_run_cli_no_slack_env_var_when_vars_missing( # dlt.config state after successful run @pytest.mark.parametrize("settings_cls", SETTINGS_CLASSES) -@pytest.mark.parametrize("use_destination", VALID_DESTINATIONS) @pytest.mark.parametrize("dev_mode", [True, False]) -@pytest.mark.parametrize("output", ["/some/path", "s3://bucket/whatever"]) +@pytest.mark.parametrize(("use_destination", "output"), [("local_fs", "/some/path"), ("s3", "s3://bucket/whatever")]) def test_run_cli_dlt_config_updated_after_success( dlt_config: dict[str, Any], settings_cls: type[CtsSettings], dev_mode: bool, use_destination: str, output: str ) -> None: @@ -413,7 +412,7 @@ def test_run_pipeline_destination_pipeline_pipeline_run_kwargs_set( pipeline_run_kwargs: dict[str, Any] | None, ) -> None: """Ensure a non-empty output sets the correct dlt.config bucket_url key.""" - settings = make_batched_settings(input_dir="/i", output="/custom/output", use_destination="s3") + settings = make_batched_settings(input_dir="/i", output="/custom/output", use_destination="local_fs") fake_resource = MagicMock() run_pipeline( settings, @@ -425,7 +424,7 @@ def test_run_pipeline_destination_pipeline_pipeline_run_kwargs_set( assert_pipeline_run_correctly( mock_dlt, fake_resource, - "s3", + "local_fs", destination_kwargs, pipeline_kwargs, pipeline_run_kwargs, @@ -794,7 +793,7 @@ def test_integration_empty_input_pipeline_run_still_called( # test run_cli + stream_xml_file_resource + run_pipeline -@pytest.mark.use_fixtures("patched_io_empty_batcher", "test_bfi_settings") +@pytest.mark.usefixtures("patched_io_empty_batcher", "test_bfi_settings") def test_integration_run_cli_calls_pipeline_fn_with_config(mock_dlt: MagicMock) -> None: """The exact config produced by run_cli reaches stream_xml_file_resource unchanged.""" received: list[CtsSettings] = [] diff --git a/tests/pipelines/test_cts_defaults.py b/tests/pipelines/test_cts_defaults.py index 035b81d6..26428a3d 100644 --- a/tests/pipelines/test_cts_defaults.py +++ b/tests/pipelines/test_cts_defaults.py @@ -1,6 +1,5 @@ """Tests for the Settings objects used by DLT pipelines.""" -from pathlib import Path from typing import Any import pytest @@ -17,12 +16,14 @@ from tests.pipelines.conftest import ( DEFAULT_BATCH_FILE_SETTINGS_RECONCILED, DEFAULT_CTS_SETTINGS_RECONCILED, + DESTINATION_TO_OUTPUT, TEST_BATCH_FILE_SETTINGS, TEST_BATCH_FILE_SETTINGS_RECONCILED, TEST_CTS_SETTINGS, TEST_CTS_SETTINGS_RECONCILED, check_settings, make_settings, + make_settings_autofill_config, ) SETTINGS_CLASSES = [CtsSettings, BatchedFileInputSettings] @@ -30,6 +31,31 @@ INVALID_DESTINATIONS = ["gcs", "filesystem", "", "LocalFs", "S3"] INVALID_BOOLEAN_VALUES = ["what", "yep", "nope", "2", -1, "", " ", "wtf", None] +S3 
= "is_s3" +OUT = "output" +RAW = "raw_data_dir" +PIPE = "pipeline_dir" + + +# manually specify to avoid recapitulating logic +OUTPUT_PATHS: dict[str, dict[str, Any]] = { + "": {S3: False, OUT: "", RAW: "raw_data", PIPE: ".dlt_conf"}, + "/": {S3: False, OUT: "/", RAW: "/raw_data", PIPE: "/.dlt_conf"}, + # from destination.local_fs + "/output_dir": {S3: False, OUT: "/output_dir", RAW: "/output_dir/raw_data", PIPE: "/output_dir/.dlt_conf"}, + "/output/dir": {S3: False, OUT: "/output/dir", RAW: "/output/dir/raw_data", PIPE: "/output/dir/.dlt_conf"}, + "s3/some/path/": {S3: False, OUT: "s3/some/path", RAW: "s3/some/path/raw_data", PIPE: "s3/some/path/.dlt_conf"}, + # normalised form of the above + "s3/some/path": {S3: False, OUT: "s3/some/path", RAW: "s3/some/path/raw_data", PIPE: "s3/some/path/.dlt_conf"}, + "s3a://bucket/key": {S3: True, OUT: "s3a://bucket/key", RAW: "s3a://bucket/key/raw_data", PIPE: None}, + "s3://test/bucket/": {S3: True, OUT: "s3://test/bucket", RAW: "s3://test/bucket/raw_data", PIPE: None}, + # normalised from above + "s3://test/bucket": {S3: True, OUT: "s3://test/bucket", RAW: "s3://test/bucket/raw_data", PIPE: None}, + # from destination.s3 + "s3://some/s3/bucket": {S3: True, OUT: "s3://some/s3/bucket", RAW: "s3://some/s3/bucket/raw_data", PIPE: None}, +} + + # a whole load of values that Pydantic will coerce to a boolean TRUE_FALSE_VALUES = [ ("0", False), @@ -344,48 +370,130 @@ def test_settings_reconcile_with_dlt_config_output_resolved_from_dlt_config_buck assert s.output == dlt_config[f"destination.{use_destination}.bucket_url"] -# properties derived from self.output +# properties derived from self.output: pipeline_dir and raw_data_dir @pytest.mark.parametrize("settings_cls", SETTINGS_CLASSES) -@pytest.mark.parametrize("output", ["", "/output/dir", "some/convoluted/path/to/dir/"]) +@pytest.mark.parametrize( + "output", + list(OUTPUT_PATHS.keys()), +) @pytest.mark.parametrize("use_output_dir_for_pipeline_metadata", [True, False]) @pytest.mark.parametrize("use_destination", VALID_DESTINATIONS) def test_settings_generate_pipeline_raw_data_dirs( settings_cls: type[CtsSettings], output: str, use_output_dir_for_pipeline_metadata: bool, - dlt_config: dict[str, Any], use_destination: str, ) -> None: - """Ensure that the correct paths are generated for pipeline and raw data directories.""" - s = make_settings( - settings_cls, - dlt_config=dlt_config, - output=output, - use_destination=use_destination, - use_output_dir_for_pipeline_metadata=use_output_dir_for_pipeline_metadata, - ) + """Ensure that the correct paths are generated for pipeline and raw data directories. + + Ensure that the destination set in `use_destination` concurs with any output path set. + + Ensure that pipeline directories cannot be set if the output is set to s3. 
+ """ + make_settings_args = { + "output": output, + "use_destination": use_destination, + "use_output_dir_for_pipeline_metadata": use_output_dir_for_pipeline_metadata, + } expected = { **DEFAULT_CTS_SETTINGS_RECONCILED, "use_destination": use_destination, "use_output_dir_for_pipeline_metadata": use_output_dir_for_pipeline_metadata, - "output": output.rstrip("/") or dlt_config[f"destination.{use_destination}.bucket_url"], + "output": DESTINATION_TO_OUTPUT[use_destination] if output == "" else OUTPUT_PATHS[output][OUT], } - if settings_cls == BatchedFileInputSettings: expected["start_at"] = DEFAULT_START_AT - # list containing the projected raw_data_dir and pipeline_dir - expected_properties = { - "": [str(Path(expected["output"]) / "raw_data"), str(Path(expected["output"]) / ".dlt_conf")], - "/output/dir": [str(Path("/output/dir") / "raw_data"), str(Path("/output/dir") / ".dlt_conf")], - "some/convoluted/path/to/dir/": [ - str(Path("some/convoluted/path/to/dir") / "raw_data"), - str(Path("some/convoluted/path/to/dir") / ".dlt_conf"), - ], + if (OUTPUT_PATHS[expected["output"]][S3] and use_destination == "local_fs") or ( + OUTPUT_PATHS[expected["output"]][S3] is False and use_destination == "s3" + ): + with pytest.raises(ValueError, match="Mismatch between output location and use_destination"): + make_settings_autofill_config(settings_cls, **make_settings_args) + return + + if use_output_dir_for_pipeline_metadata and OUTPUT_PATHS[expected["output"]][S3] is True: + # can't have pipeline dir on s3 + with pytest.raises(ValueError, match="It is not currently possible to have the pipeline directory on s3"): + make_settings_autofill_config(settings_cls, **make_settings_args) + return + + s = make_settings_autofill_config(settings_cls, **make_settings_args) + + # get the pipeline and raw data dirs from OUTPUT_PATHS + expected["raw_data_dir"] = OUTPUT_PATHS[expected["output"]][RAW] + # No pipeline_dir if use_output_dir_for_pipeline_metadata is not set + expected["pipeline_dir"] = OUTPUT_PATHS[expected["output"]][PIPE] if use_output_dir_for_pipeline_metadata else None + check_settings(s, expected) + + +@pytest.mark.parametrize("settings_cls", SETTINGS_CLASSES) +@pytest.mark.parametrize( + "output", + list(OUTPUT_PATHS.keys()), +) +@pytest.mark.parametrize("use_output_dir_for_pipeline_metadata", [True, False]) +@pytest.mark.parametrize("use_destination", VALID_DESTINATIONS) +def test_cli_app_run_generate_pipeline_raw_data_dirs( + settings_cls: type[CtsSettings], + output: str, + use_output_dir_for_pipeline_metadata: bool, + use_destination: str, + dlt_config: dict[str, Any], +) -> None: + """Ensure that the correct paths are generated for pipeline and raw data directories. + + Ensure that the destination set in `use_destination` concurs with any output path set. + + Ensure that pipeline directories cannot be set if the output is set to s3. 
+ """ + make_settings_args = [ + "--output", + output, + "--use_destination", + use_destination, + "--use_output_dir_for_pipeline_metadata", + str(use_output_dir_for_pipeline_metadata), + ] + + expected = { + **DEFAULT_CTS_SETTINGS_RECONCILED, + "use_destination": use_destination, + "use_output_dir_for_pipeline_metadata": use_output_dir_for_pipeline_metadata, + "output": DESTINATION_TO_OUTPUT[use_destination] if output == "" else OUTPUT_PATHS[output][OUT], } + if settings_cls == BatchedFileInputSettings: + expected["start_at"] = DEFAULT_START_AT + + if (OUTPUT_PATHS[expected["output"]][S3] and use_destination == "local_fs") or ( + OUTPUT_PATHS[expected["output"]][S3] is False and use_destination == "s3" + ): + with pytest.raises(ValueError, match="Mismatch between output location and use_destination"): + CliApp.run( + settings_cls, + dlt_config=dlt_config, + cli_args=make_settings_args, + ) + return + + if use_output_dir_for_pipeline_metadata and OUTPUT_PATHS[expected["output"]][S3] is True: + # can't have pipeline dir on s3 + with pytest.raises(ValueError, match="It is not currently possible to have the pipeline directory on s3"): + CliApp.run( + settings_cls, + dlt_config=dlt_config, + cli_args=make_settings_args, + ) + return - expected["raw_data_dir"] = expected_properties[output][0] - expected["pipeline_dir"] = expected_properties[output][1] if use_output_dir_for_pipeline_metadata else None + s = CliApp.run( + settings_cls, + dlt_config=dlt_config, + cli_args=make_settings_args, + ) + # get the pipeline and raw data dirs from OUTPUT_PATHS + expected["raw_data_dir"] = OUTPUT_PATHS[expected["output"]][RAW] + # No pipeline_dir if use_output_dir_for_pipeline_metadata is not set + expected["pipeline_dir"] = OUTPUT_PATHS[expected["output"]][PIPE] if use_output_dir_for_pipeline_metadata else None check_settings(s, expected) diff --git a/tests/utils/test_cdm_logger.py b/tests/utils/test_cdm_logger.py new file mode 100644 index 00000000..852f7be5 --- /dev/null +++ b/tests/utils/test_cdm_logger.py @@ -0,0 +1,391 @@ +"""Tests for cdm_data_loaders/utils/cdm_logger.py.""" + +import json +import logging +import logging.handlers +from collections.abc import Generator +from copy import deepcopy +from pathlib import Path +from typing import Any + +import pytest +from frozendict import frozendict + +import cdm_data_loaders.utils.cdm_logger as cdm_logger_module +from cdm_data_loaders.utils.cdm_logger import ( + DEFAULT_LOGGER_NAME, + JSON_LOG_CONFIG, + LOG_FILENAME, + LOGGING_CONFIG, + MAX_LOG_BACKUPS, + MAX_LOG_FILE_SIZE, + _attach_file_handler, + _load_logging_config, + _set_level_safe, + get_cdm_logger, + init_logger, +) + +# Add near the top of the test file, alongside the other imports +MODULE_LOGGER_NAME = "cdm_data_loaders.utils.cdm_logger" + + +VALID_JSON_CONFIG = frozendict( + { + "version": 1, + "disable_existing_loggers": False, + "handlers": { + "CONFIGURE_HANDLER_NAME": { + "class": "logging.StreamHandler", + "formatter": "json", + "level": "INFO", + "stream": "ext://sys.stdout", + } + }, + "formatters": {"json": {"format": JSON_LOG_CONFIG}}, + "loggers": {DEFAULT_LOGGER_NAME: {"level": "INFO", "handlers": ["CONFIGURE_HANDLER_NAME"]}}, + } +) + + +def _write_config(path: Path, test_name: str, config: dict[str, Any] | None = None) -> Path: + """Write a JSON logging config to path, using VALID_JSON_CONFIG by default.""" + # edit the config to ensure that it is recognisable as being from a specific source + if not config: + config = deepcopy(dict(VALID_JSON_CONFIG)) + # switch out the handler 
name for a test-specific name + config["loggers"][DEFAULT_LOGGER_NAME]["handlers"] = [test_name] + config["handlers"][test_name] = config["handlers"].pop("CONFIGURE_HANDLER_NAME") + + path.write_text(json.dumps(config)) + return path + + +@pytest.fixture(autouse=True) +def reset_logging() -> Generator[None, Any]: + """Remove CDM and dlt loggers from the manager and clear their handlers before and after tests.""" + + def _clean() -> None: + for name in (DEFAULT_LOGGER_NAME, "dlt"): + logger = logging.root.manager.loggerDict.pop(name, None) + if isinstance(logger, logging.Logger): + logger.handlers.clear() + + _clean() + yield + _clean() + + +@pytest.fixture +def clean_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Remove logging-related env vars during tests.""" + monkeypatch.delenv("LOG_LEVEL", raising=False) + monkeypatch.delenv("LOG_CONFIG_FILE", raising=False) + monkeypatch.delenv("ENABLE_FILE_LOGGING", raising=False) + + +@pytest.fixture +def empty_cwd(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Remove the LOG_CONFIG_FILE env var and chdir to an empty temporary directory with no config file.""" + monkeypatch.chdir(tmp_path) + monkeypatch.delenv("LOG_CONFIG_FILE", raising=False) + + +@pytest.fixture +def cdm_logger(clean_env: None, empty_cwd: None) -> logging.Logger: # noqa: ARG001 + """Return a CDM logger initialised with LOGGING_CONFIG.""" + return init_logger() + + +@pytest.fixture +def dlt_logger() -> logging.Logger: + """Register a pre-configured dlt logger in the logging manager.""" + logger = logging.getLogger("dlt") + logger.setLevel(logging.WARNING) + return logger + + +@pytest.fixture +def config_in_cwd(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """Write a valid config file in a temporary dir and chdir into it, simulating a config found in the CWD.""" + path = _write_config(tmp_path / cdm_logger_module.LOGGING_CONFIG_FILENAME, "config_in_cwd") + monkeypatch.chdir(tmp_path) + monkeypatch.delenv("LOG_CONFIG_FILE", raising=False) + return path + + +@pytest.fixture +def config_at_explicit_path(tmp_path: Path) -> Path: + """Write a valid config file in a temporary directory.""" + return _write_config(tmp_path / "custom_logging_config.json", "config_at_explicit_path") + + +# _load_logging_config — resolution order +@pytest.mark.usefixtures("config_in_cwd") +def test_load_logging_config_uses_explicit_path_first(config_at_explicit_path: Path) -> None: + """Ensure that the config file argument is used in preference to other choices.""" + config = _load_logging_config(config_file=config_at_explicit_path) + assert config["disable_existing_loggers"] is False + assert config["loggers"][DEFAULT_LOGGER_NAME]["handlers"] == ["config_at_explicit_path"] + assert set(config["handlers"]) == {"config_at_explicit_path"} + + +@pytest.mark.usefixtures("config_in_cwd") +def test_load_logging_config_uses_env_var_over_cwd( + config_at_explicit_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure that the LOG_CONFIG_FILE env var is used in preference to the config file in the CWD.""" + monkeypatch.setenv("LOG_CONFIG_FILE", str(config_at_explicit_path)) + config = _load_logging_config() + assert config["loggers"][DEFAULT_LOGGER_NAME]["handlers"] == ["config_at_explicit_path"] + assert set(config["handlers"]) == {"config_at_explicit_path"} + + +@pytest.mark.usefixtures("config_in_cwd") +def test_load_logging_config_uses_cwd_when_no_arg_or_env(monkeypatch: pytest.MonkeyPatch) -> None: + """When no argument or
env var is provided, the logging_config.json in the current working directory is loaded.""" + monkeypatch.delenv("LOG_CONFIG_FILE", raising=False) + config = _load_logging_config() + assert config["disable_existing_loggers"] is False + assert config["loggers"][DEFAULT_LOGGER_NAME]["handlers"] == ["config_in_cwd"] + assert set(config["handlers"]) == {"config_in_cwd"} + + +@pytest.mark.usefixtures("empty_cwd") +def test_load_logging_config_falls_back_to_frozendict_when_all_sources_fail(caplog: pytest.LogCaptureFixture) -> None: + """Ensure that logging falls back to the default frozendict if no other sources are found.""" + with caplog.at_level(logging.WARNING, logger=MODULE_LOGGER_NAME): + config = _load_logging_config() + assert config == {**LOGGING_CONFIG, "disable_existing_loggers": False} + log_message = caplog.records[-1] + assert log_message.message == "No logging config file found. Falling back to built-in config." + + +@pytest.mark.usefixtures("config_in_cwd") +def test_load_logging_config_skips_bad_argument_path_and_tries_next( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + """Ensure that a non-existent or invalid config file is ignored and the next source tried.""" + monkeypatch.delenv("LOG_CONFIG_FILE", raising=False) + with caplog.at_level(logging.DEBUG, logger=MODULE_LOGGER_NAME): + config = _load_logging_config(config_file=tmp_path / "nonexistent.json") + assert config["disable_existing_loggers"] is False + assert any("nonexistent.json" in m for m in caplog.messages) + + +@pytest.mark.usefixtures("config_in_cwd") +def test_load_logging_config_skips_bad_env_var_path_and_tries_next( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + """Ensure that a non-existent or invalid config file is ignored and the next source tried.""" + monkeypatch.setenv("LOG_CONFIG_FILE", str(tmp_path / "nonexistent.json")) + with caplog.at_level(logging.WARNING, logger=MODULE_LOGGER_NAME): + config = _load_logging_config() + assert config["disable_existing_loggers"] is False + assert any("nonexistent.json" in m for m in caplog.messages) + + +@pytest.mark.usefixtures("config_in_cwd") +def test_load_logging_config_skips_invalid_json_and_tries_next( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + """Ensure that a non-existent or invalid config file is ignored and the next source tried.""" + bad_path = tmp_path / "bad_config.json" + bad_path.write_text("this is not json {{{") + monkeypatch.delenv("LOG_CONFIG_FILE", raising=False) + with caplog.at_level(logging.WARNING, logger=MODULE_LOGGER_NAME): + config = _load_logging_config(config_file=bad_path) + assert config["disable_existing_loggers"] is False + assert any("bad_config.json" in m for m in caplog.messages) + + +def test_load_logging_config_overrides_disable_existing_loggers_when_true_in_file( + tmp_path: Path, caplog: pytest.LogCaptureFixture, request: pytest.FixtureRequest +) -> None: + """If a config file sets disable_existing_loggers to True, _load_logging_config should override it.""" + path = _write_config( + tmp_path / "bad.json", request.node.originalname, {**VALID_JSON_CONFIG, "disable_existing_loggers": True} + ) + with caplog.at_level(logging.WARNING, logger=MODULE_LOGGER_NAME): + config = _load_logging_config(config_file=path) + assert config["disable_existing_loggers"] is False + assert any( + "sets disable_existing_loggers to True. 
Overriding to prevent existing loggers being silently disabled" in m + for m in caplog.messages + ) + + +@pytest.mark.usefixtures("clean_env") +def test_init_logger_accepts_explicit_config_file(config_at_explicit_path: Path) -> None: + """init_logger should accept a config_file argument and pass it through to _load_logging_config without error.""" + logger = init_logger(config_file=config_at_explicit_path) + assert isinstance(logger, logging.Logger) + + +@pytest.mark.usefixtures("empty_cwd") +def test_init_logger_uses_log_config_file_env_var( + config_at_explicit_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """When LOG_CONFIG_FILE is set in the environment and no config_file argument is passed, init_logger should load config from that path.""" + monkeypatch.setenv("LOG_CONFIG_FILE", str(config_at_explicit_path)) + monkeypatch.delenv("LOG_LEVEL", raising=False) + logger = init_logger() + assert isinstance(logger, logging.Logger) + + +def test_init_logger_happy_path(cdm_logger: logging.Logger) -> None: + """init_logger should return a Logger with the CDM default name.""" + assert isinstance(cdm_logger, logging.Logger) + assert cdm_logger.name == DEFAULT_LOGGER_NAME + # calling init_logger more than once returns the same object + assert init_logger() is init_logger() + assert cdm_logger.level == logging.INFO + + +@pytest.mark.parametrize("level", ["DEBUG", "Info", "warning"]) +@pytest.mark.usefixtures("clean_env", "empty_cwd") +def test_init_logger_explicit_level_argument(level: str) -> None: + """Ensure that the log level can be set explicitly.""" + logger = init_logger(log_level=level) + assert logger.level == getattr(logging, level.upper()) + + +@pytest.mark.usefixtures("clean_env", "empty_cwd") +def test_init_logger_env_var_sets_level(monkeypatch: pytest.MonkeyPatch) -> None: + """When LOG_LEVEL is set in the environment, init_logger should apply it to the logger level.""" + monkeypatch.setenv("LOG_LEVEL", "ERROR") + assert init_logger().level == logging.ERROR + + +@pytest.mark.usefixtures("clean_env", "empty_cwd") +def test_init_logger_argument_takes_priority_over_env_var(monkeypatch: pytest.MonkeyPatch) -> None: + """An explicit log_level argument should take precedence over the LOG_LEVEL env var when both are set.""" + monkeypatch.setenv("LOG_LEVEL", "ERROR") + assert init_logger(log_level="DEBUG").level == logging.DEBUG + + +def test_init_logger_log_level_gates_emission_correctly( + cdm_logger: logging.Logger, caplog: pytest.LogCaptureFixture +) -> None: + """Messages at or above the configured level should be captured; messages below it should be suppressed.""" + with caplog.at_level(logging.INFO, logger=DEFAULT_LOGGER_NAME): + cdm_logger.info("should appear") + cdm_logger.debug("should not appear") + + assert "should appear" in caplog.messages + assert "should not appear" not in caplog.messages + + +# console +def test_init_logger_console_handler_configured_correctly(cdm_logger: logging.Logger) -> None: + """The logger should have a StreamHandler at INFO level by default. + + No RotatingFileHandler should be attached unless explicitly requested. 
+ """ + stream_handlers = [h for h in cdm_logger.handlers if type(h) is logging.StreamHandler] + assert len(stream_handlers) == 1 + assert stream_handlers[0].level == logging.INFO + rotating_handlers = [h for h in cdm_logger.handlers if isinstance(h, logging.handlers.RotatingFileHandler)] + assert rotating_handlers == [] + + +@pytest.mark.usefixtures("clean_env", "empty_cwd") +def test_get_cdm_logger_creates_cdm_logger_when_none_exists() -> None: + """When neither a dlt nor a CDM logger exists, get_cdm_logger should initialise and return a new CDM logger.""" + logger = get_cdm_logger() + assert isinstance(logger, logging.Logger) + assert logger.name == DEFAULT_LOGGER_NAME + assert DEFAULT_LOGGER_NAME in logging.root.manager.loggerDict + # get_cdm_logger should returns the existing logger rather than creating a new one. + assert get_cdm_logger() is init_logger() + + +@pytest.mark.usefixtures("clean_env", "empty_cwd", "dlt_logger") +def test_get_cdm_logger_prefers_dlt_over_cdm_logger() -> None: + """When a dlt logger is present in the logging manager, get_cdm_logger should return it even if a CDM logger also exists.""" + init_logger() + assert get_cdm_logger().name == "dlt" + + +@pytest.mark.parametrize("level", ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]) +def test_set_level_safe_accepts_all_valid_levels(cdm_logger: logging.Logger, level: str) -> None: + """_set_level_safe should accept all standard logging level strings.""" + _set_level_safe(cdm_logger, level) + assert cdm_logger.level == getattr(logging, level) + + +@pytest.mark.parametrize("level", ["debug", "Info", "wARNING"]) +def test_set_level_safe_is_case_insensitive(cdm_logger: logging.Logger, level: str) -> None: + """Ensure that _set_level_safe normalises case issues.""" + _set_level_safe(cdm_logger, level) + assert cdm_logger.level == getattr(logging, level.upper()) + + +def test_set_level_safe_raises_descriptive_error_for_invalid_level(cdm_logger: logging.Logger) -> None: + """_set_level_safe should raise a ValueError for an unrecognised level string. + + The error message should echo back the bad value and list the valid options. + """ + with pytest.raises(ValueError, match=r"Invalid log level 'VERBOS'\. 
Must be one of"): + _set_level_safe(cdm_logger, "VERBOS") + + +# file handler +def test_attach_file_handler_adds_correctly_configured_handler(cdm_logger: logging.Logger, tmp_path: Path) -> None: + """_attach_file_handler should add a single RotatingFileHandler with a log file at the specified path.""" + log_path = tmp_path / LOG_FILENAME + _attach_file_handler(cdm_logger, log_path) + + rotating_handlers = [h for h in cdm_logger.handlers if isinstance(h, logging.handlers.RotatingFileHandler)] + assert len(rotating_handlers) == 1 + + handler = rotating_handlers[0] + assert handler.maxBytes == MAX_LOG_FILE_SIZE + assert handler.backupCount == MAX_LOG_BACKUPS + assert handler.formatter._fmt == JSON_LOG_CONFIG # noqa: SLF001 + + assert log_path.exists() + + # try attaching a second handler + _attach_file_handler(cdm_logger, log_path) + + assert len([h for h in cdm_logger.handlers if isinstance(h, logging.handlers.RotatingFileHandler)]) == 1 + + +def test_attach_file_handler_creates_missing_parent_directory(cdm_logger: logging.Logger, tmp_path: Path) -> None: + """_attach_file_handler should create the parent directory of the log file path if it does not exist.""" + log_path = tmp_path / "nested" / "dirs" / LOG_FILENAME + assert not log_path.parent.exists() + _attach_file_handler(cdm_logger, log_path) + assert log_path.parent.exists() + assert log_path.exists() + + +def test_attach_file_handler_detects_any_file_handler_type(cdm_logger: logging.Logger, tmp_path: Path) -> None: + """_attach_file_handler should not add a second file handler.""" + existing = logging.FileHandler(tmp_path / "other.log") + cdm_logger.addHandler(existing) + + _attach_file_handler(cdm_logger, tmp_path / LOG_FILENAME) + + file_handlers = [h for h in cdm_logger.handlers if "FileHandler" in type(h).__name__] + assert len(file_handlers) == 1 + + +@pytest.mark.parametrize("env_value", ["true", "TRUE", "True"]) +@pytest.mark.usefixtures("clean_env") +def test_init_logger_file_handler_added_when_requested( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, env_value: str +) -> None: + """A file handler should be attached when enable_file_logging=True or ENABLE_FILE_LOGGING env var is set.""" + log_path = tmp_path / LOG_FILENAME + + logger_arg = init_logger(enable_file_logging=True, log_file=log_path) + assert any("FileHandler" in type(h).__name__ for h in logger_arg.handlers) + + logger_arg.handlers.clear() + logging.root.manager.loggerDict.pop(DEFAULT_LOGGER_NAME, None) + + monkeypatch.setenv("ENABLE_FILE_LOGGING", env_value) + logger_env = init_logger(log_file=log_path) + assert any("FileHandler" in type(h).__name__ for h in logger_env.handlers) diff --git a/tests/utils/test_s3.py b/tests/utils/test_s3.py index 7d6398d2..0effb71a 100644 --- a/tests/utils/test_s3.py +++ b/tests/utils/test_s3.py @@ -1,17 +1,20 @@ """Tests for s3_utils.py using moto to mock AWS S3.""" import functools +import io from collections.abc import Callable, Generator from pathlib import Path from typing import Any -from unittest.mock import patch +from unittest.mock import MagicMock, patch import boto3 -import botocore import pytest +from botocore.exceptions import ClientError from moto import mock_aws +from requests.exceptions import ConnectionError as ConnError +from requests.exceptions import HTTPError -import cdm_data_loaders.utils.s3 as s3_utils # adjust to match your module name +import cdm_data_loaders.utils.s3 as s3_utils from cdm_data_loaders.utils.s3 import ( CDM_LAKE_BUCKET, DEFAULT_EXTRA_ARGS, @@ -19,10 +22,12 @@ delete_object, 
download_file, get_s3_client, + head_object, list_matching_objects, object_exists, reset_s3_client, split_s3_path, + stream_to_s3, upload_dir, upload_file, ) @@ -184,6 +189,13 @@ def test_get_s3_client_returns_same_instance() -> None: assert s3_utils._s3_client is None # noqa: SLF001 +@pytest.mark.skip("TODO: add test(s)") +@pytest.mark.s3 +def test_get_s3_client_populates_from_environment() -> None: + """Ensure that get_s3_client picks up its configuration from the environment.""" + # TODO: set up the environment and assert on the resulting client + + # split_s3_path PATH = "path" @@ -324,13 +336,20 @@ # object_exists @pytest.mark.parametrize("protocol", ["", "s3://", "s3a://"]) @pytest.mark.s3 -def test_object_exists_returns_true_when_present(mock_s3_client: Any, protocol: str) -> None: +def test_head_object_and_object_exists_true_and_false(mock_s3_client: Any, protocol: str) -> None: """Verify that object_exists returns True for an object that exists in the bucket.""" populate_mock_s3(mock_s3_client, FILES_IN_BUCKETS) for bucket, file_list in FILES_IN_BUCKETS.items(): for f in file_list: + output = head_object(f"{protocol}{bucket}/{f}") + assert output.get("ResponseMetadata", {}).get("HTTPStatusCode") == 200 assert object_exists(f"{protocol}{bucket}/{f}") is True + nonexistent_file = f"{protocol}{bucket}/a-file-i-just-made-up.txt" + assert object_exists(nonexistent_file) is False + with pytest.raises(ClientError, match=r"An error occurred \(404\) when calling the HeadObject operation"): + head_object(nonexistent_file) + @pytest.mark.parametrize("s3_path", ["absent", "dir_one", "dir_one/", "dir_one/file1.tnt"]) @pytest.mark.parametrize("bucket", BUCKETS) @@ -394,6 +413,128 @@ def test_upload_file_error(sample_file: Path) -> None: upload_file(sample_file, "") + +# TODO: Missing tests +# - Upload failure (S3 error) - returns False + + +def make_mock_requests( + content: bytes = b"hello world", + status_code: int = 200, + content_type: str = "application/octet-stream", +) -> tuple[MagicMock, MagicMock]: + """Build a mock requests module whose .get() returns a mock response.""" + mock_response = MagicMock() + mock_response.status_code = status_code + mock_response.raw = io.BytesIO(content) + mock_response.raw.decode_content = True + mock_response.headers = { + "content-type": content_type, + } + mock_response.raise_for_status = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + mock_requests = MagicMock() + mock_requests.get.return_value = mock_response + + return mock_requests, mock_response + + +UPLOAD_TEST_KEY = "uploads/test-file.pdf" +UPLOAD_BUCKET_KEY = f"{ALT_BUCKET}/{UPLOAD_TEST_KEY}" +TEST_URL = "https://example.com/test-file.pdf" + + +def test_stream_to_s3_happy_path(mock_s3_client: Any) -> None: + """File content from the HTTP response is stored correctly in S3.""" + content = b"hello world" + mock_requests, _ = make_mock_requests(content=content) + + saved_path = stream_to_s3(TEST_URL, UPLOAD_BUCKET_KEY, mock_requests) + + mock_requests.get.assert_called_once_with(TEST_URL, stream=True) + + # s3 path including bucket returned + assert saved_path == UPLOAD_BUCKET_KEY + + result = mock_s3_client.get_object(Bucket=ALT_BUCKET, Key=UPLOAD_TEST_KEY) + # check the content is correct + assert result["Body"].read() == content + + # new file shows up in list_objects + objects = mock_s3_client.list_objects_v2(Bucket=ALT_BUCKET)["Contents"] + keys = [obj["Key"] for obj in objects] + assert UPLOAD_TEST_KEY in keys + + +@pytest.mark.parametrize("content_type", [None, "application/json", "application/pdf", "text"]) +def
test_stream_to_s3_sets_content_type_from_response_headers(mock_s3_client: Any, content_type: str | None) -> None: + """ContentType metadata on the S3 object matches the HTTP response header.""" + content_type_args = {} + if content_type: + content_type_args["content_type"] = content_type + mock_requests, _ = make_mock_requests(**content_type_args) + + stream_to_s3(TEST_URL, UPLOAD_BUCKET_KEY, mock_requests) + + head = mock_s3_client.head_object(Bucket=ALT_BUCKET, Key=UPLOAD_TEST_KEY) + assert head["ContentType"] == (content_type or "application/octet-stream") + + +def test_stream_to_s3_raises_on_http_error_status(mock_s3_client: Any) -> None: + """An HTTP error status causes raise_for_status() to propagate an exception.""" + mock_requests, mock_response = make_mock_requests(status_code=404) + mock_response.raise_for_status.side_effect = HTTPError("404 Not Found") + + with ( + pytest.raises(HTTPError, match="404 Not Found"), + ): + stream_to_s3(TEST_URL, UPLOAD_BUCKET_KEY, mock_requests) + + with pytest.raises(ClientError, match="Not Found"): + mock_s3_client.head_object(Bucket=ALT_BUCKET, Key=UPLOAD_TEST_KEY) + + +def test_stream_to_s3_raises_on_connection_error(mock_s3_client: Any) -> None: + """A network-level failure raises a ConnectionError.""" + mock_requests, _ = make_mock_requests(status_code=404) + mock_requests.get.side_effect = ConnError("Network unreachable") + + with pytest.raises(ConnError, match="Network unreachable"): + stream_to_s3(TEST_URL, UPLOAD_BUCKET_KEY, mock_requests) + + with pytest.raises(ClientError, match="Not Found"): + mock_s3_client.head_object(Bucket=ALT_BUCKET, Key=UPLOAD_TEST_KEY) + + +# FIXME: don't upload if there is nothing there? +def test_stream_to_s3_uploads_empty_file(mock_s3_client: Any) -> None: + """An empty HTTP response body results in an empty S3 object.""" + mock_requests, _ = make_mock_requests(content=b"") + + stream_to_s3(TEST_URL, UPLOAD_BUCKET_KEY, mock_requests) + + result = mock_s3_client.get_object(Bucket=ALT_BUCKET, Key=UPLOAD_TEST_KEY) + assert result["Body"].read() == b"" + + +def test_stream_to_s3_uploads_large_file(mock_s3_client: Any) -> None: + """A large payload (>5MB) is uploaded correctly via multipart.""" + content = b"x" * (6 * 1024 * 1024) # 6 MB + mock_requests, _ = make_mock_requests(content=content) + + stream_to_s3(TEST_URL, UPLOAD_BUCKET_KEY, mock_requests) + + result = mock_s3_client.get_object(Bucket=ALT_BUCKET, Key=UPLOAD_TEST_KEY) + assert result["Body"].read() == content + + +@pytest.mark.skip("TODO: add test(s)") +def test_accepts_custom_requests_implementation() -> None: + """A subclassed or alternate requests module works as a drop-in.""" + # TODO: add test here? (a hedged sketch follows below)
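+
+
+# A minimal sketch of what such a test might look like, offered as an illustration only:
+# it assumes "drop-in" means any object exposing a requests-compatible `get(url, stream=True)`
+# that returns the same kind of context-manager response built by make_mock_requests above.
+# The MinimalRequests class and test name below are hypothetical, not existing project code.
+class MinimalRequests:
+    """Duck-typed stand-in for the requests module."""
+
+    def get(self, url: str, *, stream: bool = False) -> MagicMock:  # noqa: ARG002
+        # hand back a canned mock response; url and stream are accepted but unused
+        _, response = make_mock_requests(content=b"payload")
+        return response
+
+
+def test_stream_to_s3_accepts_duck_typed_requests(mock_s3_client: Any) -> None:
+    """stream_to_s3 should only require a working .get(), so a minimal stand-in suffices."""
+    stream_to_s3(TEST_URL, UPLOAD_BUCKET_KEY, MinimalRequests())
+    result = mock_s3_client.get_object(Bucket=ALT_BUCKET, Key=UPLOAD_TEST_KEY)
+    assert result["Body"].read() == b"payload"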
+ + @pytest.mark.parametrize("bucket", BUCKETS) @pytest.mark.parametrize("protocol", ["", "s3://", "s3a://"]) @pytest.mark.s3 @@ -475,14 +616,15 @@ def test_download_file_does_not_clobber_existing_file_to_mkdir(mock_s3_client: A @pytest.mark.s3 -def test_download_file_does_not_exist(mock_s3_client: Any, tmp_path: Path, capsys: pytest.CaptureFixture) -> None: +@pytest.mark.usefixtures("mock_s3_client") +def test_download_file_does_not_exist(tmp_path: Path, capsys: pytest.CaptureFixture) -> None: """Ensure that attempting to download a file that does not exist raises an error.""" bucket = BUCKETS[0] key = "to/the/door.txt" assert not object_exists(f"{bucket}/{key}") with pytest.raises( - botocore.exceptions.ClientError, + ClientError, match=r"An error occurred \(404\) when calling the HeadObject", ): download_file(f"{bucket}/{key}", tmp_path / "file.txt") @@ -490,6 +632,12 @@ assert "File not found" in capsys.readouterr().out + +# TODO: Missing tests +# - Non-404 S3 error during head +# - Error during directory creation (other than FileExistsError)? +# - version_id parameter behavior + + # upload_dir @pytest.mark.parametrize("bucket", [CDM_LAKE_BUCKET, ALT_BUCKET]) @pytest.mark.s3 @@ -577,6 +725,26 @@ def test_copy_file(mocked_s3_client_no_checksum: Any, destination: str) -> None: assert response["ResponseMetadata"]["HTTPStatusCode"] == 200 + +@pytest.mark.s3 +@pytest.mark.usefixtures("mock_s3_client") +def test_copy_file_source_object_nonexistent() -> None: + """Ensure that the code throws an error if the source object does not exist.""" + s3_path = f"{CDM_LAKE_BUCKET}/some/path/to/file" + assert object_exists(s3_path) is False + with pytest.raises(Exception, match="The specified key does not exist"): + copy_object(s3_path, f"{CDM_LAKE_BUCKET}/a/different/path/to/file") + + +@pytest.mark.s3 +@pytest.mark.usefixtures("mock_s3_client") +def test_copy_file_source_bucket_nonexistent() -> None: + """Ensure that the code throws an error if the bucket does not exist.""" + s3_path = "some-bucket/some/path/to/file" + assert object_exists(s3_path) is False + with pytest.raises(Exception, match="The specified bucket does not exist"): + copy_object(s3_path, f"{CDM_LAKE_BUCKET}/a/different/path/to/file") + + # delete_object @pytest.mark.parametrize("bucket", BUCKETS) @pytest.mark.parametrize("protocol", ["", "s3://", "s3a://"]) @@ -595,3 +763,14 @@ def test_delete_object_removes_object(mock_s3_client: Any, bucket: str, protocol resp = delete_object(s3_path) assert object_exists(s3_path) is False assert resp.get("ResponseMetadata", {}).get("HTTPStatusCode") == 204 + + +# delete_object - bucket does not exist +@pytest.mark.s3 +@pytest.mark.usefixtures("mock_s3_client") +def test_delete_object_no_such_bucket() -> None: + """Ensure that delete_object raises an error if the bucket does not exist.""" + s3_path = "fake-bucket/to/delete.txt" + assert object_exists(s3_path) is False + with pytest.raises(Exception, match="The specified bucket does not exist"): + delete_object(s3_path) diff --git a/uv.lock b/uv.lock index 243a2da8..5e485d34 100644 --- a/uv.lock +++ b/uv.lock @@ -393,7 +393,7 @@ [[package]] name = "cdm-data-loaders" -version = "0.1.7" +version = "0.1.8" source = { editable = "."
} dependencies = [ { name = "bioregistry" }, @@ -440,7 +440,7 @@ requires-dist = [ { name = "dlt", extras = ["deltalake", "duckdb", "filesystem", "parquet"], specifier = ">=1.22.2" }, { name = "frictionless", extras = ["aws"], specifier = ">=5.18.1" }, { name = "frozendict", specifier = ">=2.4.7" }, - { name = "lxml", specifier = ">=6.0.2" }, + { name = "lxml", specifier = ">=6.1.0" }, { name = "pydantic", specifier = ">=2.12.5" }, { name = "pydantic-settings", specifier = ">=2.12.0" }, { name = "tqdm", specifier = ">=4.67.3" }, @@ -1725,64 +1725,64 @@ wheels = [ [[package]] name = "lxml" -version = "6.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ce/08/1217ca4043f55c3c92993b283a7dbfa456a2058d8b57bbb416cc96b6efff/lxml-6.0.4.tar.gz", hash = "sha256:4137516be2a90775f99d8ef80ec0283f8d78b5d8bd4630ff20163b72e7e9abf2", size = 4237780, upload-time = "2026-04-12T16:28:24.182Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/78/f6/550a1ed9afde66e24bfcf9892446ea9779152df336062c6df0f7733151a2/lxml-6.0.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecc3d55ed756ee6c3447748862a97e1f5392d2c5d7f474bace9382345e4fc274", size = 8559522, upload-time = "2026-04-12T16:24:51.563Z" }, - { url = "https://files.pythonhosted.org/packages/11/93/3f687c14d2b4d24b60fe13fd5482c8853f82a10bb87f2b577123e342ed1a/lxml-6.0.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7d5a627a368a0e861350ccc567a70ec675d2bc4d8b3b54f48995ae78d8d530e", size = 4617380, upload-time = "2026-04-12T16:24:54.042Z" }, - { url = "https://files.pythonhosted.org/packages/b5/ed/91e443366063d3fb7640ae2badd5d7b65be4095ac6d849788e39c043baae/lxml-6.0.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d385141b186cc39ebe4863c1e41936282c65df19b2d06a701dedc2a898877d6a", size = 4922791, upload-time = "2026-04-12T16:24:56.381Z" }, - { url = "https://files.pythonhosted.org/packages/30/4b/2243260b70974aca9ba0cc71bd668c0c3a79644d80ddcabbfbdb4b131848/lxml-6.0.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0132bb040e9bb5a199302e12bf942741defbc52922a2a06ce9ff7be0d0046483", size = 5080972, upload-time = "2026-04-12T16:24:58.823Z" }, - { url = "https://files.pythonhosted.org/packages/f8/c3/54c53c4f772341bc12331557f8b0882a426f53133926306cbe6d7f0ee7e4/lxml-6.0.4-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26aee5321e4aa1f07c9090a35f6ab8b703903fb415c6c823cfdb20ee0d779855", size = 4992236, upload-time = "2026-04-12T16:25:01.099Z" }, - { url = "https://files.pythonhosted.org/packages/be/0f/416de42e22f287585abee610eb0d1c2638c9fe24cee7e15136e0b5e138f8/lxml-6.0.4-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b5652455de198ff76e02cfa57d5efc5f834fa45521aaf3fcc13d6b5a88bde23d", size = 5612398, upload-time = "2026-04-12T16:25:03.517Z" }, - { url = "https://files.pythonhosted.org/packages/7d/63/29a3fa79b8a182f5bd5b5bdcb6f625f49f08f41d60a26ca25482820a1b99/lxml-6.0.4-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75842801fb48aea73f4c281b923a010dfb39bad75edf8ceb2198ec30c27f01cc", size = 5227480, upload-time = "2026-04-12T16:25:06.119Z" }, - { url = "https://files.pythonhosted.org/packages/7c/4a/44d1843de599b1c6dbe578e4248c2f15e7fac90c5c86eb26775eaeac0fe0/lxml-6.0.4-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:94a1f74607a5a049ff6ff8de429fec922e643e32b5b08ec7a4fe49e8de76e17c", size = 5341001, upload-time = 
"2026-04-12T16:25:08.563Z" }, - { url = "https://files.pythonhosted.org/packages/0d/52/c8aebde49f169e4e3452e7756be35be1cb2903e30d961cb57aa65a27055f/lxml-6.0.4-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:173cc246d3d3b6d3b6491f0b3aaf22ebdf2eed616879482acad8bd84d73eb231", size = 4699105, upload-time = "2026-04-12T16:25:10.757Z" }, - { url = "https://files.pythonhosted.org/packages/78/60/76fc3735c31c28b70220d99452fb72052e84b618693ca2524da96f0131d8/lxml-6.0.4-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f0f2ee1be1b72e9890da87e4e422f2f703ff4638fd5ec5383055db431e8e30e9", size = 5231095, upload-time = "2026-04-12T16:25:13.305Z" }, - { url = "https://files.pythonhosted.org/packages/e5/60/448f01c52110102f23df5f07b3f4fde57c8e13e497e182a743d125324c0b/lxml-6.0.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c51a274b7e8b9ce394c3f8b471eb0b23c1914eec64fdccf674e082daf72abf11", size = 5042411, upload-time = "2026-04-12T16:25:15.541Z" }, - { url = "https://files.pythonhosted.org/packages/4a/2a/90612a001fa4fa0ff0443ebb0256a542670fe35473734c559720293e7aff/lxml-6.0.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:210ea934cba1a1ec42f88c4190c4d5c67b2d14321a8faed9b39e8378198ff99d", size = 4768431, upload-time = "2026-04-12T16:25:17.581Z" }, - { url = "https://files.pythonhosted.org/packages/84/d8/572845a7d741c8a8ffeaf928185263e14d97fbd355de164677340951d7a5/lxml-6.0.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:14fe654a59eebe16368c51778caeb0c8fda6f897adcd9afe828d87d13b5d5e51", size = 5634972, upload-time = "2026-04-12T16:25:20.111Z" }, - { url = "https://files.pythonhosted.org/packages/d7/1d/392b8c9f8cf1d502bbec50dee137c7af3dd5def5e5cd84572fbf0ba0541c/lxml-6.0.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:ec160a2b7e2b3cb71ec35010b19a1adea05785d19ba5c9c5f986b64b78fef564", size = 5222909, upload-time = "2026-04-12T16:25:22.243Z" }, - { url = "https://files.pythonhosted.org/packages/21/ab/949fc96f825cf083612aee65d5a02eacc5eaeb2815561220e33e1e160677/lxml-6.0.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d305b86ef10b23cf3a6d62a2ad23fa296f76495183ee623f64d2600f65ffe09c", size = 5249096, upload-time = "2026-04-12T16:25:24.781Z" }, - { url = "https://files.pythonhosted.org/packages/56/e8/fbe44df79ede5ff760401cc3c49c4204f49f0f529cc6b27d0af7b63f5472/lxml-6.0.4-cp313-cp313-win32.whl", hash = "sha256:a2f31380aa9a9b52591e79f1c1d3ac907688fbeb9d883ba28be70f2eb5db2277", size = 3595808, upload-time = "2026-04-12T16:25:26.747Z" }, - { url = "https://files.pythonhosted.org/packages/f8/df/e873abb881092256520edf0d67d686e36f3c86b3cf289f01b6458272dede/lxml-6.0.4-cp313-cp313-win_amd64.whl", hash = "sha256:b8efa9f681f15043e497293d58a4a63199564b253ed2291887d92bb3f74f59ab", size = 3994635, upload-time = "2026-04-12T16:25:28.828Z" }, - { url = "https://files.pythonhosted.org/packages/23/a8/9c56c8914b9b18d89face5a7472445002baf309167f7af65d988842129fd/lxml-6.0.4-cp313-cp313-win_arm64.whl", hash = "sha256:905abe6a5888129be18f85f2aea51f0c9863fa0722fb8530dfbb687d2841d221", size = 3657374, upload-time = "2026-04-12T16:25:30.901Z" }, - { url = "https://files.pythonhosted.org/packages/10/18/36e28a809c509a67496202771f545219ac5a2f1cd61aae325991fcf5ab91/lxml-6.0.4-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:569d3b18340863f603582d2124e742a68e85755eff5e47c26a55e298521e3a01", size = 8575045, upload-time = "2026-04-12T16:25:33.57Z" }, - { url = 
"https://files.pythonhosted.org/packages/11/38/a168c820e3b08d3b4fa0f4e6b53b3930086b36cc11e428106d38c36778cd/lxml-6.0.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3b6245ee5241342d45e1a54a4a8bc52ef322333ada74f24aa335c4ab36f20161", size = 4622963, upload-time = "2026-04-12T16:25:36.818Z" }, - { url = "https://files.pythonhosted.org/packages/53/e0/2c9d6abdd82358cea3c0d8d6ca272a6af0f38156abce7827efb6d5b62d17/lxml-6.0.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:79a1173ba3213a3693889a435417d4e9f3c07d96e30dc7cc3a712ed7361015fe", size = 4948832, upload-time = "2026-04-12T16:25:39.104Z" }, - { url = "https://files.pythonhosted.org/packages/96/d7/f2202852e91d7baf3a317f4523a9c14834145301e5b0f2e80c01c4bfbd49/lxml-6.0.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc18bb975666b443ba23aedd2fcf57e9d0d97546b52a1de97a447c4061ba4110", size = 5085865, upload-time = "2026-04-12T16:25:41.226Z" }, - { url = "https://files.pythonhosted.org/packages/09/57/abee549324496e92708f71391c6060a164d3c95369656a1a15e9f20d8162/lxml-6.0.4-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2079f5dc83291ac190a52f8354b78648f221ecac19fb2972a2d056b555824de7", size = 5030001, upload-time = "2026-04-12T16:25:43.695Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f8/432da7178c5917a16468af6c5da68fef7cf3357d4bd0e6f50272ec9a59b5/lxml-6.0.4-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3eda02da4ca16e9ca22bbe5654470c17fa1abcd967a52e4c2e50ff278221e351", size = 5646303, upload-time = "2026-04-12T16:25:46.577Z" }, - { url = "https://files.pythonhosted.org/packages/82/f9/e1c04ef667a6bf9c9dbd3bf04c50fa51d7ee25b258485bb748b27eb9a1c7/lxml-6.0.4-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c3787cdc3832b70e21ac2efafea2a82a8ccb5e85bec110dc68b26023e9d3caae", size = 5237940, upload-time = "2026-04-12T16:25:49.157Z" }, - { url = "https://files.pythonhosted.org/packages/d0/f0/cdea60d92df731725fc3c4f33e387b100f210acd45c92969e42d2ba993fa/lxml-6.0.4-cp314-cp314-manylinux_2_28_i686.whl", hash = "sha256:3f276d49c23103565d39440b9b3f4fc08fa22f5a96395ea4b4d4fea4458b1505", size = 5350050, upload-time = "2026-04-12T16:25:52.027Z" }, - { url = "https://files.pythonhosted.org/packages/2e/15/bf52c7a70b6081bb9e00d37cc90fcf60aa84468d9d173ad2fade38ec34c5/lxml-6.0.4-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:fdfdad73736402375b11b3a137e48cd09634177516baf5fc0bd80d1ca85f3cda", size = 4696409, upload-time = "2026-04-12T16:25:55.141Z" }, - { url = "https://files.pythonhosted.org/packages/c5/69/9bade267332cc06f9a9aa773b5a11bdfb249af485df9e142993009ea1fc4/lxml-6.0.4-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:75912421456946931daba0ec3cedfa824c756585d05bde97813a17992bfbd013", size = 5249072, upload-time = "2026-04-12T16:25:57.362Z" }, - { url = "https://files.pythonhosted.org/packages/14/ca/043bcacb096d6ed291cbbc58724e9625a453069d6edeb840b0bf18038d05/lxml-6.0.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:48cd5a88da67233fd82f2920db344503c2818255217cd6ea462c9bb8254ba7cb", size = 5083779, upload-time = "2026-04-12T16:26:00.018Z" }, - { url = "https://files.pythonhosted.org/packages/04/89/f5fb18d76985969e84af13682e489acabee399bb54738a363925ea6e7390/lxml-6.0.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:87af86a8fa55b9ff1e6ee4233d762296f2ce641ba948af783fb995c5a8a3371b", size = 4736953, upload-time = "2026-04-12T16:26:02.289Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/ba/d1d7284bb4ba951f188c3fc0455943c1fcbd1c33d1324d6d57b7d4a45be6/lxml-6.0.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a743714cd656ba7ccb29d199783906064c7b5ba3c0e2a79f0244ea0badc6a98c", size = 5669605, upload-time = "2026-04-12T16:26:04.694Z" }, - { url = "https://files.pythonhosted.org/packages/72/05/1463e55f2de27bb60feddc894dd7c0833bd501f8861392ed416291b38db5/lxml-6.0.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e31c76bd066fb4f81d9a32e5843bffdf939ab27afb1ffc1c924e749bfbdb00e3", size = 5236886, upload-time = "2026-04-12T16:26:07.659Z" }, - { url = "https://files.pythonhosted.org/packages/fe/fb/0b6ee9194ce3ac49db4cadaa8a9158f04779fc768b6c27c4e2945d71a99d/lxml-6.0.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f185fd6e7d550e9917d7103dccf51be589aba953e15994fb04646c1730019685", size = 5263382, upload-time = "2026-04-12T16:26:10.067Z" }, - { url = "https://files.pythonhosted.org/packages/9a/93/ec18a08e98dd82cac39f1d2511ee2bed5affb94d228356d8ef165a4ec3b9/lxml-6.0.4-cp314-cp314-win32.whl", hash = "sha256:774660028f8722a598400430d2746fb0075949f84a9a5cd9767d9152e3baaac5", size = 3656164, upload-time = "2026-04-12T16:26:59.568Z" }, - { url = "https://files.pythonhosted.org/packages/15/86/52507316abfc7150bf6bb191e39a12e301ee80334610a493884ae2f9d20d/lxml-6.0.4-cp314-cp314-win_amd64.whl", hash = "sha256:fbd7d14349413f5609c0b537b1a48117d6ccef1af37986af6b03766ad05bf43e", size = 4062512, upload-time = "2026-04-12T16:27:02.212Z" }, - { url = "https://files.pythonhosted.org/packages/f1/d5/09c593a2ef2234b8cd6cf059e2dc212e0654bf05c503f0ef2daf05adb680/lxml-6.0.4-cp314-cp314-win_arm64.whl", hash = "sha256:a61a01ec3fbfd5b73a69a7bf513271051fd6c5795d82fc5daa0255934cd8db3d", size = 3740745, upload-time = "2026-04-12T16:27:04.444Z" }, - { url = "https://files.pythonhosted.org/packages/4a/3c/42a98bf6693938bf7b285ec7f70ba2ae9d785d0e5b2cdb85d2ee29e287eb/lxml-6.0.4-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:504edb62df33cea502ea6e73847c647ba228623ca3f80a228be5723a70984dd5", size = 8826437, upload-time = "2026-04-12T16:26:12.911Z" }, - { url = "https://files.pythonhosted.org/packages/c2/c2/ad13f39b2db8709788aa2dcb6e90b81da76db3b5b2e7d35e0946cf984960/lxml-6.0.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f01b7b0316d4c0926d49a7f003b2d30539f392b140a3374bb788bad180bc8478", size = 4734892, upload-time = "2026-04-12T16:26:15.871Z" }, - { url = "https://files.pythonhosted.org/packages/2c/6d/c559d7b5922c5b0380fc2cb5ac134b6a3f9d79d368347a624ee5d68b0816/lxml-6.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab999933e662501efe4b16e6cfb7c9f9deca7d072cd1788b99c8defde78c0dfb", size = 4969173, upload-time = "2026-04-12T16:26:18.335Z" }, - { url = "https://files.pythonhosted.org/packages/c7/78/ca521e36157f38e3e1a29276855cdf48d213138fc0c8365693ff5c876ca7/lxml-6.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67c3f084389fe75932c39b6869a377f6c8e21e818f31ae8a30c71dd2e59360e2", size = 5103134, upload-time = "2026-04-12T16:26:20.612Z" }, - { url = "https://files.pythonhosted.org/packages/28/a7/7d62d023bacaa0aaf60af8c0a77c6c05f84327396d755f3aa64b788678a9/lxml-6.0.4-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:377ea1d654f76ed6205c87d14920f829c9f4d31df83374d3cbcbdaae804d37b2", size = 5027205, upload-time = "2026-04-12T16:26:22.981Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/be/51b194b81684f2e85e5d992771c45d70cb22ac6f7291ac6bc7b255830afe/lxml-6.0.4-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e60cd0bcacbfd1a96d63516b622183fb2e3f202300df9eb5533391a8a939dbfa", size = 5594461, upload-time = "2026-04-12T16:26:25.316Z" }, - { url = "https://files.pythonhosted.org/packages/39/24/8850f38fbf89dd072ff31ba22f9e40347aeada7cadf710ecb04b8d9f32d4/lxml-6.0.4-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e9e30fd63d41dd0bbdb020af5cdfffd5d9b554d907cb210f18e8fcdc8eac013", size = 5223378, upload-time = "2026-04-12T16:26:28.68Z" }, - { url = "https://files.pythonhosted.org/packages/2a/9b/595239ba8c719b0fdc7bc9ebdb7564459c9a6b24b8b363df4a02674aeece/lxml-6.0.4-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:1fb4a1606bb68c533002e7ed50d7e55e58f0ef1696330670281cb79d5ab2050d", size = 5311415, upload-time = "2026-04-12T16:26:31.513Z" }, - { url = "https://files.pythonhosted.org/packages/be/cb/aa27ac8d041acf34691577838494ad08df78e83fdfdb66948d2903e9291e/lxml-6.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:695c7708438e449d57f404db8cc1b769e77ad5b50655f32f8175686ba752f293", size = 4637953, upload-time = "2026-04-12T16:26:33.806Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f2/f19114fd86825c2d1ce41cd99daad218d30cfdd2093d4de9273986fb4d68/lxml-6.0.4-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d49c35ae1e35ee9b569892cf8f8f88db9524f28d66e9daee547a5ef9f3c5f468", size = 5231532, upload-time = "2026-04-12T16:26:36.518Z" }, - { url = "https://files.pythonhosted.org/packages/9a/0e/c3fa354039ec0b6b09f40fbe1129efc572ac6239faa4906de42d5ce87c0a/lxml-6.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5801072f8967625e6249d162065d0d6011ef8ce3d0efb8754496b5246b81a74b", size = 5083767, upload-time = "2026-04-12T16:26:39.332Z" }, - { url = "https://files.pythonhosted.org/packages/b3/4b/1a0dbb6d6ffae16e54a8a3796ded0ad2f9c3bc1ff3728bde33456f4e1d63/lxml-6.0.4-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cbf768541526eba5ef1a49f991122e41b39781eafd0445a5a110fc09947a20b5", size = 4758079, upload-time = "2026-04-12T16:26:42.138Z" }, - { url = "https://files.pythonhosted.org/packages/a9/01/a246cf5f80f96766051de4b305d6552f80bdaefb37f04e019e42af0aba69/lxml-6.0.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:eecce87cc09233786fc31c230268183bf6375126cfec1c8b3673fcdc8767b560", size = 5618686, upload-time = "2026-04-12T16:26:44.507Z" }, - { url = "https://files.pythonhosted.org/packages/eb/1f/b072a92369039ebef11b0a654be5134fcf3ed04c0f437faf9435ac9ba845/lxml-6.0.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:07dce892881179e11053066faca2da17b0eeb0bb7298f11bcf842a86db207dbd", size = 5227259, upload-time = "2026-04-12T16:26:47.083Z" }, - { url = "https://files.pythonhosted.org/packages/d5/a0/dc97034f9d4c0c4d30875147d81fd2c0c7f3d261b109db36ed746bf8ab1d/lxml-6.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e4f97aee337b947e6699e5574c90d087d3e2ce517016241c07e7e98a28dca885", size = 5246190, upload-time = "2026-04-12T16:26:49.468Z" }, - { url = "https://files.pythonhosted.org/packages/f2/ef/85cb69835113583c2516fee07d0ffb4d824b557424b06ba5872c20ba6078/lxml-6.0.4-cp314-cp314t-win32.whl", hash = "sha256:064477c0d4c695aa1ea4b9c1c4ee9043ab740d12135b74c458cc658350adcd86", size = 3896005, upload-time = "2026-04-12T16:26:52.163Z" }, - { url = 
"https://files.pythonhosted.org/packages/3d/5e/2231f34cc54b8422b793593138d86d3fa4588fb2297d4ea0472390f25627/lxml-6.0.4-cp314-cp314t-win_amd64.whl", hash = "sha256:25bad2d8438f4ef5a7ad4a8d8bcaadde20c0daced8bdb56d46236b0a7d1cbdd0", size = 4391037, upload-time = "2026-04-12T16:26:54.398Z" }, - { url = "https://files.pythonhosted.org/packages/39/53/8ba3cd5984f8363635450c93f63e541a0721b362bb32ae0d8237d9674aee/lxml-6.0.4-cp314-cp314t-win_arm64.whl", hash = "sha256:1dcd9e6cb9b7df808ea33daebd1801f37a8f50e8c075013ed2a2343246727838", size = 3816184, upload-time = "2026-04-12T16:26:57.011Z" }, +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/28/30/9abc9e34c657c33834eaf6cd02124c61bdf5944d802aa48e69be8da3585d/lxml-6.1.0.tar.gz", hash = "sha256:bfd57d8008c4965709a919c3e9a98f76c2c7cb319086b3d26858250620023b13", size = 4197006, upload-time = "2026-04-18T04:32:51.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/03/69347590f1cf4a6d5a4944bb6099e6d37f334784f16062234e1f892fdb1d/lxml-6.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a0092f2b107b69601adf562a57c956fbb596e05e3e6651cabd3054113b007e45", size = 8559689, upload-time = "2026-04-18T04:31:57.785Z" }, + { url = "https://files.pythonhosted.org/packages/3f/58/25e00bb40b185c974cfe156c110474d9a8a8390d5f7c92a4e328189bb60e/lxml-6.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fc7140d7a7386e6b545d41b7358f4d02b656d4053f5fa6859f92f4b9c2572c4d", size = 4617892, upload-time = "2026-04-18T04:32:01.78Z" }, + { url = "https://files.pythonhosted.org/packages/f5/54/92ad98a94ac318dc4f97aaac22ff8d1b94212b2ae8af5b6e9b354bf825f7/lxml-6.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:419c58fc92cc3a2c3fa5f78c63dbf5da70c1fa9c1b25f25727ecee89a96c7de2", size = 4923489, upload-time = "2026-04-18T04:33:31.401Z" }, + { url = "https://files.pythonhosted.org/packages/15/3b/a20aecfab42bdf4f9b390590d345857ad3ffd7c51988d1c89c53a0c73faf/lxml-6.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:37fabd1452852636cf38ecdcc9dd5ca4bba7a35d6c53fa09725deeb894a87491", size = 5082162, upload-time = "2026-04-18T04:33:34.262Z" }, + { url = "https://files.pythonhosted.org/packages/45/26/2cdb3d281ac1bd175603e290cbe4bad6eff127c0f8de90bafd6f8548f0fd/lxml-6.1.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2853c8b2170cc6cd54a6b4d50d2c1a8a7aeca201f23804b4898525c7a152cfc", size = 4993247, upload-time = "2026-04-18T04:33:36.674Z" }, + { url = "https://files.pythonhosted.org/packages/f6/05/d735aef963740022a08185c84821f689fc903acb3d50326e6b1e9886cc22/lxml-6.1.0-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8e369cbd690e788c8d15e56222d91a09c6a417f49cbc543040cba0fe2e25a79e", size = 5613042, upload-time = "2026-04-18T04:33:39.205Z" }, + { url = "https://files.pythonhosted.org/packages/ee/b8/ead7c10efff731738c72e59ed6eb5791854879fbed7ae98781a12006263a/lxml-6.1.0-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e69aa6805905807186eb00e66c6d97a935c928275182eb02ee40ba00da9623b2", size = 5228304, upload-time = "2026-04-18T04:33:41.647Z" }, + { url = "https://files.pythonhosted.org/packages/6b/10/e9842d2ec322ea65f0a7270aa0315a53abed06058b88ef1b027f620e7a5f/lxml-6.1.0-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:4bd1bdb8a9e0e2dd229de19b5f8aebac80e916921b4b2c6ef8a52bc131d0c1f9", size = 5341578, upload-time = "2026-04-18T04:33:44.596Z" 
}, + { url = "https://files.pythonhosted.org/packages/89/54/40d9403d7c2775fa7301d3ddd3464689bfe9ba71acc17dfff777071b4fdc/lxml-6.1.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:cbd7b79cdcb4986ad78a2662625882747f09db5e4cd7b2ae178a88c9c51b3dfe", size = 4700209, upload-time = "2026-04-18T04:33:47.552Z" }, + { url = "https://files.pythonhosted.org/packages/85/b2/bbdcc2cf45dfc7dfffef4fd97e5c47b15919b6a365247d95d6f684ef5e82/lxml-6.1.0-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:43e4d297f11080ec9d64a4b1ad7ac02b4484c9f0e2179d9c4ef78e886e747b88", size = 5232365, upload-time = "2026-04-18T04:33:50.249Z" }, + { url = "https://files.pythonhosted.org/packages/48/5a/b06875665e53aaba7127611a7bed3b7b9658e20b22bc2dd217a0b7ab0091/lxml-6.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cc16682cc987a3da00aa56a3aa3075b08edb10d9b1e476938cfdbee8f3b67181", size = 5043654, upload-time = "2026-04-18T04:33:52.71Z" }, + { url = "https://files.pythonhosted.org/packages/e9/9c/e71a069d09641c1a7abeb30e693f828c7c90a41cbe3d650b2d734d876f85/lxml-6.1.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d6d8efe71429635f0559579092bb5e60560d7b9115ee38c4adbea35632e7fa24", size = 4769326, upload-time = "2026-04-18T04:33:55.244Z" }, + { url = "https://files.pythonhosted.org/packages/cc/06/7a9cd84b3d4ed79adf35f874750abb697dec0b4a81a836037b36e47c091a/lxml-6.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7e39ab3a28af7784e206d8606ec0e4bcad0190f63a492bca95e94e5a4aef7f6e", size = 5635879, upload-time = "2026-04-18T04:33:58.509Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f0/9d57916befc1e54c451712c7ee48e9e74e80ae4d03bdce49914e0aee42cd/lxml-6.1.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:9eb667bf50856c4a58145f8ca2d5e5be160191e79eb9e30855a476191b3c3495", size = 5224048, upload-time = "2026-04-18T04:34:00.943Z" }, + { url = "https://files.pythonhosted.org/packages/99/75/90c4eefda0c08c92221fe0753db2d6699a4c628f76ff4465ec20dea84cc1/lxml-6.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7f4a77d6f7edf9230cee3e1f7f6764722a41604ee5681844f18db9a81ea0ec33", size = 5250241, upload-time = "2026-04-18T04:34:03.365Z" }, + { url = "https://files.pythonhosted.org/packages/5e/73/16596f7e4e38fa33084b9ccbccc22a15f82a290a055126f2c1541236d2ff/lxml-6.1.0-cp313-cp313-win32.whl", hash = "sha256:28902146ffbe5222df411c5d19e5352490122e14447e98cd118907ee3fd6ee62", size = 3596938, upload-time = "2026-04-18T04:31:56.206Z" }, + { url = "https://files.pythonhosted.org/packages/8e/63/981401c5680c1eb30893f00a19641ac80db5d1e7086c62cb4b13ed813038/lxml-6.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:4a1503c56e4e2b38dc76f2f2da7bae69670c0f1933e27cfa34b2fa5876410b16", size = 3995728, upload-time = "2026-04-18T04:31:58.763Z" }, + { url = "https://files.pythonhosted.org/packages/e7/e8/c358a38ac3e541d16a1b527e4e9cb78c0419b0506a070ace11777e5e8404/lxml-6.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:e0af85773850417d994d019741239b901b22c6680206f46a34766926e466141d", size = 3658372, upload-time = "2026-04-18T04:32:03.629Z" }, + { url = "https://files.pythonhosted.org/packages/eb/45/cee4cf203ef0bab5c52afc118da61d6b460c928f2893d40023cfa27e0b80/lxml-6.1.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:ab863fd37458fed6456525f297d21239d987800c46e67da5ef04fc6b3dd93ac8", size = 8576713, upload-time = "2026-04-18T04:32:06.831Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/a7/eda05babeb7e046839204eaf254cd4d7c9130ce2bbf0d9e90ea41af5654d/lxml-6.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:6fd8b1df8254ff4fd93fd31da1fc15770bde23ac045be9bb1f87425702f61cc9", size = 4623874, upload-time = "2026-04-18T04:32:10.755Z" }, + { url = "https://files.pythonhosted.org/packages/e7/e9/db5846de9b436b91890a62f29d80cd849ea17948a49bf532d5278ee69a9e/lxml-6.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:47024feaae386a92a146af0d2aeed65229bf6fff738e6a11dda6b0015fb8fd03", size = 4949535, upload-time = "2026-04-18T04:34:06.657Z" }, + { url = "https://files.pythonhosted.org/packages/5a/ba/0d3593373dcae1d68f40dc3c41a5a92f2544e68115eb2f62319a4c2a6500/lxml-6.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3f00972f84450204cd5d93a5395965e348956aaceaadec693a22ec743f8ae3eb", size = 5086881, upload-time = "2026-04-18T04:34:09.556Z" }, + { url = "https://files.pythonhosted.org/packages/43/76/759a7484539ad1af0d125a9afe9c3fb5f82a8779fd1f5f56319d9e4ea2fd/lxml-6.1.0-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97faa0860e13b05b15a51fb4986421ef7a30f0b3334061c416e0981e9450ca4c", size = 5031305, upload-time = "2026-04-18T04:34:12.336Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b9/c1f0daf981a11e47636126901fd4ab82429e18c57aeb0fc3ad2940b42d8b/lxml-6.1.0-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:972a6451204798675407beaad97b868d0c733d9a74dafefc63120b81b8c2de28", size = 5647522, upload-time = "2026-04-18T04:34:14.89Z" }, + { url = "https://files.pythonhosted.org/packages/31/e6/1f533dcd205275363d9ba3511bcec52fa2df86abf8abe6a5f2c599f0dc31/lxml-6.1.0-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fe022f20bc4569ec66b63b3fb275a3d628d9d32da6326b2982584104db6d3086", size = 5239310, upload-time = "2026-04-18T04:34:17.652Z" }, + { url = "https://files.pythonhosted.org/packages/c3/8c/4175fb709c78a6e315ed814ed33be3defd8b8721067e70419a6cf6f971da/lxml-6.1.0-cp314-cp314-manylinux_2_28_i686.whl", hash = "sha256:75c4c7c619a744f972f4451bf5adf6d0fb00992a1ffc9fd78e13b0bc817cc99f", size = 5350799, upload-time = "2026-04-18T04:34:20.529Z" }, + { url = "https://files.pythonhosted.org/packages/fd/77/6ffdebc5994975f0dde4acb59761902bd9d9bb84422b9a0bd239a7da9ca8/lxml-6.1.0-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:3648f20d25102a22b6061c688beb3a805099ea4beb0a01ce62975d926944d292", size = 4697693, upload-time = "2026-04-18T04:34:23.541Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f1/565f36bd5c73294602d48e04d23f81ff4c8736be6ba5e1d1ec670ac9be80/lxml-6.1.0-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77b9f99b17cbf14026d1e618035077060fc7195dd940d025149f3e2e830fbfcb", size = 5250708, upload-time = "2026-04-18T04:34:26.001Z" }, + { url = "https://files.pythonhosted.org/packages/5a/11/a68ab9dd18c5c499404deb4005f4bc4e0e88e5b72cd755ad96efec81d18d/lxml-6.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:32662519149fd7a9db354175aa5e417d83485a8039b8aaa62f873ceee7ea4cad", size = 5084737, upload-time = "2026-04-18T04:34:28.32Z" }, + { url = "https://files.pythonhosted.org/packages/ab/78/e8f41e2c74f4af564e6a0348aea69fb6daaefa64bc071ef469823d22cc18/lxml-6.1.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:73d658216fc173cf2c939e90e07b941c5e12736b0bf6a99e7af95459cfe8eabb", size = 4737817, upload-time = "2026-04-18T04:34:30.784Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/2d/aa4e117aa2ce2f3b35d9ff246be74a2f8e853baba5d2a92c64744474603a/lxml-6.1.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ac4db068889f8772a4a698c5980ec302771bb545e10c4b095d4c8be26749616f", size = 5670753, upload-time = "2026-04-18T04:34:33.675Z" }, + { url = "https://files.pythonhosted.org/packages/08/f5/dd745d50c0409031dbfcc4881740542a01e54d6f0110bd420fa7782110b8/lxml-6.1.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:45e9dfbd1b661eb64ba0d4dbe762bd210c42d86dd1e5bd2bdf89d634231beb43", size = 5238071, upload-time = "2026-04-18T04:34:36.12Z" }, + { url = "https://files.pythonhosted.org/packages/3e/74/ad424f36d0340a904665867dab310a3f1f4c96ff4039698de83b77f44c1f/lxml-6.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:89e8d73d09ac696a5ba42ec69787913d53284f12092f651506779314f10ba585", size = 5264319, upload-time = "2026-04-18T04:34:39.035Z" }, + { url = "https://files.pythonhosted.org/packages/53/36/a15d8b3514ec889bfd6aa3609107fcb6c9189f8dc347f1c0b81eded8d87c/lxml-6.1.0-cp314-cp314-win32.whl", hash = "sha256:ebe33f4ec1b2de38ceb225a1749a2965855bffeef435ba93cd2d5d540783bf2f", size = 3657139, upload-time = "2026-04-18T04:32:20.006Z" }, + { url = "https://files.pythonhosted.org/packages/1a/a4/263ebb0710851a3c6c937180a9a86df1206fdfe53cc43005aa2237fd7736/lxml-6.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:398443df51c538bd578529aa7e5f7afc6c292644174b47961f3bf87fe5741120", size = 4064195, upload-time = "2026-04-18T04:32:23.876Z" }, + { url = "https://files.pythonhosted.org/packages/80/68/2000f29d323b6c286de077ad20b429fc52272e44eae6d295467043e56012/lxml-6.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:8c8984e1d8c4b3949e419158fda14d921ff703a9ed8a47236c6eb7a2b6cb4946", size = 3741870, upload-time = "2026-04-18T04:32:27.922Z" }, + { url = "https://files.pythonhosted.org/packages/30/e9/21383c7c8d43799f0da90224c0d7c921870d476ec9b3e01e1b2c0b8237c5/lxml-6.1.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:1081dd10bc6fa437db2500e13993abf7cc30716d0a2f40e65abb935f02ec559c", size = 8827548, upload-time = "2026-04-18T04:32:15.094Z" }, + { url = "https://files.pythonhosted.org/packages/a5/01/c6bc11cd587030dd4f719f65c5657960649fe3e19196c844c75bf32cd0d6/lxml-6.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:dabecc48db5f42ba348d1f5d5afdc54c6c4cc758e676926c7cd327045749517d", size = 4735866, upload-time = "2026-04-18T04:32:18.924Z" }, + { url = "https://files.pythonhosted.org/packages/f3/01/757132fff5f4acf25463b5298f1a46099f3a94480b806547b29ce5e385de/lxml-6.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e3dd5fe19c9e0ac818a9c7f132a5e43c1339ec1cbbfecb1a938bd3a47875b7c9", size = 4969476, upload-time = "2026-04-18T04:34:41.889Z" }, + { url = "https://files.pythonhosted.org/packages/fd/fb/1bc8b9d27ed64be7c8903db6c89e74dc8c2cd9ec630a7462e4654316dc5b/lxml-6.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9e7b0a4ca6dcc007a4cef00a761bba2dea959de4bd2df98f926b33c92ca5dfb9", size = 5103719, upload-time = "2026-04-18T04:34:44.797Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e7/5bf82fa28133536a54601aae633b14988e89ed61d4c1eb6b899b023233aa/lxml-6.1.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d27bbe326c6b539c64b42638b18bc6003a8d88f76213a97ac9ed4f885efeab7", size = 5027890, upload-time = "2026-04-18T04:34:47.634Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/20/e048db5d4b4ea0366648aa595f26bb764b2670903fc585b87436d0a5032c/lxml-6.1.0-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4e425db0c5445ef0ad56b0eec54f89b88b2d884656e536a90b2f52aecb4ca86", size = 5596008, upload-time = "2026-04-18T04:34:51.503Z" }, + { url = "https://files.pythonhosted.org/packages/9a/c2/d10807bc8da4824b39e5bd01b5d05c077b6fd01bd91584167edf6b269d22/lxml-6.1.0-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4b89b098105b8599dc57adac95d1813409ac476d3c948a498775d3d0c6124bfb", size = 5224451, upload-time = "2026-04-18T04:34:54.263Z" }, + { url = "https://files.pythonhosted.org/packages/3c/15/2ebea45bea427e7f0057e9ce7b2d62c5aba20c6b001cca89ed0aadb3ad41/lxml-6.1.0-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:c4a699432846df86cc3de502ee85f445ebad748a1c6021d445f3e514d2cd4b1c", size = 5312135, upload-time = "2026-04-18T04:34:56.818Z" }, + { url = "https://files.pythonhosted.org/packages/31/e2/87eeae151b0be2a308d49a7ec444ff3eb192b14251e62addb29d0bf3778f/lxml-6.1.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:30e7b2ed63b6c8e97cca8af048589a788ab5c9c905f36d9cf1c2bb549f450d2f", size = 4639126, upload-time = "2026-04-18T04:34:59.704Z" }, + { url = "https://files.pythonhosted.org/packages/a3/51/8a3f6a20902ad604dd746ec7b4000311b240d389dac5e9d95adefd349e0c/lxml-6.1.0-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:022981127642fe19866d2907d76241bb07ed21749601f727d5d5dd1ce5d1b773", size = 5232579, upload-time = "2026-04-18T04:35:02.658Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d2/650d619bdbe048d2c3f2c31edb00e35670a5e2d65b4fe3b61bce37b19121/lxml-6.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:23cad0cc86046d4222f7f418910e46b89971c5a45d3c8abfad0f64b7b05e4a9b", size = 5084206, upload-time = "2026-04-18T04:35:05.175Z" }, + { url = "https://files.pythonhosted.org/packages/dd/8a/672ca1a3cbeabd1f511ca275a916c0514b747f4b85bdaae103b8fa92f307/lxml-6.1.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:21c3302068f50d1e8728c67c87ba92aa87043abee517aa2576cca1855326b405", size = 4758906, upload-time = "2026-04-18T04:35:08.098Z" }, + { url = "https://files.pythonhosted.org/packages/be/f1/ef4b691da85c916cb2feb1eec7414f678162798ac85e042fa164419ac05c/lxml-6.1.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:be10838781cb3be19251e276910cd508fe127e27c3242e50521521a0f3781690", size = 5620553, upload-time = "2026-04-18T04:35:11.23Z" }, + { url = "https://files.pythonhosted.org/packages/59/17/94e81def74107809755ac2782fdad4404420f1c92ca83433d117a6d5acf0/lxml-6.1.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2173a7bffe97667bbf0767f8a99e587740a8c56fdf3befac4b09cb29a80276fd", size = 5229458, upload-time = "2026-04-18T04:35:14.254Z" }, + { url = "https://files.pythonhosted.org/packages/21/55/c4be91b0f830a871fc1b0d730943d56013b683d4671d5198260e2eae722b/lxml-6.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c6854e9cf99c84beb004eecd7d3a3868ef1109bf2b1df92d7bc11e96a36c2180", size = 5247861, upload-time = "2026-04-18T04:35:17.006Z" }, + { url = "https://files.pythonhosted.org/packages/c2/ca/77123e4d77df3cb1e968ade7b1f808f5d3a5c1c96b18a33895397de292c1/lxml-6.1.0-cp314-cp314t-win32.whl", hash = "sha256:00750d63ef0031a05331b9223463b1c7c02b9004cef2346a5b2877f0f9494dd2", size = 3897377, upload-time = "2026-04-18T04:32:07.656Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/ce/3554833989d074267c063209bae8b09815e5656456a2d332b947806b05ff/lxml-6.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:80410c3a7e3c617af04de17caa9f9f20adaa817093293d69eae7d7d0522836f5", size = 4392701, upload-time = "2026-04-18T04:32:12.113Z" }, + { url = "https://files.pythonhosted.org/packages/2b/a0/9b916c68c0e57752c07f8f64b30138d9d4059dbeb27b90274dedbea128ff/lxml-6.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:26dd9f57ee3bd41e7d35b4c98a2ffd89ed11591649f421f0ec19f67d50ec67ac", size = 3817120, upload-time = "2026-04-18T04:32:15.803Z" }, ] [[package]]