From 1b1449b64096c9ea9ced85c400b70ea986a7137e Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 27 May 2026 09:07:13 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=AA=20Add=20tests=20for=20strip=5Fdoi?=
 =?UTF-8?q?=5Ftransport=5Fprefix=20helper?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a pytest module for strip_doi_transport_prefix.

The parametrized test expects the bare DOI to be returned for:
  - inputs that are already bare DOIs (returned unchanged);
  - http:// and https:// URLs on doi.org and dx.doi.org;
  - a "doi:" prefix written in lower, upper or mixed case;
  - inputs with surrounding whitespace, with or without a "doi:" prefix.

A second test expects "DOI:10.123/abc" to yield "10.123/abc" whether
allow_uppercase_prefix is passed as False or as True.

Co-authored-by: SatoryKono <13055362+SatoryKono@users.noreply.github.com>
---
 .../adapters/common/test_doi_helpers.py       | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 tests/unit/infrastructure/adapters/common/test_doi_helpers.py

diff --git a/tests/unit/infrastructure/adapters/common/test_doi_helpers.py b/tests/unit/infrastructure/adapters/common/test_doi_helpers.py
new file mode 100644
index 0000000000..4ccf163473
--- /dev/null
+++ b/tests/unit/infrastructure/adapters/common/test_doi_helpers.py
@@ -0,0 +1,43 @@
+"""Unit tests for DOI transport helpers."""
+
+import pytest
+
+from bioetl.infrastructure.adapters.common.doi_helpers import strip_doi_transport_prefix
+
+
+@pytest.mark.parametrize(
+    ("input_doi", "expected_output"),
+    [
+        # Bare DOIs (no change expected)
+        ("10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        ("10.1016/j.cell.2021.02.021", "10.1016/j.cell.2021.02.021"),
+        # HTTP URL formats
+        ("http://doi.org/10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        ("https://doi.org/10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        ("http://dx.doi.org/10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        ("https://dx.doi.org/10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        # DOI: prefix (case insensitive according to implementation)
+        ("doi:10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        ("DOI:10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        ("DoI:10.1038/s41586-020-2649-2", "10.1038/s41586-020-2649-2"),
+        # With whitespace
+        (" 10.1038/s41586-020-2649-2 ", "10.1038/s41586-020-2649-2"),
+        (" doi:10.1038/s41586-020-2649-2 ", "10.1038/s41586-020-2649-2"),
+    ],
+)
+def test_strip_doi_transport_prefix(input_doi: str, expected_output: str) -> None:
+    """Test that strip_doi_transport_prefix correctly removes prefixes."""
+    assert strip_doi_transport_prefix(input_doi) == expected_output
+
+
+def test_strip_doi_transport_prefix_allow_uppercase_prefix_ignored() -> None:
+    """Test that the allow_uppercase_prefix argument is safely ignored."""
+    # It should strip "DOI:" even if allow_uppercase_prefix=False
+    assert (
+        strip_doi_transport_prefix("DOI:10.123/abc", allow_uppercase_prefix=False)
+        == "10.123/abc"
+    )
+    assert (
+        strip_doi_transport_prefix("DOI:10.123/abc", allow_uppercase_prefix=True)
+        == "10.123/abc"
+    )