From 8e2a9d69ce57ec5b6a64028319779470e1b8ae89 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 22 May 2026 19:34:11 -0700 Subject: [PATCH 1/2] Reject uppercase HTTP(S) schemes in geotiff URL dispatch (#2323) URL schemes are case-insensitive per RFC 3986. The geotiff reader's dispatch helpers used case-sensitive `startswith(('http://', 'https://'))` checks, so an uppercase `HTTP://127.0.0.1/foo.tif` would skip `_HTTPSource` (and its SSRF allow-list + pinned DNS from #1664 / #1846) and route to the fsspec branch via `_is_fsspec_uri`. Add `_is_http_url` in `_sources.py` that parses the URL once and case-folds the scheme. Switch every dispatch site to use it: - `_reader.read_to_array` (line 145) - `_backends/dask` HTTP/fsspec branch (line 197) and the inner per-tile source-class check (line 580) - `_sources._is_fsspec_uri` (line 1454) - `_sources._open_source` (line 1639) - `_sidecar._is_http_url` (delegates to canonical helper) - `_writer._is_fsspec_uri` (delegates to canonical helper) - `_backends/gpu` eager path (line 350) and KvikIO eligibility check (line 1056) Add a focused regression test module (`test_uppercase_scheme_ssrf_2323`) that: - pins `_is_http_url` and `_is_fsspec_uri` against case-insensitive variants (`HTTP`, `Http`, `hTTpS`) - confirms `_open_source('HTTP://127.0.0.1/...')` and an uppercase URL pointing at the cloud-metadata IP (169.254.169.254) raise `UnsafeURLError` via the SSRF validator - confirms `read_to_array('HTTP://...')` reaches the `_read_cog_http` path (and not the fsspec `_CloudSource` path) --- xrspatial/geotiff/_backends/dask.py | 11 +- xrspatial/geotiff/_backends/gpu.py | 9 +- xrspatial/geotiff/_reader.py | 4 +- xrspatial/geotiff/_sidecar.py | 5 +- xrspatial/geotiff/_sources.py | 28 ++- xrspatial/geotiff/_writer.py | 9 +- .../tests/test_uppercase_scheme_ssrf_2323.py | 204 ++++++++++++++++++ 7 files changed, 251 insertions(+), 19 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py diff 
--git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 95f7b09a..6da17cb3 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -194,11 +194,8 @@ def read_geotiff_dask(source: str, *, # and ``_CloudSource`` satisfies that contract. Going through it # bounds metadata reads to ``MAX_HTTP_HEADER_BYTES`` instead of # fetching the whole remote object up front. See PR #1755 review. - is_http = ( - isinstance(source, str) - and source.startswith(('http://', 'https://')) - ) - from .._reader import _is_fsspec_uri + from .._reader import _is_fsspec_uri, _is_http_url + is_http = _is_http_url(source) is_fsspec = isinstance(source, str) and _is_fsspec_uri(source) http_meta = None http_meta_key = None @@ -576,8 +573,8 @@ def _read(http_meta): # fsspec-addressable remotes (s3://, gs://, az://, memory://, ...). # Both source classes expose ``read_range``, which is all # ``_fetch_decode_cog_http_tiles`` needs. - _is_http_src = isinstance(source, str) and source.startswith( - ('http://', 'https://')) + from .._reader import _is_http_url as _ihu + _is_http_src = _ihu(source) _is_fsspec_src = False if http_meta is not None and isinstance(source, str) and \ not _is_http_src: diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 92ba91f0..85e1d19d 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -346,8 +346,9 @@ def read_geotiff_gpu(source: str, *, # whole image either way for the eager path; the trade-off is a CPU # decode instead of nvCOMP-on-GPU. Callers who want bounded GPU # memory should pass ``chunks=...``. 
+ from .._reader import _is_http_url if isinstance(source, str) and ( - source.startswith(('http://', 'https://')) + _is_http_url(source) or _is_fsspec_uri(source)): return _read_geotiff_gpu_eager_via_cpu( source, dtype=dtype, window=window, @@ -1053,10 +1054,10 @@ def _gds_chunk_path_available(source, ifd, has_sparse_tile, orientation): """ if not isinstance(source, str): return False - if source.startswith(('http://', 'https://')): - return False try: - from .._reader import _is_fsspec_uri + from .._reader import _is_fsspec_uri, _is_http_url + if _is_http_url(source): + return False if _is_fsspec_uri(source): return False except Exception: diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index f9a3003e..3c639386 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -93,7 +93,7 @@ _BytesIOSource, _CloudSource, _coerce_path, _FileSource, _get_http_pool, _get_pinned_conn_classes, _http_allow_private_hosts, _http_connect_timeout, _http_read_timeout, _http_timeout_from_env, _HTTPSource, _ip_is_private, - _is_file_like, _is_fsspec_uri, _make_pinned_pool, + _is_file_like, _is_fsspec_uri, _is_http_url, _make_pinned_pool, _max_coalesced_range_bytes_from_env, _max_tile_bytes_from_env, _mmap_cache, _mmap_cache_size_from_env, _MmapCache, _open_source, _resolve_max_cloud_bytes, _validate_http_url, coalesce_ranges, @@ -142,7 +142,7 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, (np.ndarray, GeoInfo) tuple """ source = _coerce_path(source) - if isinstance(source, str) and source.startswith(('http://', 'https://')): + if _is_http_url(source): return _read_cog_http(source, overview_level=overview_level, band=band, max_pixels=max_pixels, window=window, allow_rotated=allow_rotated) diff --git a/xrspatial/geotiff/_sidecar.py b/xrspatial/geotiff/_sidecar.py index 050900f0..e4906823 100644 --- a/xrspatial/geotiff/_sidecar.py +++ b/xrspatial/geotiff/_sidecar.py @@ -43,7 +43,10 @@ class 
SidecarOverviews(NamedTuple): def _is_http_url(source: str) -> bool: - return source.startswith(("http://", "https://")) + # Delegate to the canonical case-insensitive check so uppercase + # ``HTTP://`` URLs cannot dodge SSRF validation (issue #2323). + from ._sources import _is_http_url as _ihu + return _ihu(source) def find_sidecar(source) -> str | None: diff --git a/xrspatial/geotiff/_sources.py b/xrspatial/geotiff/_sources.py index c774f465..ec948fb8 100644 --- a/xrspatial/geotiff/_sources.py +++ b/xrspatial/geotiff/_sources.py @@ -1451,11 +1451,33 @@ def close(self): _CLOUD_SCHEMES = ('s3://', 'gs://', 'az://', 'abfs://') +def _is_http_url(path) -> bool: + """Return True if *path* is an ``http://`` or ``https://`` URL. + + Case-insensitive: URL schemes are case-insensitive per RFC 3986, so an + uppercase ``HTTP://`` or mixed-case ``Http://`` must dispatch to the + SSRF-validating :class:`_HTTPSource`, not to the fsspec branch. See + issue #2323. + """ + if not isinstance(path, str): + return False + from urllib.parse import urlparse + try: + scheme = urlparse(path).scheme + except (ValueError, TypeError): + return False + return scheme.lower() in ('http', 'https') + + def _is_fsspec_uri(path: str) -> bool: - """Check if a path is a fsspec-compatible URI (not http/https/local).""" + """Check if a path is a fsspec-compatible URI (not http/https/local). + + Excludes http(s) case-insensitively so uppercase URLs cannot dodge the + SSRF allow-list and pinned DNS in :class:`_HTTPSource` (issue #2323). 
+ """ if not isinstance(path, str): return False - if path.startswith(('http://', 'https://')): + if _is_http_url(path): return False return '://' in path @@ -1636,7 +1658,7 @@ def _open_source(source): raise TypeError( f"source must be a str path/URL or a binary file-like object " f"with read+seek methods, got {type(source).__name__}") - if source.startswith(('http://', 'https://')): + if _is_http_url(source): return _HTTPSource(source) if _is_fsspec_uri(source): return _CloudSource(source) diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 5230430b..810b8d75 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -1238,10 +1238,15 @@ def _write_streaming(dask_data, path: str, *, def _is_fsspec_uri(path) -> bool: - """Check if a path is a fsspec-compatible URI (string only).""" + """Check if a path is a fsspec-compatible URI (string only). + + Excludes http(s) case-insensitively so uppercase URLs are not routed + through fsspec on the writer side (issue #2323). + """ if not isinstance(path, str): return False - if path.startswith(('http://', 'https://')): + from ._sources import _is_http_url + if _is_http_url(path): return False return '://' in path diff --git a/xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py b/xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py new file mode 100644 index 00000000..8dee289f --- /dev/null +++ b/xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py @@ -0,0 +1,204 @@ +"""Uppercase URL schemes must not dodge SSRF hardening (issue #2323). + +URL schemes are case-insensitive per RFC 3986. Before this fix, the +geotiff reader's dispatch helpers compared schemes case-sensitively, so +a URL like ``HTTP://127.0.0.1/foo.tif`` would skip ``_HTTPSource`` (and +its SSRF allow-list + pinned DNS) and land on the fsspec branch via +``_is_fsspec_uri``. Tests below pin each dispatch site to the +case-insensitive behaviour. 
+ +No real HTTP calls are made: ``socket.getaddrinfo`` is monkeypatched per +test. +""" +from __future__ import annotations + +import socket + +import pytest + +from xrspatial.geotiff import UnsafeURLError +from xrspatial.geotiff import _reader as _reader_mod +from xrspatial.geotiff import _sources as _sources_mod +from xrspatial.geotiff._sidecar import _is_http_url as _sidecar_is_http_url +from xrspatial.geotiff._writer import _is_fsspec_uri as _writer_is_fsspec_uri + + +# Unique tmp-name prefix to keep parallel-rockout temp paths apart. +_ISSUE = "2323" + + +def _fake_getaddrinfo(ip: str): + """getaddrinfo replacement that always resolves to *ip*.""" + def _resolver(host, port, *args, **kwargs): + if ':' in ip: + return [(socket.AF_INET6, socket.SOCK_STREAM, 0, '', + (ip, port or 0, 0, 0))] + return [(socket.AF_INET, socket.SOCK_STREAM, 0, '', + (ip, port or 0))] + return _resolver + + +# --------------------------------------------------------------------------- +# _is_http_url / _is_fsspec_uri helpers +# --------------------------------------------------------------------------- + + +class TestIsHttpUrlCaseInsensitive: + @pytest.mark.parametrize("url", [ + "http://example.com/x.tif", + "https://example.com/x.tif", + "HTTP://example.com/x.tif", + "HTTPS://example.com/x.tif", + "Http://example.com/x.tif", + "hTTpS://example.com/x.tif", + "HTTP://127.0.0.1/foo.tif", + ]) + def test_http_variants_recognised(self, url): + assert _sources_mod._is_http_url(url) is True + assert _reader_mod._is_http_url(url) is True + assert _sidecar_is_http_url(url) is True + + @pytest.mark.parametrize("path", [ + "s3://bucket/x.tif", + "S3://bucket/x.tif", + "gs://bucket/x.tif", + "memory://buf", + "/local/path.tif", + "relative/path.tif", + "", + ]) + def test_non_http_rejected(self, path): + assert _sources_mod._is_http_url(path) is False + + def test_non_string_rejected(self): + assert _sources_mod._is_http_url(None) is False + assert _sources_mod._is_http_url(b"http://x/y") is 
False + assert _sources_mod._is_http_url(123) is False + + +class TestIsFsspecUriExcludesHttpCaseInsensitive: + @pytest.mark.parametrize("url", [ + "HTTP://127.0.0.1/foo.tif", + "HTTPS://example.com/x.tif", + "Http://example.com/x.tif", + "hTTpS://example.com/x.tif", + "http://example.com/x.tif", + ]) + def test_uppercase_http_not_fsspec(self, url): + # The bug: before the fix, uppercase URLs slipped past the + # http/https exclusion in _is_fsspec_uri and were routed to the + # fsspec branch (bypassing SSRF defences). + assert _sources_mod._is_fsspec_uri(url) is False + assert _writer_is_fsspec_uri(url) is False + + @pytest.mark.parametrize("uri", [ + "s3://bucket/x.tif", + "S3://bucket/x.tif", # fsspec accepts case-insensitive schemes too + "gs://bucket/x.tif", + "memory://buffer", + ]) + def test_real_fsspec_uris_still_match(self, uri): + assert _sources_mod._is_fsspec_uri(uri) is True + + +# --------------------------------------------------------------------------- +# _open_source dispatch -- uppercase URL must hit _HTTPSource +# (and therefore SSRF validation), not _CloudSource +# --------------------------------------------------------------------------- + + +class TestOpenSourceUppercaseDispatch: + def test_uppercase_loopback_rejected(self, monkeypatch): + # 127.0.0.1 is in the private/loopback range; the SSRF validator + # in _HTTPSource must reject it. If the URL were dispatched to + # fsspec instead, this would either silently succeed against a + # localhost service or raise a different error. 
+ monkeypatch.setattr( + socket, 'getaddrinfo', _fake_getaddrinfo('127.0.0.1')) + with pytest.raises(UnsafeURLError): + _sources_mod._open_source(f'HTTP://127.0.0.1/x_{_ISSUE}.tif') + + def test_uppercase_https_loopback_rejected(self, monkeypatch): + monkeypatch.setattr( + socket, 'getaddrinfo', _fake_getaddrinfo('127.0.0.1')) + with pytest.raises(UnsafeURLError): + _sources_mod._open_source( + f'HTTPS://localhost/x_{_ISSUE}.tif') + + def test_uppercase_metadata_ip_rejected(self, monkeypatch): + # 169.254.169.254 is the cloud-metadata service IP that SSRF + # attacks typically target. The validator treats link-local as + # private and must reject it whether the scheme is upper or + # lower case. + monkeypatch.setattr( + socket, 'getaddrinfo', _fake_getaddrinfo('169.254.169.254')) + with pytest.raises(UnsafeURLError): + _sources_mod._open_source( + f'HTTP://metadata.example/x_{_ISSUE}.tif') + + def test_uppercase_public_routes_to_http_source(self, monkeypatch): + # A public IP should construct _HTTPSource successfully (rather + # than silently going to fsspec). We don't make a real request: + # the pinned-DNS resolution is enough to prove the dispatch + # branch picked _HTTPSource. + monkeypatch.setattr( + socket, 'getaddrinfo', _fake_getaddrinfo('93.184.216.34')) + src = _sources_mod._open_source( + f'HTTP://example.com/x_{_ISSUE}.tif') + try: + assert type(src).__name__ == '_HTTPSource' + finally: + src.close() + + +# --------------------------------------------------------------------------- +# read_to_array dispatcher -- uppercase URL must take the COG-HTTP path +# (which validates via _HTTPSource), not the fsspec _CloudSource path +# --------------------------------------------------------------------------- + + +class TestReadToArrayUppercaseDispatch: + def test_uppercase_url_takes_http_path(self, monkeypatch): + """``read_to_array`` must route uppercase URLs through the + ``_read_cog_http`` path so the SSRF allow-list runs. 
+ + We stub ``_read_cog_http`` to capture the dispatch decision + without making any network calls. + """ + captured = {} + + def _fake_read_cog_http(source, **kwargs): + captured['source'] = source + captured['kwargs'] = kwargs + # No return value is needed: the stub raises right after + # recording the call, which short-circuits any downstream + # logic in the caller. + raise RuntimeError("stubbed _read_cog_http reached") + + monkeypatch.setattr( + _reader_mod, '_read_cog_http', _fake_read_cog_http) + + with pytest.raises(RuntimeError, match="stubbed _read_cog_http"): + _reader_mod.read_to_array( + f'HTTP://example.com/x_{_ISSUE}.tif') + assert captured.get('source') == ( + f'HTTP://example.com/x_{_ISSUE}.tif' + ) + + def test_lowercase_url_still_takes_http_path(self, monkeypatch): + # Regression guard: don't break the existing lowercase path. + captured = {} + + def _fake_read_cog_http(source, **kwargs): + captured['source'] = source + raise RuntimeError("stubbed _read_cog_http reached") + + monkeypatch.setattr( + _reader_mod, '_read_cog_http', _fake_read_cog_http) + + with pytest.raises(RuntimeError): + _reader_mod.read_to_array( + f'http://example.com/x_{_ISSUE}.tif') + assert captured['source'] == ( + f'http://example.com/x_{_ISSUE}.tif' + ) From a73226eedbd46c5660c7504b02162c0802aa9bf7 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 22 May 2026 19:40:50 -0700 Subject: [PATCH 2/2] Address review: dask test, GPU try scope, module-scope imports (#2323) Follow-ups for PR #2326: - Add `TestDaskBackendUppercaseDispatch` covering `read_geotiff_dask`'s own http/fsspec split. Stubs `_HTTPSource` / `_CloudSource` and asserts an uppercase `HTTP://` URL constructs `_HTTPSource`, an uppercase `S3://` URL constructs `_CloudSource`, and the lowercase http case still works. - `_gds_chunk_path_available`: move the `_is_http_url` check outside the `try/except Exception: pass` block. 
Inside the try, a hidden import failure would silently let an HTTP URL through to the kvikio branch (which opens the path as a local file). The fsspec check stays in the try because the rest of the kvikio eligibility logic should not break on a recoverable import error there. - Lift `urlparse` to a module-scope import in `_sources.py` so the helper does not re-resolve the import on every per-chunk dispatch. - Move the `_is_http_url` import in `_sidecar.py` and `_writer.py` to module scope. The function bodies now just call the canonical helper. --- xrspatial/geotiff/_backends/gpu.py | 12 ++- xrspatial/geotiff/_sidecar.py | 6 +- xrspatial/geotiff/_sources.py | 2 +- xrspatial/geotiff/_writer.py | 6 +- .../tests/test_uppercase_scheme_ssrf_2323.py | 85 +++++++++++++++++++ 5 files changed, 103 insertions(+), 8 deletions(-) diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 85e1d19d..47a600cc 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -1054,10 +1054,16 @@ def _gds_chunk_path_available(source, ifd, has_sparse_tile, orientation): """ if not isinstance(source, str): return False + # The http(s) gate must NOT live inside a ``try/except`` -- a hidden + # import failure would silently let an HTTP URL into the kvikio + # branch (which opens the path as a local file and panics). The + # canonical case-insensitive helper lives in the sibling ``_sources`` + # module, so this function-local import is safe (#2323). 
+ from .._sources import _is_http_url + if _is_http_url(source): + return False try: - from .._reader import _is_fsspec_uri, _is_http_url - if _is_http_url(source): - return False + from .._reader import _is_fsspec_uri if _is_fsspec_uri(source): return False except Exception: diff --git a/xrspatial/geotiff/_sidecar.py b/xrspatial/geotiff/_sidecar.py index e4906823..6fcf2ac2 100644 --- a/xrspatial/geotiff/_sidecar.py +++ b/xrspatial/geotiff/_sidecar.py @@ -26,6 +26,9 @@ # ``_reader`` imports ``_sidecar`` lazily (inside functions), so this # top-level import does not form a cycle at module load time. from ._reader import _is_fsspec_uri +# Canonical case-insensitive http(s) check (#2323). Reused via the +# wrapper ``_is_http_url`` below so existing imports keep working. +from ._sources import _is_http_url as _canonical_is_http_url #: Type of the bytes-like buffer a sidecar carries: an mmap for local #: files, bytes for HTTP / fsspec downloads. Narrowed from ``object`` @@ -45,8 +48,7 @@ class SidecarOverviews(NamedTuple): def _is_http_url(source: str) -> bool: # Delegate to the canonical case-insensitive check so uppercase # ``HTTP://`` URLs cannot dodge SSRF validation (issue #2323). 
- from ._sources import _is_http_url as _ihu - return _ihu(source) + return _canonical_is_http_url(source) def find_sidecar(source) -> str | None: diff --git a/xrspatial/geotiff/_sources.py b/xrspatial/geotiff/_sources.py index ec948fb8..39cdbe4f 100644 --- a/xrspatial/geotiff/_sources.py +++ b/xrspatial/geotiff/_sources.py @@ -31,6 +31,7 @@ import threading from collections import OrderedDict from concurrent.futures import ThreadPoolExecutor +from urllib.parse import urlparse import urllib3 @@ -1461,7 +1462,6 @@ def _is_http_url(path) -> bool: """ if not isinstance(path, str): return False - from urllib.parse import urlparse try: scheme = urlparse(path).scheme except (ValueError, TypeError): diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 810b8d75..53f614ec 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -65,6 +65,9 @@ _assemble_tiff, _build_ifd, _compute_classic_ifd_overhead, _float_to_rational, _pack_tag_value, _promote_offsets_to_long8, _serialize_tag_value, _should_use_bigtiff_streaming) +# Canonical case-insensitive http(s) check (#2323) so the writer-side +# fsspec gate cannot be tricked by uppercase URLs. +from ._sources import _is_http_url as _is_http_url_canonical # Tag IDs the writer must never accept from ``extra_tags``. 
NewSubfileType # (254) is a per-IFD status flag the writer emits on its own for overview @@ -1245,8 +1248,7 @@ def _is_fsspec_uri(path) -> bool: """ if not isinstance(path, str): return False - from ._sources import _is_http_url - if _is_http_url(path): + if _is_http_url_canonical(path): return False return '://' in path diff --git a/xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py b/xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py index 8dee289f..ca38f75c 100644 --- a/xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py +++ b/xrspatial/geotiff/tests/test_uppercase_scheme_ssrf_2323.py @@ -202,3 +202,88 @@ def _fake_read_cog_http(source, **kwargs): assert captured['source'] == ( f'http://example.com/x_{_ISSUE}.tif' ) + + +# --------------------------------------------------------------------------- +# Dask backend dispatch -- uppercase URL must construct _HTTPSource +# (not _CloudSource) inside ``read_geotiff_dask`` +# --------------------------------------------------------------------------- + + +class TestDaskBackendUppercaseDispatch: + """The dask backend has its own ``is_http``/``is_fsspec`` split at + ``_backends/dask.py:197-199``. Confirm an uppercase URL lands on the + HTTP branch (which constructs ``_HTTPSource``) rather than the + fsspec branch (``_CloudSource``). + """ + + def _stub_dask_http_path(self, monkeypatch): + """Replace ``_HTTPSource`` and ``_CloudSource`` on ``_reader`` + with tracking stubs that raise once the dispatch decision is + observable. Returns ``(seen, _Sentinel)``: the counter dict the + caller reads and the exception type the stubs raise. 
+ """ + from xrspatial.geotiff import _reader as _r + + seen = {'http': 0, 'cloud': 0} + + class _Sentinel(Exception): + pass + + def _fake_http_source(url): + seen['http'] += 1 + seen['url'] = url + raise _Sentinel("dispatched to _HTTPSource") + + def _fake_cloud_source(url, **kw): + seen['cloud'] += 1 + seen['url'] = url + raise _Sentinel("dispatched to _CloudSource") + + monkeypatch.setattr(_r, '_HTTPSource', _fake_http_source) + monkeypatch.setattr(_r, '_CloudSource', _fake_cloud_source) + return seen, _Sentinel + + def test_uppercase_url_constructs_http_source(self, monkeypatch): + from xrspatial.geotiff._backends.dask import read_geotiff_dask + + seen, _Sentinel = self._stub_dask_http_path(monkeypatch) + + url = f'HTTP://example.com/x_dask_{_ISSUE}.tif' + with pytest.raises(_Sentinel, match="_HTTPSource"): + read_geotiff_dask(url) + + assert seen['http'] == 1, ( + "uppercase URL did not reach _HTTPSource; " + f"seen={seen!r}") + assert seen['cloud'] == 0, ( + "uppercase URL leaked to _CloudSource (fsspec branch); " + f"seen={seen!r}") + assert seen['url'] == url + + def test_lowercase_url_still_constructs_http_source(self, monkeypatch): + from xrspatial.geotiff._backends.dask import read_geotiff_dask + + seen, _Sentinel = self._stub_dask_http_path(monkeypatch) + + url = f'http://example.com/x_dask_{_ISSUE}.tif' + with pytest.raises(_Sentinel, match="_HTTPSource"): + read_geotiff_dask(url) + + assert seen['http'] == 1 + assert seen['cloud'] == 0 + + def test_uppercase_s3_url_still_constructs_cloud_source( + self, monkeypatch): + # Counter-check: a real fsspec URI (uppercase scheme too) must + # still go to the cloud branch. + from xrspatial.geotiff._backends.dask import read_geotiff_dask + + seen, _Sentinel = self._stub_dask_http_path(monkeypatch) + + url = f'S3://bucket/x_dask_{_ISSUE}.tif' + with pytest.raises(_Sentinel, match="_CloudSource"): + read_geotiff_dask(url) + + assert seen['cloud'] == 1 + assert seen['http'] == 0