From c3f0e08b97e2ecf3a5250ea012e902f3bda4cc32 Mon Sep 17 00:00:00 2001
From: genisis0x
Date: Wed, 13 May 2026 14:12:25 +0530
Subject: [PATCH] fix(data): provide _dataset_uri fallback on DatasetProvider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #1843.

When qlib runs without a `DatasetCache` wrapper the `DatasetD` `Wrapper`
registers a bare `LocalDatasetProvider` instance.
`LocalProvider.features_uri` then calls `DatasetD._dataset_uri(...)`
unconditionally, which goes through `Wrapper.__getattr__` to look up
`_dataset_uri` on the provider — and `LocalDatasetProvider` doesn't have
one. The cache-aware override lives on `DatasetCache` /
`DiskDatasetCache` only, so the no-cache code path crashes with
`AttributeError: 'LocalDatasetProvider' object has no attribute
'_dataset_uri'`.

Add a base `_dataset_uri` to `DatasetProvider` that returns `""` by
convention — the same "no URI, fetch directly" signal that
`DiskDatasetCache._dataset_uri` already emits on its `disk_cache == 0`
branch. Cache subclasses continue to override this with a real URI
implementation, so the wrapped-with-cache path is unchanged. The new
fallback covers any provider that subclasses `DatasetProvider` without
going through a cache (LocalDatasetProvider, ClientDatasetProvider, plus
any third-party providers users register).

Adds `tests/misc/test_dataset_provider_uri.py` with three regression
tests: attribute presence, the empty-string return, and stability across
`disk_cache` values so future refactors can't reintroduce the crash.
---
 qlib/data/data.py                       | 25 +++++++++++
 tests/misc/test_dataset_provider_uri.py | 56 +++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 tests/misc/test_dataset_provider_uri.py

diff --git a/qlib/data/data.py b/qlib/data/data.py
index aba75c0b1ab..faac232305e 100644
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -507,6 +507,31 @@ def _uri(
         # TODO: qlib-server support inst_processors
         return DiskDatasetCache._uri(instruments, fields, start_time, end_time, freq, disk_cache, inst_processors)
 
+    def _dataset_uri(
+        self,
+        instruments,
+        fields,
+        start_time=None,
+        end_time=None,
+        freq="day",
+        disk_cache=1,
+        inst_processors=[],
+    ):
+        """Default `_dataset_uri` for dataset providers that have no cache wrapper.
+
+        When no `DatasetCache` is configured the wrapped provider (e.g.
+        `LocalDatasetProvider`) is registered directly as `DatasetD`, so the
+        `features_uri` -> `DatasetD._dataset_uri(...)` call would otherwise
+        raise `AttributeError`. Returning an empty string signals the caller
+        that the client should load the data itself (the same convention
+        `DiskDatasetCache._dataset_uri` already uses for `disk_cache == 0`).
+        Cache subclasses such as `DiskDatasetCache` continue to override this
+        method with a real URI implementation.
+
+        See issue #1843.
+        """
+        return ""
+
     @staticmethod
     def get_instruments_d(instruments, freq):
         """
diff --git a/tests/misc/test_dataset_provider_uri.py b/tests/misc/test_dataset_provider_uri.py
new file mode 100644
index 00000000000..5c3d836de7e
--- /dev/null
+++ b/tests/misc/test_dataset_provider_uri.py
@@ -0,0 +1,56 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Unit tests for ``DatasetProvider._dataset_uri`` fallback (issue #1843).
+
+When qlib runs without a ``DatasetCache`` wrapper the ``DatasetD`` ``Wrapper``
+points directly at a ``LocalDatasetProvider`` instance. ``LocalProvider.features_uri``
+unconditionally calls ``DatasetD._dataset_uri(...)``, so the bare provider must
+expose the method even though it has no cache to address. The base class now
+returns an empty string by convention (``""`` = "no URI, fetch directly"),
+matching ``DiskDatasetCache._dataset_uri``'s behaviour for the ``disk_cache==0``
+branch.
+"""
+
+import unittest
+
+from qlib.data.data import LocalDatasetProvider
+
+
+class DatasetProviderURITest(unittest.TestCase):
+    def test_local_dataset_provider_has_dataset_uri(self):
+        provider = LocalDatasetProvider()
+        # Should not raise AttributeError (regression for #1843).
+        self.assertTrue(hasattr(provider, "_dataset_uri"))
+
+    def test_local_dataset_provider_returns_empty_uri(self):
+        provider = LocalDatasetProvider()
+        uri = provider._dataset_uri(
+            instruments={"market": "csi300"},
+            fields=["$close"],
+            start_time="2020-01-01",
+            end_time="2020-12-31",
+            freq="day",
+            disk_cache=1,
+        )
+        # Empty string == "no cache configured, client should fetch directly".
+        self.assertEqual(uri, "")
+
+    def test_disk_cache_value_is_ignored_in_fallback(self):
+        # The fallback returns "" regardless of disk_cache value because there
+        # is no cache to address. Cache subclasses (DiskDatasetCache) override
+        # this with the disk_cache-aware behaviour.
+        provider = LocalDatasetProvider()
+        for disk_cache in (0, 1, 2):
+            self.assertEqual(
+                provider._dataset_uri(
+                    instruments=[],
+                    fields=[],
+                    disk_cache=disk_cache,
+                ),
+                "",
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()