def _dataset_uri(
    self,
    instruments,
    fields,
    start_time=None,
    end_time=None,
    freq="day",
    disk_cache=1,
    inst_processors=None,
):
    """Return the fallback dataset URI for providers that have no cache wrapper.

    When no `DatasetCache` is configured the wrapped provider (e.g.
    `LocalDatasetProvider`) is registered directly as `DatasetD`, so the
    `features_uri` -> `DatasetD._dataset_uri(...)` call would otherwise
    raise `AttributeError`. Cache subclasses such as `DiskDatasetCache`
    continue to override this method with a real URI implementation.

    See issue #1843.

    Parameters
    ----------
    instruments, fields, start_time, end_time, freq, disk_cache, inst_processors
        Accepted for call compatibility only; none of them is read by this
        fallback. `inst_processors` defaults to ``None`` rather than a list
        literal so that no mutable object is shared between calls.

    Returns
    -------
    str
        Always the empty string, which signals the caller that the client
        should load the data itself (the same convention
        `DiskDatasetCache._dataset_uri` already uses for `disk_cache == 0`).
    """
    # There is no cache to address, so every query maps to "no URI".
    return ""
"""Unit tests for the ``DatasetProvider._dataset_uri`` fallback (issue #1843).

When qlib runs without a ``DatasetCache`` wrapper, the ``DatasetD`` ``Wrapper``
points directly at a ``LocalDatasetProvider`` instance.
``LocalProvider.features_uri`` unconditionally calls
``DatasetD._dataset_uri(...)``, so the bare provider must expose the method
even though it has no cache to address. The base class returns an empty string
by convention (``""`` = "no URI, fetch directly"), matching the behaviour of
``DiskDatasetCache._dataset_uri`` for the ``disk_cache == 0`` branch.
"""

import unittest

from qlib.data.data import LocalDatasetProvider


class DatasetProviderURITest(unittest.TestCase):
    """Regression tests for calling ``_dataset_uri`` on a bare provider."""

    def setUp(self):
        # Every test needs a provider that is not wrapped by a dataset cache.
        self.provider = LocalDatasetProvider()

    def test_local_dataset_provider_has_dataset_uri(self):
        # Regression for #1843: the attribute must exist *and* be callable,
        # otherwise the ``features_uri`` call path still fails.
        self.assertTrue(hasattr(self.provider, "_dataset_uri"))
        self.assertTrue(callable(self.provider._dataset_uri))

    def test_local_dataset_provider_returns_empty_uri(self):
        uri = self.provider._dataset_uri(
            instruments={"market": "csi300"},
            fields=["$close"],
            start_time="2020-01-01",
            end_time="2020-12-31",
            freq="day",
            disk_cache=1,
        )
        # Empty string == "no cache configured, client should fetch directly".
        self.assertEqual(uri, "")

    def test_disk_cache_value_is_ignored_in_fallback(self):
        # The fallback returns "" regardless of the disk_cache value because
        # there is no cache to address. Cache subclasses (DiskDatasetCache)
        # override this with the disk_cache-aware behaviour.
        for disk_cache in (0, 1, 2):
            # subTest reports which value failed and keeps checking the rest.
            with self.subTest(disk_cache=disk_cache):
                self.assertEqual(
                    self.provider._dataset_uri(
                        instruments=[],
                        fields=[],
                        disk_cache=disk_cache,
                    ),
                    "",
                )


if __name__ == "__main__":
    unittest.main()