Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions qlib/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,31 @@ def _uri(
# TODO: qlib-server support inst_processors
return DiskDatasetCache._uri(instruments, fields, start_time, end_time, freq, disk_cache, inst_processors)

def _dataset_uri(
self,
instruments,
fields,
start_time=None,
end_time=None,
freq="day",
disk_cache=1,
inst_processors=[],
):
"""Default `_dataset_uri` for dataset providers that have no cache wrapper.

When no `DatasetCache` is configured the wrapped provider (e.g.
`LocalDatasetProvider`) is registered directly as `DatasetD`, so the
`features_uri` -> `DatasetD._dataset_uri(...)` call would otherwise
raise `AttributeError`. Returning an empty string signals the caller
that the client should load the data itself (the same convention
`DiskDatasetCache._dataset_uri` already uses for `disk_cache == 0`).
Cache subclasses such as `DiskDatasetCache` continue to override this
method with a real URI implementation.

See issue #1843.
"""
return ""

@staticmethod
def get_instruments_d(instruments, freq):
"""
Expand Down
56 changes: 56 additions & 0 deletions tests/misc/test_dataset_provider_uri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Unit tests for ``DatasetProvider._dataset_uri`` fallback (issue #1843).

When qlib runs without a ``DatasetCache`` wrapper the ``DatasetD`` ``Wrapper``
points directly at a ``LocalDatasetProvider`` instance. ``LocalProvider.features_uri``
unconditionally calls ``DatasetD._dataset_uri(...)``, so the bare provider must
expose the method even though it has no cache to address. The base class now
returns an empty string by convention (``""`` = "no URI, fetch directly"),
matching ``DiskDatasetCache._dataset_uri``'s behaviour for the ``disk_cache==0``
branch.
"""

import unittest

from qlib.data.data import LocalDatasetProvider


class DatasetProviderURITest(unittest.TestCase):
    """Regression tests for calling ``_dataset_uri`` on a bare ``LocalDatasetProvider`` (issue #1843)."""

    def test_local_dataset_provider_has_dataset_uri(self):
        """The provider exposes a ``_dataset_uri`` attribute."""
        provider = LocalDatasetProvider()
        # Should not raise AttributeError (regression for #1843).
        self.assertTrue(hasattr(provider, "_dataset_uri"))

    def test_local_dataset_provider_returns_empty_uri(self):
        """A fully specified request yields the empty-string URI."""
        provider = LocalDatasetProvider()
        uri = provider._dataset_uri(
            instruments={"market": "csi300"},
            fields=["$close"],
            start_time="2020-01-01",
            end_time="2020-12-31",
            freq="day",
            disk_cache=1,
        )
        # Empty string == "no cache configured, client should fetch directly".
        self.assertEqual(uri, "")

    def test_disk_cache_value_is_ignored_in_fallback(self):
        """Every ``disk_cache`` value produces the same empty-string URI."""
        # The fallback returns "" regardless of disk_cache value because there
        # is no cache to address. Cache subclasses (DiskDatasetCache) override
        # this with the disk_cache-aware behaviour.
        provider = LocalDatasetProvider()
        for disk_cache in (0, 1, 2):
            # subTest reports the failing disk_cache value and lets the
            # remaining values run instead of stopping at the first failure.
            with self.subTest(disk_cache=disk_cache):
                self.assertEqual(
                    provider._dataset_uri(
                        instruments=[],
                        fields=[],
                        disk_cache=disk_cache,
                    ),
                    "",
                )


# Run the unittest runner only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    unittest.main()