Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openml/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def check_server(server: str) -> str:

def replace_shorthand(server: str) -> str:
    """Expand a server shorthand into the full OpenML API URL.

    Parameters
    ----------
    server : str
        Either one of the shorthands ``"test"`` / ``"production"`` or an
        already complete server URL.

    Returns
    -------
    str
        The XML API endpoint for the shorthand, or ``server`` unchanged when
        it is not a known shorthand.
    """
    if server == "test":
        # Built from the shared constant so the test host is spelled only once.
        return f"{config.TEST_SERVER_URL}/api/v1/xml"
    if server == "production":
        return "https://www.openml.org/api/v1/xml"
    return server
Expand Down
4 changes: 3 additions & 1 deletion openml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET"
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"

TEST_SERVER_URL = "https://test.openml.org"


class _Config(TypedDict):
apikey: str
Expand Down Expand Up @@ -213,7 +215,7 @@ class ConfigurationForExamples:
_last_used_server = None
_last_used_key = None
_start_last_called = False
_test_server = "https://test.openml.org/api/v1/xml"
_test_server = f"{TEST_SERVER_URL}/api/v1/xml"
_test_apikey = _TEST_SERVER_NORMAL_USER_KEY

@classmethod
Expand Down
11 changes: 6 additions & 5 deletions openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,9 +415,10 @@ def get_task(
if not isinstance(task_id, int):
raise TypeError(f"Task id should be integer, is {type(task_id)}")

cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
tid_cache_dir = cache_key_dir / str(task_id)
tid_cache_dir_existed = tid_cache_dir.exists()
task_cache_directory = openml.utils._create_cache_directory_for_id(
TASKS_CACHE_DIR_NAME, task_id
)
task_cache_directory_existed = task_cache_directory.exists()
try:
task = _get_task_description(task_id)
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
Expand All @@ -431,8 +432,8 @@ def get_task(
if download_splits and isinstance(task, OpenMLSupervisedTask):
task.download_split()
except Exception as e:
if not tid_cache_dir_existed:
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
if not task_cache_directory_existed:
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory)
raise e

return task
Expand Down
2 changes: 1 addition & 1 deletion openml/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class TestBase(unittest.TestCase):
"user": [],
}
flow_name_tracker: ClassVar[list[str]] = []
test_server = "https://test.openml.org/api/v1/xml"
test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
admin_key = "abc"
user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY

Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def with_server(request):
openml.config.apikey = None
yield
return
openml.config.server = "https://test.openml.org/api/v1/xml"
openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
openml.config.apikey = TestBase.user_key
yield

Expand Down
1 change: 1 addition & 0 deletions tests/files/localhost:8080
37 changes: 13 additions & 24 deletions tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,27 +527,20 @@ def test_deletion_of_cache_dir(self):
def test_deletion_of_cache_dir_faulty_download(self, patch):
patch.side_effect = Exception("Boom!")
self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1)
datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets")
datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets")
assert len(os.listdir(datasets_cache_dir)) == 0

@pytest.mark.uses_test_server()
def test_publish_dataset(self):
# lazy loading not possible as we need the arff-file.
openml.datasets.get_dataset(3, download_data=True)
file_path = os.path.join(
openml.config.get_cache_directory(),
"datasets",
"3",
"dataset.arff",
)
arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
dataset = OpenMLDataset(
"anneal",
"test",
data_format="arff",
version=1,
licence="public",
default_target_attribute="class",
data_file=file_path,
data_file=arff_file_path,
)
dataset.publish()
TestBase._mark_entity_for_removal("data", dataset.dataset_id)
Expand Down Expand Up @@ -886,7 +879,7 @@ def test_create_invalid_dataset(self):

@pytest.mark.uses_test_server()
def test_get_online_dataset_arff(self):
dataset_id = 100 # Australian
dataset_id = 128 # iris -- one of the few datasets without parquet file
# lazy loading not used as arff file is checked.
dataset = openml.datasets.get_dataset(dataset_id, download_data=True)
decoder = arff.ArffDecoder()
Expand Down Expand Up @@ -1464,8 +1457,9 @@ def test_data_edit_critical_field(self):
raise e
time.sleep(10)
# Delete the cache dir to get the newer version of the dataset

shutil.rmtree(
os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)),
os.path.join(openml.config.get_cache_directory(), "datasets", str(did)),
)

@pytest.mark.uses_test_server()
Expand Down Expand Up @@ -1730,7 +1724,6 @@ def test_delete_dataset(self):

@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
)
Expand All @@ -1745,14 +1738,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
):
openml.datasets.delete_dataset(40_000)

dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
)
Expand All @@ -1767,14 +1759,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
):
openml.datasets.delete_dataset(40_000)

dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
)
Expand All @@ -1786,14 +1777,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
success = openml.datasets.delete_dataset(40000)
assert success

dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
)
Expand All @@ -1808,7 +1798,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
):
openml.datasets.delete_dataset(9_999_999)

dataset_url = "https://test.openml.org/api/v1/xml/data/9999999"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")

Expand Down Expand Up @@ -1903,9 +1893,8 @@ def _dataset_features_is_downloaded(did: int):


def _dataset_data_file_is_downloaded(did: int):
    """Return whether a data file for dataset ``did`` is present in the cache.

    Looks in ``<cache directory>/datasets/<did>`` and reports ``True`` when any
    entry there has a ``.pq`` or ``.arff`` suffix, whatever its base name.
    """
    cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
    # ``Path.iterdir`` raises when the directory is missing; a dataset that was
    # never cached simply has no data file, so report that instead of failing.
    if not cache_directory.is_dir():
        return False
    return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())


def _assert_datasets_retrieved_successfully(
Expand Down Expand Up @@ -2010,7 +1999,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
)
# While the mocked example is from production, unit tests by default connect to the test server.
requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text())
dataset = openml.datasets.get_dataset(61, download_data=True)
assert dataset._parquet_url is not None
assert dataset.parquet_file is not None
Expand Down
15 changes: 5 additions & 10 deletions tests/test_flows/test_flow_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,6 @@ def test_delete_flow(self):

@mock.patch.object(requests.Session, "delete")
def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -466,14 +465,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)

flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -486,14 +484,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)

flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -506,14 +503,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)

flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
Expand All @@ -523,15 +519,14 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
success = openml.flows.delete_flow(33364)
assert success

flow_url = "https://test.openml.org/api/v1/xml/flow/33364"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -544,6 +539,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(9_999_999)

flow_url = "https://test.openml.org/api/v1/xml/flow/9999999"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
2 changes: 1 addition & 1 deletion tests/test_openml/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_get_config_as_dict(self):
config = openml.config.get_config_as_dict()
_config = {}
_config["apikey"] = TestBase.user_key
_config["server"] = "https://test.openml.org/api/v1/xml"
_config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = False
_config["connection_n_retries"] = 20
Expand Down
13 changes: 7 additions & 6 deletions tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1813,7 +1813,6 @@ def test_initialize_model_from_run_nonstrict(self):

@mock.patch.object(requests.Session, "delete")
def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -1826,14 +1825,13 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(40_000)

run_url = "https://test.openml.org/api/v1/xml/run/40000"
run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/40000"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
Expand All @@ -1843,14 +1841,13 @@ def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
success = openml.runs.delete_run(10591880)
assert success

run_url = "https://test.openml.org/api/v1/xml/run/10591880"
run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/10591880"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -1863,7 +1860,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(9_999_999)

run_url = "https://test.openml.org/api/v1/xml/run/9999999"
run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/9999999"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")

Expand All @@ -1873,6 +1870,10 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
Version(sklearn.__version__) < Version("0.21"),
reason="couldn't perform local tests successfully w/o bloating RAM",
)
@unittest.skipIf(
Version(sklearn.__version__) >= Version("1.8"),
reason="predictions differ significantly",
)
@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
@pytest.mark.uses_test_server()
def test__run_task_get_arffcontent_2(parallel_mock):
Expand Down
Loading
Loading