diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py new file mode 100644 index 000000000..881f40671 --- /dev/null +++ b/openml/_api/__init__.py @@ -0,0 +1,8 @@ +from openml._api.runtime.core import APIContext + + +def set_api_version(version: str, *, strict: bool = False) -> None: + api_context.set_version(version=version, strict=strict) + + +api_context = APIContext() diff --git a/openml/_api/clients/__init__.py b/openml/_api/clients/__init__.py new file mode 100644 index 000000000..8a5ff94e4 --- /dev/null +++ b/openml/_api/clients/__init__.py @@ -0,0 +1,6 @@ +from .http import HTTPCache, HTTPClient + +__all__ = [ + "HTTPCache", + "HTTPClient", +] diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py new file mode 100644 index 000000000..65d7b2248 --- /dev/null +++ b/openml/_api/clients/http.py @@ -0,0 +1,426 @@ +from __future__ import annotations + +import json +import logging +import math +import random +import time +import xml +from collections.abc import Mapping +from pathlib import Path +from typing import Any +from urllib.parse import urlencode, urljoin, urlparse + +import requests +import xmltodict +from requests import Response + +from openml.__version__ import __version__ +from openml._api.config import RetryPolicy +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLServerError, + OpenMLServerException, + OpenMLServerNoResult, +) + + +class HTTPCache: + def __init__(self, *, path: Path, ttl: int) -> None: + self.path = path + self.ttl = ttl + + def get_key(self, url: str, params: dict[str, Any]) -> str: + parsed_url = urlparse(url) + netloc_parts = parsed_url.netloc.split(".")[::-1] + path_parts = parsed_url.path.strip("/").split("/") + + filtered_params = {k: v for k, v in params.items() if k != "api_key"} + params_part = [urlencode(filtered_params)] if filtered_params else [] + + return str(Path(*netloc_parts, *path_parts, *params_part)) + + def _key_to_path(self, key: str) -> Path: + return self.path.joinpath(key) + + def load(self, key: str) -> Response: + path = self._key_to_path(key) + + if not path.exists(): + raise FileNotFoundError(f"Cache directory not found: {path}") + + meta_path = path / "meta.json" + headers_path = path / "headers.json" + body_path = path / "body.bin" + + if not (meta_path.exists() and headers_path.exists() and body_path.exists()): + raise FileNotFoundError(f"Incomplete cache at {path}") + + with meta_path.open("r", encoding="utf-8") as f: + meta = json.load(f) + + created_at = meta.get("created_at") + if created_at is None: + raise ValueError("Cache metadata missing 'created_at'") + + if time.time() - created_at > self.ttl: + raise TimeoutError(f"Cache expired for {path}") + + with headers_path.open("r", encoding="utf-8") as f: + headers = json.load(f) + + body = body_path.read_bytes() + + response = Response() + response.status_code = meta["status_code"] + response.url = meta["url"] + response.reason = meta["reason"] + response.headers = headers + response._content = body + response.encoding = meta["encoding"] + + return response + + def save(self, key: str, response: Response) -> None: + path = self._key_to_path(key) + path.mkdir(parents=True, exist_ok=True) + + (path / "body.bin").write_bytes(response.content) + + with (path / "headers.json").open("w", encoding="utf-8") as f: + json.dump(dict(response.headers), f) + + meta = { + "status_code": response.status_code, + "url": response.url, + "reason": response.reason, + "encoding": response.encoding, + "elapsed": response.elapsed.total_seconds(), + 
"created_at": time.time(), + "request": { + "method": response.request.method if response.request else None, + "url": response.request.url if response.request else None, + "headers": dict(response.request.headers) if response.request else None, + "body": response.request.body if response.request else None, + }, + } + + with (path / "meta.json").open("w", encoding="utf-8") as f: + json.dump(meta, f) + + +class HTTPClient: + def __init__( # noqa: PLR0913 + self, + *, + server: str, + base_url: str, + api_key: str, + timeout: int, + retries: int, + retry_policy: RetryPolicy, + cache: HTTPCache | None = None, + ) -> None: + self.server = server + self.base_url = base_url + self.api_key = api_key + self.timeout = timeout + self.retries = retries + self.retry_policy = retry_policy + self.cache = cache + + self.retry_func = ( + self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay + ) + self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + + def _robot_delay(self, n: int) -> float: + wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 + variation = random.gauss(0, wait / 10) + return max(1.0, wait + variation) + + def _human_delay(self, n: int) -> float: + return max(1.0, n) + + def _parse_exception_response( + self, + response: Response, + ) -> tuple[int | None, str]: + content_type = response.headers.get("Content-Type", "").lower() + + if "json" in content_type: + server_exception = response.json() + server_error = server_exception["detail"] + code = server_error.get("code") + message = server_error.get("message") + additional_information = server_error.get("additional_information") + else: + server_exception = xmltodict.parse(response.text) + server_error = server_exception["oml:error"] + code = server_error.get("oml:code") + message = server_error.get("oml:message") + additional_information = server_error.get("oml:additional_information") + + if code is not None: + code = int(code) + + if message and additional_information: + full_message = f"{message} - {additional_information}" + elif message: + full_message = message + elif additional_information: + full_message = additional_information + else: + full_message = "" + + return code, full_message + + def _raise_code_specific_error( + self, + code: int, + message: str, + url: str, + files: Mapping[str, Any] | None, + ) -> None: + if code in [111, 372, 512, 500, 482, 542, 674]: + # 512 for runs, 372 for datasets, 500 for flows + # 482 for tasks, 542 for evaluations, 674 for setups + # 111 for dataset descriptions + raise OpenMLServerNoResult(code=code, message=message, url=url) + + # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) + if code in [163] and files is not None and "description" in files: + # file_elements['description'] is the XML file description of the flow + message = f"\n{files['description']}\n{message}" + + if code in [ + 102, # flow/exists post + 137, # dataset post + 350, # dataset/42 delete + 310, # flow/ post + 320, # flow/42 delete + 400, # run/42 delete + 460, # task/42 delete + ]: + raise OpenMLNotAuthorizedError( + message=( + f"The API call {url} requires authentication via an API key.\nPlease configure " + "OpenML-Python to use your API as described in this example:" + "\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication" + ) + ) + + # Propagate all server errors to the calling functions, except + # for 107 which represents a database connection error. 
+ # These are typically caused by high server load, + # which means trying again might resolve the issue. + # DATABASE_CONNECTION_ERRCODE + if code != 107: + raise OpenMLServerException(code=code, message=message, url=url) + + def _validate_response( + self, + method: str, + url: str, + files: Mapping[str, Any] | None, + response: Response, + ) -> Exception | None: + if ( + "Content-Encoding" not in response.headers + or response.headers["Content-Encoding"] != "gzip" + ): + logging.warning(f"Received uncompressed content from OpenML for {url}.") + + if response.status_code == 200: + return None + + if response.status_code == requests.codes.URI_TOO_LONG: + raise OpenMLServerError(f"URI too long! ({url})") + + retry_raise_e: Exception | None = None + + try: + code, message = self._parse_exception_response(response) + + except (requests.exceptions.JSONDecodeError, xml.parsers.expat.ExpatError) as e: + if method != "GET": + extra = f"Status code: {response.status_code}\n{response.text}" + raise OpenMLServerError( + f"Unexpected server error when calling {url}. Please contact the " + f"developers!\n{extra}" + ) from e + + retry_raise_e = e + + except Exception as e: + # If we failed to parse it out, + # then something has gone wrong in the body we have sent back + # from the server and there is little extra information we can capture. + raise OpenMLServerError( + f"Unexpected server error when calling {url}. Please contact the developers!\n" + f"Status code: {response.status_code}\n{response.text}", + ) from e + + if code is not None: + self._raise_code_specific_error( + code=code, + message=message, + url=url, + files=files, + ) + + if retry_raise_e is None: + retry_raise_e = OpenMLServerException(code=code, message=message, url=url) + + return retry_raise_e + + def _request( # noqa: PLR0913 + self, + method: str, + url: str, + params: Mapping[str, Any], + data: Mapping[str, Any], + headers: Mapping[str, str], + timeout: float | int, + files: Mapping[str, Any] | None, + **request_kwargs: Any, + ) -> tuple[Response | None, Exception | None]: + retry_raise_e: Exception | None = None + response: Response | None = None + + try: + response = requests.request( + method=method, + url=url, + params=params, + data=data, + headers=headers, + timeout=timeout, + files=files, + **request_kwargs, + ) + except ( + requests.exceptions.ChunkedEncodingError, + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + ) as e: + retry_raise_e = e + + if response is not None: + retry_raise_e = self._validate_response( + method=method, + url=url, + files=files, + response=response, + ) + + return response, retry_raise_e + + def request( + self, + method: str, + path: str, + *, + use_cache: bool = False, + use_api_key: bool = False, + **request_kwargs: Any, + ) -> Response: + url = urljoin(self.server, urljoin(self.base_url, path)) + retries = max(1, self.retries) + + params = request_kwargs.pop("params", {}).copy() + data = request_kwargs.pop("data", {}).copy() + + if use_api_key: + params["api_key"] = self.api_key + + if method.upper() in {"POST", "PUT", "PATCH"}: + data = {**params, **data} + params = {} + + # prepare headers + headers = request_kwargs.pop("headers", {}).copy() + headers.update(self.headers) + + timeout = request_kwargs.pop("timeout", self.timeout) + files = request_kwargs.pop("files", None) + + if use_cache and self.cache is not None: + cache_key = self.cache.get_key(url, params) + try: + return self.cache.load(cache_key) + except (FileNotFoundError, TimeoutError): + pass # cache 
miss or expired, continue + except Exception: + raise # propagate unexpected cache errors + + for retry_counter in range(1, retries + 1): + response, retry_raise_e = self._request( + method=method, + url=url, + params=params, + data=data, + headers=headers, + timeout=timeout, + files=files, + **request_kwargs, + ) + + # executed successfully + if retry_raise_e is None: + break + # tries completed + if retry_counter >= retries: + raise retry_raise_e + + delay = self.retry_func(retry_counter) + time.sleep(delay) + + assert response is not None + + if use_cache and self.cache is not None: + self.cache.save(cache_key, response) + + return response + + def get( + self, + path: str, + *, + use_cache: bool = False, + use_api_key: bool = False, + **request_kwargs: Any, + ) -> Response: + return self.request( + method="GET", + path=path, + use_cache=use_cache, + use_api_key=use_api_key, + **request_kwargs, + ) + + def post( + self, + path: str, + **request_kwargs: Any, + ) -> Response: + return self.request( + method="POST", + path=path, + use_cache=False, + use_api_key=True, + **request_kwargs, + ) + + def delete( + self, + path: str, + **request_kwargs: Any, + ) -> Response: + return self.request( + method="DELETE", + path=path, + use_cache=False, + use_api_key=True, + **request_kwargs, + ) diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/config.py b/openml/_api/config.py new file mode 100644 index 000000000..6cce06403 --- /dev/null +++ b/openml/_api/config.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum + + +class RetryPolicy(str, Enum): + HUMAN = "human" + ROBOT = "robot" + + +@dataclass +class APIConfig: + server: str + base_url: str + api_key: str + timeout: int = 10 # seconds + + +@dataclass +class APISettings: + v1: APIConfig + v2: APIConfig + + +@dataclass +class ConnectionConfig: + retries: int = 3 + retry_policy: RetryPolicy = RetryPolicy.HUMAN + + +@dataclass +class CacheConfig: + dir: str = "~/.openml/cache" + ttl: int = 60 * 60 * 24 * 7 # one week + + +@dataclass +class Settings: + api: APISettings + connection: ConnectionConfig + cache: CacheConfig + + +settings = Settings( + api=APISettings( + v1=APIConfig( + server="https://www.openml.org/", + base_url="api/v1/xml/", + api_key="...", + ), + v2=APIConfig( + server="http://127.0.0.1:8001/", + base_url="", + api_key="...", + ), + ), + connection=ConnectionConfig(), + cache=CacheConfig(), +) diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py new file mode 100644 index 000000000..cf83edea6 --- /dev/null +++ b/openml/_api/resources/__init__.py @@ -0,0 +1,14 @@ +from openml._api.resources.base.fallback import FallbackProxy +from openml._api.resources.datasets import DatasetsV1, DatasetsV2 +from openml._api.resources.setups import SetupsV1, SetupsV2 +from openml._api.resources.tasks import TasksV1, TasksV2 + +__all__ = [ + "DatasetsV1", + "DatasetsV2", + "FallbackProxy", + "SetupsV1", + "SetupsV2", + "TasksV1", + "TasksV2", +] diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py new file mode 100644 index 000000000..cee1316bf --- /dev/null +++ b/openml/_api/resources/base/__init__.py @@ -0,0 +1,16 @@ +from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType +from openml._api.resources.base.fallback import FallbackProxy +from openml._api.resources.base.resources import DatasetsAPI, 
SetupsAPI, TasksAPI +from openml._api.resources.base.versions import ResourceV1, ResourceV2 + +__all__ = [ + "APIVersion", + "DatasetsAPI", + "FallbackProxy", + "ResourceAPI", + "ResourceType", + "ResourceV1", + "ResourceV2", + "SetupsAPI", + "TasksAPI", +] diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py new file mode 100644 index 000000000..63d4c40eb --- /dev/null +++ b/openml/_api/resources/base/base.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from enum import Enum +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Mapping + from typing import Any + + from openml._api.clients import HTTPClient + + +class APIVersion(str, Enum): + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" + + +class ResourceAPI(ABC): + api_version: APIVersion + resource_type: ResourceType + + def __init__(self, http: HTTPClient): + self._http = http + + @abstractmethod + def delete(self, resource_id: int) -> bool: ... + + @abstractmethod + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: ... + + @abstractmethod + def tag(self, resource_id: int, tag: str) -> list[str]: ... + + @abstractmethod + def untag(self, resource_id: int, tag: str) -> list[str]: ... + + def _get_not_implemented_message(self, method_name: str | None = None) -> str: + version = getattr(self.api_version, "name", "Unknown version") + resource = getattr(self.resource_type, "name", "Unknown resource") + method_info = f" Method: {method_name}" if method_name else "" + return ( + f"{self.__class__.__name__}: {version} API does not support this " + f"functionality for resource: {resource}.{method_info}" + ) diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py new file mode 100644 index 000000000..253ee3865 --- /dev/null +++ b/openml/_api/resources/base/fallback.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from collections.abc import Callable +from typing import Any + + +class FallbackProxy: + def __init__(self, *api_versions: Any): + if not api_versions: + raise ValueError("At least one API version must be provided") + self._apis = api_versions + + def __getattr__(self, name: str) -> Any: + api, attr = self._find_attr(name) + if callable(attr): + return self._wrap_callable(name, api, attr) + return attr + + def _find_attr(self, name: str) -> tuple[Any, Any]: + for api in self._apis: + attr = getattr(api, name, None) + if attr is not None: + return api, attr + raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") + + def _wrap_callable( + self, + name: str, + primary_api: Any, + primary_attr: Callable[..., Any], + ) -> Callable[..., Any]: + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return primary_attr(*args, **kwargs) + except NotImplementedError: + return self._call_fallbacks(name, primary_api, *args, **kwargs) + + return wrapper + + def _call_fallbacks( + self, + name: str, + skip_api: Any, + *args: Any, + **kwargs: Any, + ) -> Any: + for api in self._apis: + if api is skip_api: + continue + attr = getattr(api, name, None) + if callable(attr): + try: + return attr(*args, **kwargs) + except NotImplementedError: + continue + raise 
NotImplementedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py new file mode 100644 index 000000000..852ef2890 --- /dev/null +++ b/openml/_api/resources/base/resources.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from abc import abstractmethod +from collections.abc import Iterable +from typing import TYPE_CHECKING, Any + +from openml._api.resources.base import ResourceAPI, ResourceType + +if TYPE_CHECKING: + from requests import Response + + from openml.datasets.dataset import OpenMLDataset + from openml.setups.setup import OpenMLSetup + from openml.tasks.task import OpenMLTask + + +class DatasetsAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.DATASET + + @abstractmethod + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... + + +class TasksAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.TASK + + @abstractmethod + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: ... + + +class SetupsAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.SETUP + + @abstractmethod + def list( + self, + limit: int, + offset: int, + *, + setup: Iterable[int] | None = None, + flow: int | None = None, + tag: str | None = None, + ) -> list[OpenMLSetup]: ... + + @abstractmethod + def _create_setup(self, result_dict: dict) -> OpenMLSetup: ... + + @abstractmethod + def get(self, setup_id: int) -> tuple[str, OpenMLSetup]: ... + + @abstractmethod + def exists(self, file_elements: dict[str, Any]) -> int: ... diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py new file mode 100644 index 000000000..91c1a8c06 --- /dev/null +++ b/openml/_api/resources/base/versions.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +import xmltodict + +from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLServerError, + OpenMLServerException, +) + + +class ResourceV1(ResourceAPI): + api_version: APIVersion = APIVersion.V1 + + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + response = self._http.post(path, files=files) + parsed_response = xmltodict.parse(response.content) + return self._extract_id_from_upload(parsed_response) + + def delete(self, resource_id: int) -> bool: + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "flow", "task", "run", "study", "user"} + if resource_type not in legal_resources: + raise ValueError(f"Can't delete a {resource_type}") + + path = f"{resource_type}/{resource_id}" + try: + response = self._http.delete(path) + result = xmltodict.parse(response.content) + return f"oml:{resource_type}_delete" in result + except OpenMLServerException as e: + self._handle_delete_exception(resource_type, e) + raise + + def tag(self, resource_id: int, tag: str) -> list[str]: + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "task", "flow", "setup", "run"} + if resource_type not in legal_resources: + raise ValueError(f"Can't tag a {resource_type}") + + path = f"{resource_type}/tag" + data = {f"{resource_type}_id": resource_id, "tag": tag} + response = self._http.post(path, data=data) + + main_tag = f"oml:{resource_type}_tag" + parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) 
+        result = parsed_response[main_tag]
+        tags: list[str] = result.get("oml:tag", [])
+
+        return tags
+
+    def untag(self, resource_id: int, tag: str) -> list[str]:
+        resource_type = self._get_endpoint_name()
+
+        legal_resources = {"data", "task", "flow", "setup", "run"}
+        if resource_type not in legal_resources:
+            raise ValueError(f"Can't untag a {resource_type}")
+
+        path = f"{resource_type}/untag"
+        data = {f"{resource_type}_id": resource_id, "tag": tag}
+        response = self._http.post(path, data=data)
+
+        main_tag = f"oml:{resource_type}_untag"
+        parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"})
+        result = parsed_response[main_tag]
+        tags: list[str] = result.get("oml:tag", [])
+
+        return tags
+
+    def _get_endpoint_name(self) -> str:
+        if self.resource_type == ResourceType.DATASET:
+            return "data"
+        # Use the enum value ("task", "flow", ...); .name would yield the uppercase
+        # member name and never match the endpoint names checked above.
+        return self.resource_type.value
+
+    def _handle_delete_exception(
+        self, resource_type: str, exception: OpenMLServerException
+    ) -> None:
+        # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php
+        # Most exceptions are descriptive enough to be raised as their standard
+        # OpenMLServerException; however, there are two cases where we add information:
+        # - for a generic "failed" message, we direct the user to the right issue board
+        # - when the user successfully authenticates with the server
+        #   but is not allowed to take the requested action,
+        #   in which case we raise an OpenMLNotAuthorizedError.
+        by_other_user = [323, 353, 393, 453, 594]
+        has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595]
+        unknown_reason = [325, 355, 394, 455, 593]
+        if exception.code in by_other_user:
+            raise OpenMLNotAuthorizedError(
+                message=(
+                    f"The {resource_type} can not be deleted because it was not uploaded by you."
+                ),
+            ) from exception
+        if exception.code in has_dependent_entities:
+            raise OpenMLNotAuthorizedError(
+                message=(
+                    f"The {resource_type} can not be deleted because "
+                    f"it still has associated entities: {exception.message}"
+                ),
+            ) from exception
+        if exception.code in unknown_reason:
+            raise OpenMLServerError(
+                message=(
+                    f"The {resource_type} can not be deleted for unknown reason,"
+                    " please open an issue at: https://github.com/openml/openml/issues/new"
+                ),
+            ) from exception
+        raise exception
+
+    def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int:
+        # reads id from
+        # sample parsed dict: {"oml:openml": {"oml:upload_flow": {"oml:id": "42"}}}
+
+        # xmltodict always gives exactly one root key
+        ((_, root_value),) = parsed.items()
+
+        if not isinstance(root_value, Mapping):
+            raise ValueError("Unexpected XML structure")
+
+        # upload node (e.g. oml:upload_task, oml:study_upload, ...)
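+        # The ((_, value),) unpacking asserts that exactly one child node exists;
+        # any other shape fails with a ValueError from tuple unpacking, which acts
+        # as a cheap sanity check on the upload response.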
+ ((_, upload_value),) = root_value.items() + + if not isinstance(upload_value, Mapping): + raise ValueError("Unexpected upload node structure") + + # ID is the only leaf value + for v in upload_value.values(): + if isinstance(v, (str, int)): + return int(v) + + raise ValueError("No ID found in upload response") + + +class ResourceV2(ResourceAPI): + api_version: APIVersion = APIVersion.V2 + + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + raise NotImplementedError(self._get_not_implemented_message("publish")) + + def delete(self, resource_id: int) -> bool: + raise NotImplementedError(self._get_not_implemented_message("delete")) + + def tag(self, resource_id: int, tag: str) -> list[str]: + raise NotImplementedError(self._get_not_implemented_message("untag")) + + def untag(self, resource_id: int, tag: str) -> list[str]: + raise NotImplementedError(self._get_not_implemented_message("untag")) diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py new file mode 100644 index 000000000..f3a49a84f --- /dev/null +++ b/openml/_api/resources/datasets.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from openml._api.resources.base import DatasetsAPI, ResourceV1, ResourceV2 + +if TYPE_CHECKING: + from responses import Response + + from openml.datasets.dataset import OpenMLDataset + + +class DatasetsV1(ResourceV1, DatasetsAPI): + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError + + +class DatasetsV2(ResourceV2, DatasetsAPI): + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError diff --git a/openml/_api/resources/setups.py b/openml/_api/resources/setups.py new file mode 100644 index 000000000..12caf7a23 --- /dev/null +++ b/openml/_api/resources/setups.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +import builtins +from collections.abc import Iterable +from typing import Any + +import xmltodict + +from openml._api.resources.base import ResourceV1, ResourceV2, SetupsAPI +from openml.setups.setup import OpenMLParameter, OpenMLSetup + + +class SetupsV1(ResourceV1, SetupsAPI): + """V1 XML API implementation for setups.""" + + def list( + self, + limit: int, + offset: int, + *, + setup: Iterable[int] | None = None, + flow: int | None = None, + tag: str | None = None, + ) -> builtins.list[OpenMLSetup]: + """Perform API call `/setup/list/{filters}` + + Parameters + ---------- + The setup argument that is a list is separated from the single value + filters which are put into the kwargs. + + limit : int + offset : int + setup : list(int), optional + flow : int, optional + tag : str, optional + + Returns + ------- + list + setups that match the filters, going from id to the OpenMLSetup object. + """ + api_call = self._build_url(limit, offset, setup=setup, flow=flow, tag=tag) + setup_response = self._http.get(api_call) + xml_content = setup_response.text + + return self._parse_list_xml(xml_content) + + def _build_url( + self, + limit: int, + offset: int, + *, + setup: Iterable[int] | None = None, + flow: int | None = None, + tag: str | None = None, + ) -> str: + """Construct an OpenML Setup API URL with filtering parameters. + + Parameters + ---------- + The setup argument that is a list is separated from the single value + filters which are put into the kwargs. 
+ + limit : int + offset : int + setup : list(int), optional + flow : int, optional + tag : str, optional + + Returns + ------- + str + A relative API path suitable for an OpenML HTTP request. + """ + api_call = "setup/list" + if limit is not None: + api_call += f"/limit/{limit}" + if offset is not None: + api_call += f"/offset/{offset}" + if setup is not None: + api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" + if flow is not None: + api_call += f"/flow/{flow}" + if tag is not None: + api_call += f"/tag/{tag}" + + return api_call + + def _parse_list_xml(self, xml_content: str) -> builtins.list[OpenMLSetup]: + """Helper function to parse API calls which are lists of setups""" + setups_dict = xmltodict.parse(xml_content, force_list=("oml:setup",)) + openml_uri = "http://openml.org/openml" + # Minimalistic check if the XML is useful + if "oml:setups" not in setups_dict: + raise ValueError( + f'Error in return XML, does not contain "oml:setups": {setups_dict!s}', + ) + + if "@xmlns:oml" not in setups_dict["oml:setups"]: + raise ValueError( + f'Error in return XML, does not contain "oml:setups"/@xmlns:oml: {setups_dict!s}', + ) + + if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri: + raise ValueError( + "Error in return XML, value of " + '"oml:seyups"/@xmlns:oml is not ' + f'"{openml_uri}": {setups_dict!s}', + ) + + assert isinstance(setups_dict["oml:setups"]["oml:setup"], list), type( + setups_dict["oml:setups"] + ) + + return [ + self._create_setup({"oml:setup_parameters": setup_}) + for setup_ in setups_dict["oml:setups"]["oml:setup"] + ] + + def _create_setup(self, result_dict: dict) -> OpenMLSetup: + """Turns an API xml result into a OpenMLSetup object (or dict)""" + setup_id = int(result_dict["oml:setup_parameters"]["oml:setup_id"]) + flow_id = int(result_dict["oml:setup_parameters"]["oml:flow_id"]) + + if "oml:parameter" not in result_dict["oml:setup_parameters"]: + return OpenMLSetup(setup_id, flow_id, parameters=None) + + xml_parameters = result_dict["oml:setup_parameters"]["oml:parameter"] + if isinstance(xml_parameters, dict): + parameters = { + int(xml_parameters["oml:id"]): self._create_setup_parameter_from_xml( + xml_parameters + ), + } + elif isinstance(xml_parameters, builtins.list): + parameters = { + int(xml_parameter["oml:id"]): self._create_setup_parameter_from_xml(xml_parameter) + for xml_parameter in xml_parameters + } + else: + raise ValueError( + f"Expected None, list or dict, received something else: {type(xml_parameters)!s}", + ) + + return OpenMLSetup(setup_id, flow_id, parameters) + + def _create_setup_parameter_from_xml(self, result_dict: dict[str, str]) -> OpenMLParameter: + """Create an OpenMLParameter object or a dictionary from an API xml result.""" + return OpenMLParameter( + input_id=int(result_dict["oml:id"]), + flow_id=int(result_dict["oml:flow_id"]), + flow_name=result_dict["oml:flow_name"], + full_name=result_dict["oml:full_name"], + parameter_name=result_dict["oml:parameter_name"], + data_type=result_dict["oml:data_type"], + default_value=result_dict["oml:default_value"], + value=result_dict["oml:value"], + ) + + def get(self, setup_id: int) -> tuple[str, OpenMLSetup]: + """ + Downloads the setup (configuration) description from OpenML + and returns a structured object + + Parameters + ---------- + setup_id : int + The Openml setup_id + + Returns + ------- + tuple[str, OpenMLSetup] + A tuple containing: + - xml_content: The raw XML response from the server + - setup: An initialized OpenMLSetup object parsed from the XML + """ + 
url_suffix = f"/setup/{setup_id}" + setup_response = self._http.get(url_suffix) + xml_content = setup_response.text + result_dict = xmltodict.parse(xml_content) + + setup = self._create_setup(result_dict) + return xml_content, setup + + def exists(self, file_elements: dict[str, Any]) -> int: + """ + Checks whether a hyperparameter configuration already exists on the server. + + Parameters + ---------- + file_elements : dict + Dictionary containing file data for the API request + + Returns + ------- + setup_id : int + setup id iff exists, False otherwise + """ + api_call = "/setup/exists/" + setup_response = self._http.post(api_call, files=file_elements) + xml_content = setup_response.text + result_dict = xmltodict.parse(xml_content) + + setup_id = int(result_dict["oml:setup_exists"]["oml:id"]) + return setup_id if setup_id > 0 else False + + +class SetupsV2(ResourceV2, SetupsAPI): + """V2 JSoN API implementation for setups.""" + + def list( + self, + limit: int, + offset: int, + *, + setup: Iterable[int] | None = None, + flow: int | None = None, + tag: str | None = None, + ) -> builtins.list[OpenMLSetup]: + raise NotImplementedError("V2 API implementation is not yet available") + + def _create_setup(self, result_dict: dict) -> OpenMLSetup: + raise NotImplementedError("V2 API implementation is not yet available") + + def get(self, setup_id: int) -> tuple[str, OpenMLSetup]: + raise NotImplementedError("V2 API implementation is not yet available") + + def exists(self, file_elements: dict[str, Any]) -> int: + raise NotImplementedError("V2 API implementation is not yet available") diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py new file mode 100644 index 000000000..295e7a73d --- /dev/null +++ b/openml/_api/resources/tasks.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import xmltodict + +from openml._api.resources.base import ResourceV1, ResourceV2, TasksAPI +from openml.tasks.task import ( + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLLearningCurveTask, + OpenMLRegressionTask, + OpenMLTask, + TaskType, +) + +if TYPE_CHECKING: + from requests import Response + + +class TasksV1(ResourceV1, TasksAPI): + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + path = f"task/{task_id}" + response = self._http.get(path, use_cache=True) + xml_content = response.text + task = self._create_task_from_xml(xml_content) + + if return_response: + return task, response + + return task + + def _create_task_from_xml(self, xml: str) -> OpenMLTask: + """Create a task given a xml string. + + Parameters + ---------- + xml : string + Task xml representation. 
+ + Returns + ------- + OpenMLTask + """ + dic = xmltodict.parse(xml)["oml:task"] + estimation_parameters = {} + inputs = {} + # Due to the unordered structure we obtain, we first have to extract + # the possible keys of oml:input; dic["oml:input"] is a list of + # OrderedDicts + + # Check if there is a list of inputs + if isinstance(dic["oml:input"], list): + for input_ in dic["oml:input"]: + name = input_["@name"] + inputs[name] = input_ + # Single input case + elif isinstance(dic["oml:input"], dict): + name = dic["oml:input"]["@name"] + inputs[name] = dic["oml:input"] + + evaluation_measures = None + if "evaluation_measures" in inputs: + evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ + "oml:evaluation_measure" + ] + + task_type = TaskType(int(dic["oml:task_type_id"])) + common_kwargs = { + "task_id": dic["oml:task_id"], + "task_type": dic["oml:task_type"], + "task_type_id": task_type, + "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], + "evaluation_measure": evaluation_measures, + } + # TODO: add OpenMLClusteringTask? + if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + # Convert some more parameters + for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ + "oml:parameter" + ]: + name = parameter["@name"] + text = parameter.get("#text", "") + estimation_parameters[name] = text + + common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:type"] + common_kwargs["estimation_procedure_id"] = int( + inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] + ) + + common_kwargs["estimation_parameters"] = estimation_parameters + common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ + "oml:target_feature" + ] + common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:data_splits_url"] + + cls = { + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) + if cls is None: + raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") + return cls(**common_kwargs) # type: ignore + + +class TasksV2(ResourceV2, TasksAPI): + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + raise NotImplementedError(self._get_not_implemented_message("get")) diff --git a/openml/_api/runtime/__init__.py b/openml/_api/runtime/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py new file mode 100644 index 000000000..5a3a4a04a --- /dev/null +++ b/openml/_api/runtime/core.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from openml._api.clients import HTTPCache, HTTPClient +from openml._api.config import settings +from openml._api.resources import ( + DatasetsV1, + DatasetsV2, + FallbackProxy, + SetupsV1, + SetupsV2, + TasksV1, + TasksV2, +) + +if TYPE_CHECKING: + from openml._api.resources.base import DatasetsAPI, SetupsAPI, TasksAPI + + +class APIBackend: + def __init__( + self, + *, + datasets: DatasetsAPI | FallbackProxy, + tasks: TasksAPI | FallbackProxy, + setups: SetupsAPI | FallbackProxy, + ): + self.datasets = 
datasets + self.tasks = tasks + self.setups = setups + + +def build_backend(version: str, *, strict: bool) -> APIBackend: + http_cache = HTTPCache( + path=Path(settings.cache.dir), + ttl=settings.cache.ttl, + ) + v1_http_client = HTTPClient( + server=settings.api.v1.server, + base_url=settings.api.v1.base_url, + api_key=settings.api.v1.api_key, + timeout=settings.api.v1.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + v2_http_client = HTTPClient( + server=settings.api.v2.server, + base_url=settings.api.v2.base_url, + api_key=settings.api.v2.api_key, + timeout=settings.api.v2.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + + v1 = APIBackend( + datasets=DatasetsV1(v1_http_client), + tasks=TasksV1(v1_http_client), + setups=SetupsV1(v1_http_client), + ) + + if version == "v1": + return v1 + + v2 = APIBackend( + datasets=DatasetsV2(v2_http_client), + tasks=TasksV2(v2_http_client), + setups=SetupsV2(v2_http_client), + ) + + if strict: + return v2 + + return APIBackend( + datasets=FallbackProxy(DatasetsV2(v2_http_client), DatasetsV1(v1_http_client)), + tasks=FallbackProxy(TasksV2(v2_http_client), TasksV1(v1_http_client)), + setups=FallbackProxy(SetupsV2(v2_http_client), SetupsV1(v1_http_client)), + ) + + +class APIContext: + def __init__(self) -> None: + self._backend = build_backend("v1", strict=False) + + def set_version(self, version: str, *, strict: bool = False) -> None: + self._backend = build_backend(version=version, strict=strict) + + @property + def backend(self) -> APIBackend: + return self._backend diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 4bf279ed1..dc47ca419 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -6,7 +6,7 @@ from functools import partial from itertools import chain from pathlib import Path -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import pandas as pd import xmltodict @@ -15,9 +15,11 @@ import openml.exceptions import openml.utils from openml import config +from openml._api import api_context from openml.flows import OpenMLFlow, flow_exists -from .setup import OpenMLParameter, OpenMLSetup +if TYPE_CHECKING: + from .setup import OpenMLSetup def setup_exists(flow: OpenMLFlow) -> int: @@ -56,14 +58,8 @@ def setup_exists(flow: OpenMLFlow) -> int: file_elements = { "description": ("description.arff", description), } # type: openml._api_calls.FILE_ELEMENTS_TYPE - result = openml._api_calls._perform_api_call( - "/setup/exists/", - "post", - file_elements=file_elements, - ) - result_dict = xmltodict.parse(result) - setup_id = int(result_dict["oml:setup_exists"]["oml:id"]) - return setup_id if setup_id > 0 else False + + return api_context.backend.setups.exists(file_elements=file_elements) def _get_cached_setup(setup_id: int) -> OpenMLSetup: @@ -90,7 +86,7 @@ def _get_cached_setup(setup_id: int) -> OpenMLSetup: setup_file = setup_cache_dir / "description.xml" with setup_file.open(encoding="utf8") as fh: setup_xml = xmltodict.parse(fh.read()) - return _create_setup_from_xml(setup_xml) + return _create_setup(setup_xml) except OSError as e: raise openml.exceptions.OpenMLCacheException( @@ -120,13 +116,12 @@ def get_setup(setup_id: int) -> OpenMLSetup: try: return _get_cached_setup(setup_id) except openml.exceptions.OpenMLCacheException: - url_suffix = f"/setup/{setup_id}" - setup_xml = openml._api_calls._perform_api_call(url_suffix, "get") + 
result: tuple[str, OpenMLSetup] = api_context.backend.setups.get(setup_id=setup_id) + setup_xml, setup = result with setup_file.open("w", encoding="utf8") as fh: fh.write(setup_xml) - result_dict = xmltodict.parse(setup_xml) - return _create_setup_from_xml(result_dict) + return setup def list_setups( # noqa: PLR0913 @@ -161,7 +156,7 @@ def list_setups( # noqa: PLR0913 "Invalid output format selected. Only 'object', or 'dataframe' applicable.", ) - listing_call = partial(_list_setups, flow=flow, tag=tag, setup=setup) + listing_call = partial(api_context.backend.setups.list, flow=flow, tag=tag, setup=setup) batches = openml.utils._list_all( listing_call, batch_size=1_000, # batch size for setups is lower @@ -176,77 +171,6 @@ def list_setups( # noqa: PLR0913 return pd.DataFrame.from_records(records, index="setup_id") -def _list_setups( - limit: int, - offset: int, - *, - setup: Iterable[int] | None = None, - flow: int | None = None, - tag: str | None = None, -) -> list[OpenMLSetup]: - """Perform API call `/setup/list/{filters}` - - Parameters - ---------- - The setup argument that is a list is separated from the single value - filters which are put into the kwargs. - - limit : int - offset : int - setup : list(int), optional - flow : int, optional - tag : str, optional - - Returns - ------- - The setups that match the filters, going from id to the OpenMLSetup object. - """ - api_call = "setup/list" - if limit is not None: - api_call += f"/limit/{limit}" - if offset is not None: - api_call += f"/offset/{offset}" - if setup is not None: - api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" - if flow is not None: - api_call += f"/flow/{flow}" - if tag is not None: - api_call += f"/tag/{tag}" - - return __list_setups(api_call=api_call) - - -def __list_setups(api_call: str) -> list[OpenMLSetup]: - """Helper function to parse API calls which are lists of setups""" - xml_string = openml._api_calls._perform_api_call(api_call, "get") - setups_dict = xmltodict.parse(xml_string, force_list=("oml:setup",)) - openml_uri = "http://openml.org/openml" - # Minimalistic check if the XML is useful - if "oml:setups" not in setups_dict: - raise ValueError( - f'Error in return XML, does not contain "oml:setups": {setups_dict!s}', - ) - - if "@xmlns:oml" not in setups_dict["oml:setups"]: - raise ValueError( - f'Error in return XML, does not contain "oml:setups"/@xmlns:oml: {setups_dict!s}', - ) - - if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri: - raise ValueError( - "Error in return XML, value of " - '"oml:seyups"/@xmlns:oml is not ' - f'"{openml_uri}": {setups_dict!s}', - ) - - assert isinstance(setups_dict["oml:setups"]["oml:setup"], list), type(setups_dict["oml:setups"]) - - return [ - _create_setup_from_xml({"oml:setup_parameters": setup_}) - for setup_ in setups_dict["oml:setups"]["oml:setup"] - ] - - def initialize_model(setup_id: int, *, strict_version: bool = True) -> Any: """ Initialized a model based on a setup_id (i.e., using the exact @@ -307,41 +231,6 @@ def _to_dict(flow_id: int, openml_parameter_settings: list[dict[str, Any]]) -> O return xml -def _create_setup_from_xml(result_dict: dict) -> OpenMLSetup: +def _create_setup(result_dict: dict) -> OpenMLSetup: """Turns an API xml result into a OpenMLSetup object (or dict)""" - setup_id = int(result_dict["oml:setup_parameters"]["oml:setup_id"]) - flow_id = int(result_dict["oml:setup_parameters"]["oml:flow_id"]) - - if "oml:parameter" not in result_dict["oml:setup_parameters"]: - return OpenMLSetup(setup_id, flow_id, parameters=None) 
- - xml_parameters = result_dict["oml:setup_parameters"]["oml:parameter"] - if isinstance(xml_parameters, dict): - parameters = { - int(xml_parameters["oml:id"]): _create_setup_parameter_from_xml(xml_parameters), - } - elif isinstance(xml_parameters, list): - parameters = { - int(xml_parameter["oml:id"]): _create_setup_parameter_from_xml(xml_parameter) - for xml_parameter in xml_parameters - } - else: - raise ValueError( - f"Expected None, list or dict, received something else: {type(xml_parameters)!s}", - ) - - return OpenMLSetup(setup_id, flow_id, parameters) - - -def _create_setup_parameter_from_xml(result_dict: dict[str, str]) -> OpenMLParameter: - """Create an OpenMLParameter object or a dictionary from an API xml result.""" - return OpenMLParameter( - input_id=int(result_dict["oml:id"]), - flow_id=int(result_dict["oml:flow_id"]), - flow_name=result_dict["oml:flow_name"], - full_name=result_dict["oml:full_name"], - parameter_name=result_dict["oml:parameter_name"], - data_type=result_dict["oml:data_type"], - default_value=result_dict["oml:default_value"], - value=result_dict["oml:value"], - ) + return api_context.backend.setups._create_setup(result_dict) diff --git a/openml/testing.py b/openml/testing.py index 8d3bbbd5b..b0aaac9be 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -11,10 +11,13 @@ import unittest from pathlib import Path from typing import ClassVar +from urllib.parse import urljoin import requests import openml +from openml._api.clients import HTTPCache, HTTPClient +from openml._api.config import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -276,6 +279,91 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 assert evaluation <= max_val +class TestAPIBase(unittest.TestCase): + server: str + base_url: str + api_key: str + timeout: int + retries: int + retry_policy: RetryPolicy + dir: str + ttl: int + cache: HTTPCache + http_client: HTTPClient + + def setUp(self) -> None: + self.server = "https://test.openml.org/" + self.base_url = "api/v1/xml" + self.api_key = "normaluser" + self.timeout = 10 + self.retries = 3 + self.retry_policy = RetryPolicy.HUMAN + self.dir = "test_cache" + self.ttl = 60 * 60 * 24 * 7 + + self.cache = self._get_http_cache( + path=Path(self.dir), + ttl=self.ttl, + ) + self.http_client = self._get_http_client( + server=self.server, + base_url=self.base_url, + api_key=self.api_key, + timeout=self.timeout, + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ) + + if self.cache.path.exists(): + shutil.rmtree(self.cache.path) + + def tearDown(self) -> None: + if self.cache.path.exists(): + shutil.rmtree(self.cache.path) + + def _get_http_cache( + self, + path: Path, + ttl: int, + ) -> HTTPCache: + return HTTPCache( + path=path, + ttl=ttl, + ) + + def _get_http_client( # noqa: PLR0913 + self, + server: str, + base_url: str, + api_key: str, + timeout: int, + retries: int, + retry_policy: RetryPolicy, + cache: HTTPCache | None = None, + ) -> HTTPClient: + return HTTPClient( + server=server, + base_url=base_url, + api_key=api_key, + timeout=timeout, + retries=retries, + retry_policy=retry_policy, + cache=cache, + ) + + def _get_url( + self, + server: str | None = None, + base_url: str | None = None, + path: str | None = None, + ) -> str: + server = server if server else self.server + base_url = base_url if base_url else self.base_url + path = path if path else "" + return urljoin(self.server, urljoin(self.base_url, path)) + + def check_task_existence( task_type: 
TaskType, dataset_id: int, diff --git a/tests/test_api/__init__.py b/tests/test_api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py new file mode 100644 index 000000000..94ce5ee93 --- /dev/null +++ b/tests/test_api/test_http.py @@ -0,0 +1,140 @@ +from requests import Response, Request +import time +import xmltodict +import pytest +from openml.testing import TestAPIBase + + +class TestHTTPClient(TestAPIBase): + def test_cache(self): + url = self._get_url(path="task/31") + params = {"param1": "value1", "param2": "value2"} + + key = self.cache.get_key(url, params) + + # validate key + self.assertEqual( + key, + "org/openml/test/api/v1/task/31/param1=value1¶m2=value2", + ) + + # create fake response + req = Request("GET", url).prepare() + response = Response() + response.status_code = 200 + response.url = url + response.reason = "OK" + response._content = b"test" + response.headers = {"Content-Type": "text/xml"} + response.encoding = "utf-8" + response.request = req + response.elapsed = type("Elapsed", (), {"total_seconds": lambda self: 0.1})() + + # save to cache + self.cache.save(key, response) + + # load from cache + cached_response = self.cache.load(key) + + # validate loaded response + self.assertEqual(cached_response.status_code, 200) + self.assertEqual(cached_response.url, url) + self.assertEqual(cached_response.content, b"test") + self.assertEqual( + cached_response.headers["Content-Type"], "text/xml" + ) + + @pytest.mark.uses_test_server() + def test_get(self): + response = self.http_client.get("task/1") + + self.assertEqual(response.status_code, 200) + self.assertIn(b" new request + self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) + self.assertEqual(response2.status_code, 200) + self.assertEqual(response1.content, response2.content) + + @pytest.mark.uses_test_server() + def test_post_and_delete(self): + task_xml = """ + + 5 + 193 + 17 + + """ + + task_id = None + try: + # POST the task + post_response = self.http_client.post( + "task", + files={"description": task_xml}, + ) + self.assertEqual(post_response.status_code, 200) + xml_resp = xmltodict.parse(post_response.content) + task_id = int(xml_resp["oml:upload_task"]["oml:id"]) + + # GET the task to verify it exists + get_response = self.http_client.get(f"task/{task_id}") + self.assertEqual(get_response.status_code, 200) + + finally: + # DELETE the task if it was created + if task_id is not None: + try: + del_response = self.http_client.delete(f"task/{task_id}") + # optional: verify delete + if del_response.status_code != 200: + print(f"Warning: delete failed for task {task_id}") + except Exception as e: + print(f"Warning: failed to delete task {task_id}: {e}") diff --git a/tests/test_api/test_setups.py b/tests/test_api/test_setups.py new file mode 100644 index 000000000..9177a8205 --- /dev/null +++ b/tests/test_api/test_setups.py @@ -0,0 +1,87 @@ +# License: BSD 3-Clause +from __future__ import annotations + +import pytest +from openml._api.config import settings + +from openml._api.resources.setups import SetupsV1, SetupsV2 +from openml.setups.setup import OpenMLSetup +from openml.testing import TestAPIBase +from openml._api.resources.base.fallback import FallbackProxy + + + +class TestSetupsV1(TestAPIBase): + """Tests for V1 XML API implementation of setups.""" + + _multiprocess_can_split_ = True + + def setUp(self) -> None: + super().setUp() + self.client = self._get_http_client( + server=settings.api.v1.server, + 
+            base_url=settings.api.v1.base_url,
+            api_key=settings.api.v1.api_key,
+            timeout=settings.api.v1.timeout,
+            retries=settings.connection.retries,
+            retry_policy=settings.connection.retry_policy,
+        )
+        self.resource = SetupsV1(self.client)
+
+    @pytest.mark.uses_test_server()
+    def test_list(self):
+        setups = self.resource.list(limit=10, offset=0)
+
+        assert isinstance(setups, list)
+        assert len(setups) > 0
+        assert all(isinstance(s, OpenMLSetup) for s in setups)
+
+    @pytest.mark.uses_test_server()
+    def test_get(self):
+        setup_id = 1
+        xml_content, setup = self.resource.get(setup_id)
+
+        assert isinstance(xml_content, str)
+        assert isinstance(setup, OpenMLSetup)
+        assert setup.setup_id == setup_id
+
+
+class TestSetupsV2(TestAPIBase):
+    """Tests for V2 JSON API implementation of setups."""
+
+    _multiprocess_can_split_ = True
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.client = self._get_http_client(
+            server=settings.api.v2.server,
+            base_url=settings.api.v2.base_url,
+            api_key=settings.api.v2.api_key,
+            timeout=settings.api.v2.timeout,
+            retries=settings.connection.retries,
+            retry_policy=settings.connection.retry_policy,
+        )
+        self.resource = SetupsV2(self.client)
+
+
+class TestSetupsCombined(TestAPIBase):
+    def setUp(self) -> None:
+        super().setUp()
+        self.v1_client = self._get_http_client(
+            server=settings.api.v1.server,
+            base_url=settings.api.v1.base_url,
+            api_key=settings.api.v1.api_key,
+            timeout=settings.api.v1.timeout,
+            retries=settings.connection.retries,
+            retry_policy=settings.connection.retry_policy,
+        )
+        self.v2_client = self._get_http_client(
+            server=settings.api.v2.server,
+            base_url=settings.api.v2.base_url,
+            api_key=settings.api.v2.api_key,
+            timeout=settings.api.v2.timeout,
+            retries=settings.connection.retries,
+            retry_policy=settings.connection.retry_policy,
+        )
+        self.resource_v1 = SetupsV1(self.v1_client)
+        self.resource_v2 = SetupsV2(self.v2_client)
+        self.resource_fallback = FallbackProxy(self.resource_v2, self.resource_v1)
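TestSetupsCombined wires up the fallback proxy but does not exercise it yet. A minimal sketch of such a test, continuing the class above and assuming the V1 endpoint answers as in TestSetupsV1 (the method name test_list_falls_back_to_v1 and its assertions are illustrative, not part of this PR):

    @pytest.mark.uses_test_server()
    def test_list_falls_back_to_v1(self):
        # SetupsV2.list raises NotImplementedError, so FallbackProxy should
        # transparently re-issue the call against the V1 resource.
        setups = self.resource_fallback.list(limit=5, offset=0)

        assert isinstance(setups, list)
        assert all(isinstance(s, OpenMLSetup) for s in setups)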