Skip to content

Commit 9bcbcb3

Browse files
committed
Refactor functions, except get
1 parent 18e85de commit 9bcbcb3

1 file changed

Lines changed: 21 additions & 160 deletions

File tree

openml/datasets/functions.py

Lines changed: 21 additions & 160 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
import logging
66
import os
77
import warnings
8-
from collections import OrderedDict
98
from functools import partial
109
from pathlib import Path
1110
from pyexpat import ExpatError
@@ -22,6 +21,7 @@
2221

2322
import openml._api_calls
2423
import openml.utils
24+
from openml._api import api_context
2525
from openml.config import OPENML_SKIP_PARQUET_ENV_VAR
2626
from openml.exceptions import (
2727
OpenMLHashException,
@@ -65,17 +65,7 @@ def list_qualities() -> list[str]:
6565
-------
6666
list
6767
"""
68-
api_call = "data/qualities/list"
69-
xml_string = openml._api_calls._perform_api_call(api_call, "get")
70-
qualities = xmltodict.parse(xml_string, force_list=("oml:quality"))
71-
# Minimalistic check if the XML is useful
72-
if "oml:data_qualities_list" not in qualities:
73-
raise ValueError('Error in return XML, does not contain "oml:data_qualities_list"')
74-
75-
if not isinstance(qualities["oml:data_qualities_list"]["oml:quality"], list):
76-
raise TypeError('Error in return XML, does not contain "oml:quality" as a list')
77-
78-
return qualities["oml:data_qualities_list"]["oml:quality"]
68+
return api_context.backend.datasets.list_qualities()
7969

8070

8171
def list_datasets(
@@ -129,7 +119,7 @@ def list_datasets(
129119
these are also included as columns.
130120
"""
131121
listing_call = partial(
132-
_list_datasets,
122+
api_context.backend.datasets.list,
133123
data_id=data_id,
134124
status=status,
135125
tag=tag,
@@ -147,92 +137,6 @@ def list_datasets(
147137
return pd.concat(batches)
148138

149139

150-
def _list_datasets(
151-
limit: int,
152-
offset: int,
153-
*,
154-
data_id: list[int] | None = None,
155-
**kwargs: Any,
156-
) -> pd.DataFrame:
157-
"""
158-
Perform api call to return a list of all datasets.
159-
160-
Parameters
161-
----------
162-
The arguments that are lists are separated from the single value
163-
ones which are put into the kwargs.
164-
display_errors is also separated from the kwargs since it has a
165-
default value.
166-
167-
limit : int
168-
The maximum number of datasets to show.
169-
offset : int
170-
The number of datasets to skip, starting from the first.
171-
data_id : list, optional
172-
173-
kwargs : dict, optional
174-
Legal filter operators (keys in the dict):
175-
tag, status, limit, offset, data_name, data_version, number_instances,
176-
number_features, number_classes, number_missing_values.
177-
178-
Returns
179-
-------
180-
datasets : dataframe
181-
"""
182-
api_call = "data/list"
183-
184-
if limit is not None:
185-
api_call += f"/limit/{limit}"
186-
if offset is not None:
187-
api_call += f"/offset/{offset}"
188-
189-
if kwargs is not None:
190-
for operator, value in kwargs.items():
191-
if value is not None:
192-
api_call += f"/{operator}/{value}"
193-
if data_id is not None:
194-
api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}"
195-
return __list_datasets(api_call=api_call)
196-
197-
198-
def __list_datasets(api_call: str) -> pd.DataFrame:
199-
xml_string = openml._api_calls._perform_api_call(api_call, "get")
200-
datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",))
201-
202-
# Minimalistic check if the XML is useful
203-
assert isinstance(datasets_dict["oml:data"]["oml:dataset"], list), type(
204-
datasets_dict["oml:data"],
205-
)
206-
assert datasets_dict["oml:data"]["@xmlns:oml"] == "http://openml.org/openml", datasets_dict[
207-
"oml:data"
208-
]["@xmlns:oml"]
209-
210-
datasets = {}
211-
for dataset_ in datasets_dict["oml:data"]["oml:dataset"]:
212-
ignore_attribute = ["oml:file_id", "oml:quality"]
213-
dataset = {
214-
k.replace("oml:", ""): v for (k, v) in dataset_.items() if k not in ignore_attribute
215-
}
216-
dataset["did"] = int(dataset["did"])
217-
dataset["version"] = int(dataset["version"])
218-
219-
# The number of qualities can range from 0 to infinity
220-
for quality in dataset_.get("oml:quality", []):
221-
try:
222-
dataset[quality["@name"]] = int(quality["#text"])
223-
except ValueError:
224-
dataset[quality["@name"]] = float(quality["#text"])
225-
datasets[dataset["did"]] = dataset
226-
227-
return pd.DataFrame.from_dict(datasets, orient="index").astype(
228-
{
229-
"did": int,
230-
"version": int,
231-
"status": pd.CategoricalDtype(["active", "deactivated", "in_preparation"]),
232-
}
233-
)
234-
235-
236140
def _expand_parameter(parameter: str | list[str] | None) -> list[str]:
237141
expanded_parameter = []
238142
if isinstance(parameter, str):
@@ -808,14 +712,7 @@ def status_update(data_id: int, status: Literal["active", "deactivated"]) -> Non
808712
if status not in legal_status:
809713
raise ValueError(f"Illegal status value. Legal values: {legal_status}")
810714

811-
data: openml._api_calls.DATA_TYPE = {"data_id": data_id, "status": status}
812-
result_xml = openml._api_calls._perform_api_call("data/status/update", "post", data=data)
813-
result = xmltodict.parse(result_xml)
814-
server_data_id = result["oml:data_status_update"]["oml:id"]
815-
server_status = result["oml:data_status_update"]["oml:status"]
816-
if status != server_status or int(data_id) != int(server_data_id):
817-
# This should never happen
818-
raise ValueError("Data id/status does not collide")
715+
api_context.backend.datasets.status_update(data_id=data_id, status=status)
819716

820717

821718
def edit_dataset(
@@ -889,43 +786,20 @@ def edit_dataset(
889786
-------
890787
Dataset id
891788
"""
892-
if not isinstance(data_id, int):
893-
raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")
894-
895-
# compose data edit parameters as xml
896-
form_data = {"data_id": data_id} # type: openml._api_calls.DATA_TYPE
897-
xml = OrderedDict() # type: 'OrderedDict[str, OrderedDict]'
898-
xml["oml:data_edit_parameters"] = OrderedDict()
899-
xml["oml:data_edit_parameters"]["@xmlns:oml"] = "http://openml.org/openml"
900-
xml["oml:data_edit_parameters"]["oml:description"] = description
901-
xml["oml:data_edit_parameters"]["oml:creator"] = creator
902-
xml["oml:data_edit_parameters"]["oml:contributor"] = contributor
903-
xml["oml:data_edit_parameters"]["oml:collection_date"] = collection_date
904-
xml["oml:data_edit_parameters"]["oml:language"] = language
905-
xml["oml:data_edit_parameters"]["oml:default_target_attribute"] = default_target_attribute
906-
xml["oml:data_edit_parameters"]["oml:row_id_attribute"] = row_id_attribute
907-
xml["oml:data_edit_parameters"]["oml:ignore_attribute"] = ignore_attribute
908-
xml["oml:data_edit_parameters"]["oml:citation"] = citation
909-
xml["oml:data_edit_parameters"]["oml:original_data_url"] = original_data_url
910-
xml["oml:data_edit_parameters"]["oml:paper_url"] = paper_url
911-
912-
# delete None inputs
913-
for k in list(xml["oml:data_edit_parameters"]):
914-
if not xml["oml:data_edit_parameters"][k]:
915-
del xml["oml:data_edit_parameters"][k]
916-
917-
file_elements = {
918-
"edit_parameters": ("description.xml", xmltodict.unparse(xml)),
919-
} # type: openml._api_calls.FILE_ELEMENTS_TYPE
920-
result_xml = openml._api_calls._perform_api_call(
921-
"data/edit",
922-
"post",
923-
data=form_data,
924-
file_elements=file_elements,
789+
return api_context.backend.datasets.edit(
790+
data_id,
791+
description,
792+
creator,
793+
contributor,
794+
collection_date,
795+
language,
796+
default_target_attribute,
797+
ignore_attribute,
798+
citation,
799+
row_id_attribute,
800+
original_data_url,
801+
paper_url,
925802
)
926-
result = xmltodict.parse(result_xml)
927-
data_id = result["oml:data_edit"]["oml:id"]
928-
return int(data_id)
929803

930804

931805
def fork_dataset(data_id: int) -> int:
@@ -957,14 +831,7 @@ def fork_dataset(data_id: int) -> int:
957831
Dataset id of the forked dataset
958832
959833
"""
960-
if not isinstance(data_id, int):
961-
raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")
962-
# compose data fork parameters
963-
form_data = {"data_id": data_id} # type: openml._api_calls.DATA_TYPE
964-
result_xml = openml._api_calls._perform_api_call("data/fork", "post", data=form_data)
965-
result = xmltodict.parse(result_xml)
966-
data_id = result["oml:data_fork"]["oml:id"]
967-
return int(data_id)
834+
return api_context.backend.datasets.fork(data_id=data_id)
968835

969836

970837
def data_feature_add_ontology(data_id: int, index: int, ontology: str) -> bool:
@@ -988,10 +855,7 @@ def data_feature_add_ontology(data_id: int, index: int, ontology: str) -> bool:
988855
-------
989856
True or throws an OpenML server exception
990857
"""
991-
upload_data: dict[str, int | str] = {"data_id": data_id, "index": index, "ontology": ontology}
992-
openml._api_calls._perform_api_call("data/feature/ontology/add", "post", data=upload_data)
993-
# an error will be thrown in case the request was unsuccessful
994-
return True
858+
return api_context.backend.datasets.feature_add_ontology(data_id, index, ontology)
995859

996860

997861
def data_feature_remove_ontology(data_id: int, index: int, ontology: str) -> bool:
@@ -1014,10 +878,7 @@ def data_feature_remove_ontology(data_id: int, index: int, ontology: str) -> boo
1014878
-------
1015879
True or throws an OpenML server exception
1016880
"""
1017-
upload_data: dict[str, int | str] = {"data_id": data_id, "index": index, "ontology": ontology}
1018-
openml._api_calls._perform_api_call("data/feature/ontology/remove", "post", data=upload_data)
1019-
# an error will be thrown in case the request was unsuccessful
1020-
return True
881+
return api_context.backend.datasets.feature_remove_ontology(data_id, index, ontology)
1021882

1022883

1023884
def _topic_add_dataset(data_id: int, topic: str) -> int:
@@ -1460,4 +1321,4 @@ def delete_dataset(dataset_id: int) -> bool:
14601321
bool
14611322
True if the deletion was successful. False otherwise.
14621323
"""
1463-
return openml.utils._delete_entity("data", dataset_id)
1324+
return api_context.backend.datasets.delete(dataset_id)

0 commit comments

Comments
 (0)