55import logging
66import os
77import warnings
8- from collections import OrderedDict
98from functools import partial
109from pathlib import Path
1110from pyexpat import ExpatError
2221
2322import openml ._api_calls
2423import openml .utils
24+ from openml ._api import api_context
2525from openml .config import OPENML_SKIP_PARQUET_ENV_VAR
2626from openml .exceptions import (
2727 OpenMLHashException ,
@@ -65,17 +65,7 @@ def list_qualities() -> list[str]:
6565 -------
6666 list
6767 """
68- api_call = "data/qualities/list"
69- xml_string = openml ._api_calls ._perform_api_call (api_call , "get" )
70- qualities = xmltodict .parse (xml_string , force_list = ("oml:quality" ))
71- # Minimalistic check if the XML is useful
72- if "oml:data_qualities_list" not in qualities :
73- raise ValueError ('Error in return XML, does not contain "oml:data_qualities_list"' )
74-
75- if not isinstance (qualities ["oml:data_qualities_list" ]["oml:quality" ], list ):
76- raise TypeError ('Error in return XML, does not contain "oml:quality" as a list' )
77-
78- return qualities ["oml:data_qualities_list" ]["oml:quality" ]
68+ return api_context .backend .datasets .list_qualities ()
7969
8070
8171def list_datasets (
@@ -129,7 +119,7 @@ def list_datasets(
129119 these are also included as columns.
130120 """
131121 listing_call = partial (
132- _list_datasets ,
122+ api_context . backend . datasets . list ,
133123 data_id = data_id ,
134124 status = status ,
135125 tag = tag ,
@@ -147,92 +137,6 @@ def list_datasets(
147137 return pd .concat (batches )
148138
149139
150- def _list_datasets (
151- limit : int ,
152- offset : int ,
153- * ,
154- data_id : list [int ] | None = None ,
155- ** kwargs : Any ,
156- ) -> pd .DataFrame :
157- """
158- Perform api call to return a list of all datasets.
159-
160- Parameters
161- ----------
162- The arguments that are lists are separated from the single value
163- ones which are put into the kwargs.
164- display_errors is also separated from the kwargs since it has a
165- default value.
166-
167- limit : int
168- The maximum number of datasets to show.
169- offset : int
170- The number of datasets to skip, starting from the first.
171- data_id : list, optional
172-
173- kwargs : dict, optional
174- Legal filter operators (keys in the dict):
175- tag, status, limit, offset, data_name, data_version, number_instances,
176- number_features, number_classes, number_missing_values.
177-
178- Returns
179- -------
180- datasets : dataframe
181- """
182- api_call = "data/list"
183-
184- if limit is not None :
185- api_call += f"/limit/{ limit } "
186- if offset is not None :
187- api_call += f"/offset/{ offset } "
188-
189- if kwargs is not None :
190- for operator , value in kwargs .items ():
191- if value is not None :
192- api_call += f"/{ operator } /{ value } "
193- if data_id is not None :
194- api_call += f"/data_id/{ ',' .join ([str (int (i )) for i in data_id ])} "
195- return __list_datasets (api_call = api_call )
196-
197-
198- def __list_datasets (api_call : str ) -> pd .DataFrame :
199- xml_string = openml ._api_calls ._perform_api_call (api_call , "get" )
200- datasets_dict = xmltodict .parse (xml_string , force_list = ("oml:dataset" ,))
201-
202- # Minimalistic check if the XML is useful
203- assert isinstance (datasets_dict ["oml:data" ]["oml:dataset" ], list ), type (
204- datasets_dict ["oml:data" ],
205- )
206- assert datasets_dict ["oml:data" ]["@xmlns:oml" ] == "http://openml.org/openml" , datasets_dict [
207- "oml:data"
208- ]["@xmlns:oml" ]
209-
210- datasets = {}
211- for dataset_ in datasets_dict ["oml:data" ]["oml:dataset" ]:
212- ignore_attribute = ["oml:file_id" , "oml:quality" ]
213- dataset = {
214- k .replace ("oml:" , "" ): v for (k , v ) in dataset_ .items () if k not in ignore_attribute
215- }
216- dataset ["did" ] = int (dataset ["did" ])
217- dataset ["version" ] = int (dataset ["version" ])
218-
219- # The number of qualities can range from 0 to infinity
220- for quality in dataset_ .get ("oml:quality" , []):
221- try :
222- dataset [quality ["@name" ]] = int (quality ["#text" ])
223- except ValueError :
224- dataset [quality ["@name" ]] = float (quality ["#text" ])
225- datasets [dataset ["did" ]] = dataset
226-
227- return pd .DataFrame .from_dict (datasets , orient = "index" ).astype (
228- {
229- "did" : int ,
230- "version" : int ,
231- "status" : pd .CategoricalDtype (["active" , "deactivated" , "in_preparation" ]),
232- }
233- )
234-
235-
236140def _expand_parameter (parameter : str | list [str ] | None ) -> list [str ]:
237141 expanded_parameter = []
238142 if isinstance (parameter , str ):
@@ -808,14 +712,7 @@ def status_update(data_id: int, status: Literal["active", "deactivated"]) -> Non
808712 if status not in legal_status :
809713 raise ValueError (f"Illegal status value. Legal values: { legal_status } " )
810714
811- data : openml ._api_calls .DATA_TYPE = {"data_id" : data_id , "status" : status }
812- result_xml = openml ._api_calls ._perform_api_call ("data/status/update" , "post" , data = data )
813- result = xmltodict .parse (result_xml )
814- server_data_id = result ["oml:data_status_update" ]["oml:id" ]
815- server_status = result ["oml:data_status_update" ]["oml:status" ]
816- if status != server_status or int (data_id ) != int (server_data_id ):
817- # This should never happen
818- raise ValueError ("Data id/status does not collide" )
715+ api_context .backend .datasets .status_update (data_id = data_id , status = status )
819716
820717
821718def edit_dataset (
@@ -889,43 +786,20 @@ def edit_dataset(
889786 -------
890787 Dataset id
891788 """
892- if not isinstance (data_id , int ):
893- raise TypeError (f"`data_id` must be of type `int`, not { type (data_id )} ." )
894-
895- # compose data edit parameters as xml
896- form_data = {"data_id" : data_id } # type: openml._api_calls.DATA_TYPE
897- xml = OrderedDict () # type: 'OrderedDict[str, OrderedDict]'
898- xml ["oml:data_edit_parameters" ] = OrderedDict ()
899- xml ["oml:data_edit_parameters" ]["@xmlns:oml" ] = "http://openml.org/openml"
900- xml ["oml:data_edit_parameters" ]["oml:description" ] = description
901- xml ["oml:data_edit_parameters" ]["oml:creator" ] = creator
902- xml ["oml:data_edit_parameters" ]["oml:contributor" ] = contributor
903- xml ["oml:data_edit_parameters" ]["oml:collection_date" ] = collection_date
904- xml ["oml:data_edit_parameters" ]["oml:language" ] = language
905- xml ["oml:data_edit_parameters" ]["oml:default_target_attribute" ] = default_target_attribute
906- xml ["oml:data_edit_parameters" ]["oml:row_id_attribute" ] = row_id_attribute
907- xml ["oml:data_edit_parameters" ]["oml:ignore_attribute" ] = ignore_attribute
908- xml ["oml:data_edit_parameters" ]["oml:citation" ] = citation
909- xml ["oml:data_edit_parameters" ]["oml:original_data_url" ] = original_data_url
910- xml ["oml:data_edit_parameters" ]["oml:paper_url" ] = paper_url
911-
912- # delete None inputs
913- for k in list (xml ["oml:data_edit_parameters" ]):
914- if not xml ["oml:data_edit_parameters" ][k ]:
915- del xml ["oml:data_edit_parameters" ][k ]
916-
917- file_elements = {
918- "edit_parameters" : ("description.xml" , xmltodict .unparse (xml )),
919- } # type: openml._api_calls.FILE_ELEMENTS_TYPE
920- result_xml = openml ._api_calls ._perform_api_call (
921- "data/edit" ,
922- "post" ,
923- data = form_data ,
924- file_elements = file_elements ,
789+ return api_context .backend .datasets .edit (
790+ data_id ,
791+ description ,
792+ creator ,
793+ contributor ,
794+ collection_date ,
795+ language ,
796+ default_target_attribute ,
797+ ignore_attribute ,
798+ citation ,
799+ row_id_attribute ,
800+ original_data_url ,
801+ paper_url ,
925802 )
926- result = xmltodict .parse (result_xml )
927- data_id = result ["oml:data_edit" ]["oml:id" ]
928- return int (data_id )
929803
930804
931805def fork_dataset (data_id : int ) -> int :
@@ -957,14 +831,7 @@ def fork_dataset(data_id: int) -> int:
957831 Dataset id of the forked dataset
958832
959833 """
960- if not isinstance (data_id , int ):
961- raise TypeError (f"`data_id` must be of type `int`, not { type (data_id )} ." )
962- # compose data fork parameters
963- form_data = {"data_id" : data_id } # type: openml._api_calls.DATA_TYPE
964- result_xml = openml ._api_calls ._perform_api_call ("data/fork" , "post" , data = form_data )
965- result = xmltodict .parse (result_xml )
966- data_id = result ["oml:data_fork" ]["oml:id" ]
967- return int (data_id )
834+ return api_context .backend .datasets .fork (data_id = data_id )
968835
969836
970837def data_feature_add_ontology (data_id : int , index : int , ontology : str ) -> bool :
@@ -988,10 +855,7 @@ def data_feature_add_ontology(data_id: int, index: int, ontology: str) -> bool:
988855 -------
989856 True or throws an OpenML server exception
990857 """
991- upload_data : dict [str , int | str ] = {"data_id" : data_id , "index" : index , "ontology" : ontology }
992- openml ._api_calls ._perform_api_call ("data/feature/ontology/add" , "post" , data = upload_data )
993- # an error will be thrown in case the request was unsuccessful
994- return True
858+ return api_context .backend .datasets .feature_add_ontology (data_id , index , ontology )
995859
996860
997861def data_feature_remove_ontology (data_id : int , index : int , ontology : str ) -> bool :
@@ -1014,10 +878,7 @@ def data_feature_remove_ontology(data_id: int, index: int, ontology: str) -> boo
1014878 -------
1015879 True or throws an OpenML server exception
1016880 """
1017- upload_data : dict [str , int | str ] = {"data_id" : data_id , "index" : index , "ontology" : ontology }
1018- openml ._api_calls ._perform_api_call ("data/feature/ontology/remove" , "post" , data = upload_data )
1019- # an error will be thrown in case the request was unsuccessful
1020- return True
881+ return api_context .backend .datasets .feature_remove_ontology (data_id , index , ontology )
1021882
1022883
1023884def _topic_add_dataset (data_id : int , topic : str ) -> int :
@@ -1460,4 +1321,4 @@ def delete_dataset(dataset_id: int) -> bool:
14601321 bool
14611322 True if the deletion was successful. False otherwise.
14621323 """
1463- return openml . utils . _delete_entity ( "data" , dataset_id )
1324+ return api_context . backend . datasets . delete ( dataset_id )
0 commit comments