-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathload_dataset.py
More file actions
37 lines (25 loc) · 1.02 KB
/
load_dataset.py
File metadata and controls
37 lines (25 loc) · 1.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from .datasets_folder import get_datasets_folder_path
from .extension_utils import is_csv, is_excel, is_json
from os.path import join, isfile
from pandas import DataFrame, read_csv, read_excel, read_json
# Extra strings treated as missing values when a CSV dataset is read
# (passed to pandas.read_csv as ``na_values``).
_NA_VALUES = ['null', 'NULL', 'nan', 'NaN']
class UnknownDatasetType(Exception):
    """Raised when a dataset file is not recognised as CSV, JSON or Excel."""
def load_data_frame(dataset_path: str) -> DataFrame:
    """Load a dataset file into a DataFrame.

    The path is first resolved against the folder returned by
    ``get_datasets_folder_path()``. If no regular file exists at that
    location, ``dataset_path`` is used exactly as given.

    Args:
        dataset_path: Path of the dataset, either relative to the datasets
            folder or usable on its own.

    Returns:
        The DataFrame produced by ``_open_dataset_file`` for the chosen path.
    """
    candidate = join(get_datasets_folder_path(), dataset_path)
    # Prefer the copy inside the datasets folder; otherwise fall back to the
    # caller-supplied path unchanged.
    return _open_dataset_file(candidate if isfile(candidate) else dataset_path)
def _open_dataset_file(file_path: str) -> DataFrame:
    """Read the dataset at ``file_path`` with the matching pandas reader.

    The file type is decided by the ``is_csv``, ``is_json`` and ``is_excel``
    helpers, checked in that order.

    Args:
        file_path: Path of the dataset file to read.

    Returns:
        The DataFrame produced by the pandas reader for the detected type.

    Raises:
        UnknownDatasetType: If none of the helpers recognises the file. The
            exception message contains the offending path.
    """
    if is_csv(file_path):
        # CSV datasets are read as semicolon-separated, with the first column
        # used as the index and the _NA_VALUES strings treated as missing.
        return read_csv(file_path, sep=';', index_col=0, na_values=_NA_VALUES)
    if is_json(file_path):
        return read_json(file_path)
    if is_excel(file_path):
        return read_excel(file_path)
    # Include the path so the caller can tell which file was rejected.
    raise UnknownDatasetType(
        'Unsupported dataset file type: {!r}'.format(file_path)
    )