From f4784395045af6bc839e06680e2285b5089e9f52 Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Fri, 4 Feb 2022 21:07:28 +0530 Subject: [PATCH 01/11] Added from_csv function --- cleverdict/cleverdict.py | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index c2841f2..6641298 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -1,3 +1,5 @@ +from audioop import reverse +import csv import os import json import inspect @@ -7,6 +9,7 @@ from pprint import pprint from datetime import datetime import types +from typing import Union import inspect """ @@ -1045,6 +1048,55 @@ def from_json( else: return cls(data, **kwargs) + + @classmethod + def from_csv( + cls, + file_path: Union[Path, str], + header: bool = True, + names: list = None, + skip_rows: int = None, + delimiter: str = ',', + ignore: Union[str, list] = None, + exclude: Union[str, list] = None, + only: Union[str, list] = None + ) -> dict: + """Converts a CSV file to a Python dictionary""" + + ignore, only = _preprocess_options(ignore, exclude, only) + kwargs = {"ignore": ignore, "only": only} + if isinstance(file_path, str): + file_path = Path(file_path) + if not file_path.exists(): + raise ValueError('File not found') + + with open(file_path, "r", encoding="utf-8") as file: + reader = csv.reader(file, delimiter=delimiter) + csv_data = list(reader) + + if skip_rows is None: + start_row = 1 if header else 0 + else: + start_row = skip_rows + (1 if header else 0) + + if header: + names = csv_data[0] + elif names is not None: + if len(names) != len(csv_data[0]): + raise ValueError("Number of items in names does not match the number of columns") + else: + names = list(range(len(csv_data[0]))) + + if len(names) != len(set(names)): + raise ValueError("Names contain one or more duplicate values") + + data = {} + for idx, row in enumerate(csv_data[start_row:]): + current_row_dict = cls(dict(zip(names, row)), **kwargs) + data[idx] = current_row_dict + + return cls(data) + @classmethod def get_new_save_path(cls): """ From 9978cdb79416822145d0a8cc65c777a95d07ef90 Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Sun, 6 Feb 2022 17:16:07 +0530 Subject: [PATCH 02/11] Added documentation, handled more exceptions --- cleverdict/cleverdict.py | 72 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 5 deletions(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index 6641298..b15ef54 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -1052,23 +1052,77 @@ def from_json( @classmethod def from_csv( cls, - file_path: Union[Path, str], + file_path: Union[Path, str] = None, + skip_rows: int = None, + nrows: int = None, header: bool = True, names: list = None, - skip_rows: int = None, delimiter: str = ',', ignore: Union[str, list] = None, exclude: Union[str, list] = None, only: Union[str, list] = None ) -> dict: - """Converts a CSV file to a Python dictionary""" + """Creates a new CleverDict object from a CSV file. + Each row is also encoded as a CleverDict object with the key being the row number starting from zero + + Parameters: + ----------- + file_path: str | pathlib.Path + The path to the csv file + + skip_rows: int + Number of rows to skip from the beginning of the file + Does not count the header if header is True + + nrows: int + Number of rows to read + Does not count the header if header is True + + header: bool + Parses the first row of the csv file as headers + + names: list + List of names to be used as keys instead of csv headers + Cannot be used if header is True + The number of items in list must be the same as number of columns + + delimiter: str + The delimiter used in the csv file + + ignore: str | list + Any keys to ignore from output. + + exclude: iterable | str + Alias for ignore + + only: iterable | str + Only return output with the specified keys + + Returns + ------- + New CleverDict: CleverDict + + Raises: + ------- + TypeError: + If more than one of ignore, exclude, and only are specified + + ValueError: + - If file path is not provided or is invalid + - If names are specified without headers=False + - If number of items in names is not the same as number of columns + - If names, whether specified or read from the csv file, contains one or more duplicate values + """ + + if file_path is None: + raise ValueError("File path is not provided") ignore, only = _preprocess_options(ignore, exclude, only) kwargs = {"ignore": ignore, "only": only} if isinstance(file_path, str): file_path = Path(file_path) if not file_path.exists(): - raise ValueError('File not found') + raise ValueError("File not found") with open(file_path, "r", encoding="utf-8") as file: reader = csv.reader(file, delimiter=delimiter) @@ -1079,6 +1133,14 @@ def from_csv( else: start_row = skip_rows + (1 if header else 0) + if nrows is None: + end_row = None + else: + end_row = start_row + nrows + + if header and names: + raise ValueError("Names cannot be specified if header is True.\nHint: To specify custom names for CSV with headers, set header=False and skip_rows=1") + if header: names = csv_data[0] elif names is not None: @@ -1091,7 +1153,7 @@ def from_csv( raise ValueError("Names contain one or more duplicate values") data = {} - for idx, row in enumerate(csv_data[start_row:]): + for idx, row in enumerate(csv_data[start_row:end_row]): current_row_dict = cls(dict(zip(names, row)), **kwargs) data[idx] = current_row_dict From 3d0cdaef2a980db27ef450b26ec5dc71a8810386 Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Mon, 7 Feb 2022 23:40:48 +0530 Subject: [PATCH 03/11] Added test for from_csv --- cleverdict/test_cleverdict.py | 93 ++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/cleverdict/test_cleverdict.py b/cleverdict/test_cleverdict.py index f50d1eb..0982335 100644 --- a/cleverdict/test_cleverdict.py +++ b/cleverdict/test_cleverdict.py @@ -334,7 +334,7 @@ def test_only_OR_ignore_OR_exclude_as_args(self): perms = ["".join(list(x)).replace("=", "=['Yes'],") for x in perms] for args in perms: with pytest.raises(TypeError): - eval("x." + func.replace("(", "(" + args)) + eval("x." + func.replace("(", "(" + args)) def test_filters_with_init(self): """ @@ -676,6 +676,97 @@ def test_import_existing_cleverdict(test): assert list(y.keys()) == ["nationality"] +class Test_From_CSV: + def create_csv(self, tmpdir, delimiter): + data = [ + ['id', 'name', 'color'], + [1, 'Banana', 'yellow'], + [2, 'Apple', 'green'], + [3, 'Blueberry', 'blue'], + [4, 'Kinnow', 'orange'], + [5, 'Kiwi', 'brown'] + ] + with open(f'{tmpdir}/test_csv.csv', 'w') as f: + f.write('\n'.join(delimiter.join(str(k) for k in i) for i in data)) + + def test_missing_file(self): + """Creates a csv file from data and tests the output""" + + with pytest.raises(ValueError): + CleverDict.from_csv() + with pytest.raises(ValueError): + CleverDict.from_csv('test_csv.csv') + + def test_header_names(self, tmpdir): + self.create_csv(tmpdir, delimiter=',') + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv') + assert 0 in data.keys() + assert len(data.keys()) == 5 + assert data._0.name == 'Banana' + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', header=False, skip_rows=1, names=['sl', 'fruit', 'appearance']) + assert 'color' not in data._0.keys() + assert len(data._0.keys()) == 3 + assert len(data) == 5 + assert 'fruit' in data._0.keys() + assert data._0.fruit == 'Banana' + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', header=False, skip_rows=1) + assert 'color' not in data._0.keys() + assert len(data._0.keys()) == 3 + assert len(data) == 5 + assert 1 in data._0.keys() + assert data._0._1 == 'Banana' + + with pytest.raises(ValueError): + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', header=False, ignore='id') + with pytest.raises(ValueError): + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', header=False, only='id') + + def test_ignore_only(self, tmpdir): + self.create_csv(tmpdir, delimiter=',') + + with pytest.raises(TypeError): + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', ignore='id', only='name') + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', ignore='id') + assert 'id' not in data._0.keys() + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', only='name') + assert 'color' not in data._0.keys() + assert len(data._0.keys()) == 1 + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', ignore=['id', 'name']) + assert 'name' not in data._0.keys() + assert len(data._0.keys()) == 1 + + def test_skiprows_nrows(self, tmpdir): + self.create_csv(tmpdir, delimiter=',') + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', nrows=3) + assert len(data) == 3 + assert data._0.name == 'Banana' + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', skip_rows=1, nrows=3) + assert len(data) == 3 + assert data._0.name == 'Apple' + + def test_delimiter(self, tmpdir): + self.create_csv(tmpdir, delimiter='|') + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', delimiter='|') + assert 0 in data.keys() + assert len(data.keys()) == 5 + assert data._0.name == 'Banana' + + self.create_csv(tmpdir, delimiter='\t') + + data = CleverDict.from_csv(f'{tmpdir}/test_csv.csv', delimiter='\t') + assert 0 in data.keys() + assert len(data.keys()) == 5 + assert data._0.name == 'Banana' + + class Test_Internal_Logic: def test_raises_error(self): """ From b61cf8a03142c6231f8a83165a39865239b3a331 Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Mon, 7 Feb 2022 23:40:58 +0530 Subject: [PATCH 04/11] Handled edge cases thrown up by tests --- cleverdict/cleverdict.py | 63 ++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index b15ef54..7330698 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -1,4 +1,3 @@ -from audioop import reverse import csv import os import json @@ -303,6 +302,22 @@ def make_set(arg): return make_set(ignore) | make_set(exclude) | CleverDict.ignore, only +def _preprocess_csv(file_path: Union[str, Path], delimiter: str): + """Validates a CSV file and returns the data as a list of lists""" + + if isinstance(file_path, str): + file_path = Path(file_path) + if not file_path.exists(): + raise ValueError("File not found") + + with open(file_path, "r", encoding="utf-8") as file: + reader = csv.reader(file, delimiter=delimiter) + csv_data = list(reader) + if not csv_data: + raise ValueError("File is empty") + + return csv_data + class Expand: def __init__(self, ok): """ @@ -1056,12 +1071,12 @@ def from_csv( skip_rows: int = None, nrows: int = None, header: bool = True, - names: list = None, + names: list = [], delimiter: str = ',', ignore: Union[str, list] = None, exclude: Union[str, list] = None, only: Union[str, list] = None - ) -> dict: + ) -> dict: """Creates a new CleverDict object from a CSV file. Each row is also encoded as a CleverDict object with the key being the row number starting from zero @@ -1112,21 +1127,35 @@ def from_csv( - If names are specified without headers=False - If number of items in names is not the same as number of columns - If names, whether specified or read from the csv file, contains one or more duplicate values + + Example: + -------- + >>> data = [ + ... ['id', 'name', 'color'], + ... [1, 'Banana', 'yellow'], + ... [2, 'Apple', 'green'] + ... ] + >>> with open('test_csv.csv', 'w') as f: + ... f.write('\\n'.join(delimiter.join(str(k) for k in i) for i in data)) + >>> data = CleverDict.from_csv(f'test_csv.csv') + >>> print(data) + CleverDict( + { + 0: CleverDict({'id': '1', 'name': 'Banana', 'color': 'yellow'}, _aliases={}, _vars={}), + 1: CleverDict({'id': '2', 'name': 'Apple', 'color': 'green'}, _aliases={}, _vars={}) + }, _aliases={'_0': 0, '_False': 0, '_1': 1, '_True': 1}, _vars={} + ) """ if file_path is None: raise ValueError("File path is not provided") + if not header and not names and (ignore or only): + raise ValueError('Ignore, Exclude, and Only cannot be used without column names') ignore, only = _preprocess_options(ignore, exclude, only) kwargs = {"ignore": ignore, "only": only} - if isinstance(file_path, str): - file_path = Path(file_path) - if not file_path.exists(): - raise ValueError("File not found") - - with open(file_path, "r", encoding="utf-8") as file: - reader = csv.reader(file, delimiter=delimiter) - csv_data = list(reader) + + csv_data = _preprocess_csv(file_path, delimiter) if skip_rows is None: start_row = 1 if header else 0 @@ -1141,14 +1170,12 @@ def from_csv( if header and names: raise ValueError("Names cannot be specified if header is True.\nHint: To specify custom names for CSV with headers, set header=False and skip_rows=1") - if header: - names = csv_data[0] - elif names is not None: - if len(names) != len(csv_data[0]): + if not names: + names = csv_data[0] if header else list(range(len(csv_data[0]))) + print(names) + if len(names) != len(csv_data[0]): raise ValueError("Number of items in names does not match the number of columns") - else: - names = list(range(len(csv_data[0]))) - + if len(names) != len(set(names)): raise ValueError("Names contain one or more duplicate values") From f1e9b3ccec0d2b07980dac668a3b1da29c1976a6 Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Thu, 10 Feb 2022 23:36:31 +0530 Subject: [PATCH 05/11] Added checks for to_csv function --- cleverdict/cleverdict.py | 59 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index 7330698..07c1a77 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -8,7 +8,7 @@ from pprint import pprint from datetime import datetime import types -from typing import Union +from typing import Union, Iterable import inspect """ @@ -1186,6 +1186,63 @@ def from_csv( return cls(data) + def to_csv( + self, + file_path: Path = None, + ignore: Union[Iterable, str] = None, + exclude: Union[Iterable, str] = None, + only: Union[Iterable, str] = None + ) -> None: + """[summary] + + Args: + file_path (Path, optional): [description]. Defaults to None. + ignore (Union[Iterable, str], optional): [description]. Defaults to None. + exclude (Union[Iterable, str], optional): [description]. Defaults to None. + only (Union[Iterable, str], optional): [description]. Defaults to None. + + Raises: + TypeError: [description] + TypeError: [description] + + Returns:ye + + [type]: [description] + """ + + # if file_path is None: + # raise ValueError("File path not provided") + + ignore, exclude = _preprocess_options(ignore, exclude, only) + mapping = self._filtered_mapping(ignore, only) + + all_types = [] + for k, v in mapping.items(): + if isinstance(v, dict) or isinstance(v, CleverDict): + all_types.append(True) + else: + all_types.append(False) + + same_type = True if all(i for i in all_types) else False + + if same_type: + for _, v in mapping.items(): + for _, val in v.items(): + if (hasattr(val, '__iter__') or hasattr(val, '__getitem__')) and not isinstance(val, str): + raise ValueError("Values cannot contain iterables") + else: + raise ValueError("Parent object should only contain CleverDict objects or dict objects to be converted to a CSV") + + keys = [] + for _, v in mapping.items(): + if not keys: + keys.extend(list(v)) + elif list(v) != keys: + raise ValueError('All subitems should have the same keys') + else: + continue + print("all good") + @classmethod def get_new_save_path(cls): """ From ccc0bb2130f814686e7a926a279fe01ec9d7b01e Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Sat, 12 Feb 2022 08:42:57 +0530 Subject: [PATCH 06/11] more validations in to_csv --- cleverdict/cleverdict.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index 07c1a77..b60ec0c 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -8,7 +8,7 @@ from pprint import pprint from datetime import datetime import types -from typing import Union, Iterable +from typing import Union, Iterable, List import inspect """ @@ -318,6 +318,14 @@ def _preprocess_csv(file_path: Union[str, Path], delimiter: str): return csv_data +def _write_csv(file_path: Path, data: List[List]): + + with open(file_path, 'w') as file: + writer = csv.writer(file) + writer.writerows(data) + + return True + class Expand: def __init__(self, ok): """ @@ -1210,10 +1218,11 @@ def to_csv( [type]: [description] """ - # if file_path is None: - # raise ValueError("File path not provided") + if file_path is None: + raise ValueError("File path not provided") + + ignore, only = _preprocess_options(ignore, exclude, only) - ignore, exclude = _preprocess_options(ignore, exclude, only) mapping = self._filtered_mapping(ignore, only) all_types = [] @@ -1241,7 +1250,19 @@ def to_csv( raise ValueError('All subitems should have the same keys') else: continue - print("all good") + + data_list = [] + if ignore: + data_list.append([i for i in keys if i not in ignore]) + if only: + for i in keys: + data_list.append([i for i in keys if i in only.union(set(data_list[0]))]) + + for k, v in mapping.items(): + data_list.append([v[key] for key in keys]) + + written = _write_csv(file_path, data_list) + return written @classmethod def get_new_save_path(cls): From 988283e6fd964170d818fa02afcd30d4bb199d21 Mon Sep 17 00:00:00 2001 From: gouravkr Date: Sat, 12 Feb 2022 13:16:10 +0530 Subject: [PATCH 07/11] refactored to_csv function --- cleverdict/cleverdict.py | 62 ++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index b60ec0c..8ea7d65 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -11,6 +11,8 @@ from typing import Union, Iterable, List import inspect +from attr import field + """ Change log ========== @@ -318,13 +320,6 @@ def _preprocess_csv(file_path: Union[str, Path], delimiter: str): return csv_data -def _write_csv(file_path: Path, data: List[List]): - - with open(file_path, 'w') as file: - writer = csv.writer(file) - writer.writerows(data) - - return True class Expand: def __init__(self, ok): @@ -1180,7 +1175,7 @@ def from_csv( if not names: names = csv_data[0] if header else list(range(len(csv_data[0]))) - print(names) + if len(names) != len(csv_data[0]): raise ValueError("Number of items in names does not match the number of columns") @@ -1222,45 +1217,26 @@ def to_csv( raise ValueError("File path not provided") ignore, only = _preprocess_options(ignore, exclude, only) - - mapping = self._filtered_mapping(ignore, only) - - all_types = [] - for k, v in mapping.items(): - if isinstance(v, dict) or isinstance(v, CleverDict): - all_types.append(True) - else: - all_types.append(False) - - same_type = True if all(i for i in all_types) else False - + mapping = self._filtered_mapping(ignore, None) + + all_types = [1 if isinstance(v, (dict, CleverDict)) else 0 for _, v in mapping.items()] + same_type = False if 0 in all_types else True if same_type: for _, v in mapping.items(): for _, val in v.items(): if (hasattr(val, '__iter__') or hasattr(val, '__getitem__')) and not isinstance(val, str): raise ValueError("Values cannot contain iterables") else: - raise ValueError("Parent object should only contain CleverDict objects or dict objects to be converted to a CSV") + raise ValueError("Parent object should only contain CleverDict objects for CSV conversion.") - keys = [] - for _, v in mapping.items(): - if not keys: - keys.extend(list(v)) - elif list(v) != keys: - raise ValueError('All subitems should have the same keys') - else: - continue - - data_list = [] - if ignore: - data_list.append([i for i in keys if i not in ignore]) - if only: - for i in keys: - data_list.append([i for i in keys if i in only.union(set(data_list[0]))]) + if any(v.keys() != mapping[0].keys() for _, v in mapping.items()): + raise ValueError("All subitems should have the same keys") - for k, v in mapping.items(): - data_list.append([v[key] for key in keys]) + data_list = [] + for _, v in mapping.items(): + data_list.append(v._filtered_mapping(ignore, only)) + written = _write_csv(file_path, data_list) return written @@ -1435,3 +1411,13 @@ def _auto_save_json(self, name=None, value=None, fullcopy=False): path = self.get_new_save_path().with_suffix(".json") self.setattr_direct("save_path", Path(path)) self.to_json(file_path=self.save_path, fullcopy=fullcopy) + + +def _write_csv(file_path: Path, data: List[CleverDict]): + """Write a list of CleverDict objects to a csv file""" + with open(file_path, 'w') as file: + writer = csv.DictWriter(file, fieldnames=data[0].keys()) + writer.writeheader() + writer.writerows(data) + + return True From fd151a978e71a6d97de557fbd9e9b1d08f2d18f5 Mon Sep 17 00:00:00 2001 From: gouravkr Date: Sat, 12 Feb 2022 15:58:25 +0530 Subject: [PATCH 08/11] improved to_csv, made it more terse --- cleverdict/cleverdict.py | 93 +++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index 8ea7d65..e59b6cd 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -12,6 +12,7 @@ import inspect from attr import field +from isort import file """ Change log @@ -1196,49 +1197,79 @@ def to_csv( exclude: Union[Iterable, str] = None, only: Union[Iterable, str] = None ) -> None: - """[summary] + """Write a nested CleverDict object to a CSV file + Only CleverDicts consisting of CleverDicts can be written to a CSV file + The input object should have the same format as the output of from_csv - Args: - file_path (Path, optional): [description]. Defaults to None. - ignore (Union[Iterable, str], optional): [description]. Defaults to None. - exclude (Union[Iterable, str], optional): [description]. Defaults to None. - only (Union[Iterable, str], optional): [description]. Defaults to None. + Parameters + ---------- + file_path : Path | str + Path for the output csv file - Raises: - TypeError: [description] - TypeError: [description] + ignore : Iterable | str, optional + Keys to ignore from the subitem CleverDicts - Returns:ye + exclude : Iterable | str, optional + alias for ignore - [type]: [description] - """ + only : Iterable | str, optional + Only include these keys in the output csv file + + Returns + ------- + Returns a pathlib.Path object containing the path to the output file. + + Raises + ------ + ValueError + - If the file path is not provided + - If the subitems contain different lengths or keys + + TypeError + - If the underlying items are not CleverDicts + - If any of the values in the sub-items are iterables + Example + ------- + >>> my_list = [ + ... {'id': ''.join(random.sample(string.ascii_lowercase, 6)), + ... 'value': random.randint(10, 100)} + for i in range(3)] + >>> c_dict = CleverDict({i: CleverDict(j) for i, j in enumerate(my_list)}) + >>> print(c_dict) + CleverDict( + { + 0: CleverDict({'id': 'argyso', 'value': 61}, _aliases={}, _vars={}), + 1: CleverDict({'id': 'xnsjcu', 'value': 70}, _aliases={}, _vars={}), + 2: CleverDict({'id': 'fabxvc', 'value': 91}, _aliases={}, _vars={}) + }, _aliases={'_0': 0, '_False': 0, '_1': 1, '_True': 1, '_2': 2}, _vars={} + ) + >>> c_dict.to_csv('my_csv.csv') + WindowsPath('C:/example/my_csv.csv') + """ + if file_path is None: raise ValueError("File path not provided") + if isinstance(file_path, str): + file_path = Path(file_path) ignore, only = _preprocess_options(ignore, exclude, only) - mapping = self._filtered_mapping(ignore, None) - all_types = [1 if isinstance(v, (dict, CleverDict)) else 0 for _, v in mapping.items()] - same_type = False if 0 in all_types else True - if same_type: - for _, v in mapping.items(): - for _, val in v.items(): - if (hasattr(val, '__iter__') or hasattr(val, '__getitem__')) and not isinstance(val, str): - raise ValueError("Values cannot contain iterables") - else: - raise ValueError("Parent object should only contain CleverDict objects for CSV conversion.") + if any(not isinstance(v, CleverDict) for _, v in self.items()): + raise TypeError("Parent object should only contain CleverDict objects for CSV conversion.") + + data_list = [v._filtered_mapping(ignore, only) for _, v in self.items()] - if any(v.keys() != mapping[0].keys() for _, v in mapping.items()): + if any(v.keys() != self[0].keys() for _, v in self.items()): raise ValueError("All subitems should have the same keys") - data_list = [] - - for _, v in mapping.items(): - data_list.append(v._filtered_mapping(ignore, only)) + for i in data_list: + for _, val in i.items(): + if (hasattr(val, '__iter__') or hasattr(val, '__getitem__')) and not isinstance(val, str): + raise TypeError("Values to be written cannot be iterables") - written = _write_csv(file_path, data_list) - return written + output_file = _write_csv(file_path, data_list) + return output_file @classmethod def get_new_save_path(cls): @@ -1420,4 +1451,6 @@ def _write_csv(file_path: Path, data: List[CleverDict]): writer.writeheader() writer.writerows(data) - return True + if file_path.exists(): + return file_path.absolute() + return None From 494b610222e80f53fe9a48af11f024533e7b96fa Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Sat, 12 Feb 2022 22:52:07 +0530 Subject: [PATCH 09/11] made from_csv function more compact --- cleverdict/cleverdict.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index e59b6cd..249fe8d 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -11,9 +11,6 @@ from typing import Union, Iterable, List import inspect -from attr import field -from isort import file - """ Change log ========== @@ -1193,6 +1190,7 @@ def from_csv( def to_csv( self, file_path: Path = None, + delimiter: str = ',', ignore: Union[Iterable, str] = None, exclude: Union[Iterable, str] = None, only: Union[Iterable, str] = None @@ -1205,6 +1203,8 @@ def to_csv( ---------- file_path : Path | str Path for the output csv file + delimiter: str, default ',' + The delimiter to use in the csv file ignore : Iterable | str, optional Keys to ignore from the subitem CleverDicts @@ -1268,7 +1268,7 @@ def to_csv( if (hasattr(val, '__iter__') or hasattr(val, '__getitem__')) and not isinstance(val, str): raise TypeError("Values to be written cannot be iterables") - output_file = _write_csv(file_path, data_list) + output_file = _write_csv(file_path, data_list, delimiter=delimiter) return output_file @classmethod @@ -1444,10 +1444,10 @@ def _auto_save_json(self, name=None, value=None, fullcopy=False): self.to_json(file_path=self.save_path, fullcopy=fullcopy) -def _write_csv(file_path: Path, data: List[CleverDict]): +def _write_csv(file_path: Path, data: List[CleverDict], delimiter: str): """Write a list of CleverDict objects to a csv file""" - with open(file_path, 'w') as file: - writer = csv.DictWriter(file, fieldnames=data[0].keys()) + with open(file_path, 'w', newline='') as file: + writer = csv.DictWriter(file, fieldnames=data[0].keys(), delimiter=delimiter) writer.writeheader() writer.writerows(data) From 72494813b42435df8feb622f1f3c3619feff3ff6 Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Sat, 12 Feb 2022 22:52:17 +0530 Subject: [PATCH 10/11] added tests for to_csv --- cleverdict/test_cleverdict.py | 84 +++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/cleverdict/test_cleverdict.py b/cleverdict/test_cleverdict.py index 0982335..b84dfde 100644 --- a/cleverdict/test_cleverdict.py +++ b/cleverdict/test_cleverdict.py @@ -1,3 +1,4 @@ +from multiprocessing.sharedctypes import Value from cleverdict import CleverDict, Expand, all_aliases import pytest import os @@ -8,6 +9,8 @@ import keyring from itertools import permutations +import cleverdict + def example_save_function(self, name=None, value=None): """ @@ -767,6 +770,87 @@ def test_delimiter(self, tmpdir): assert data._0.name == 'Banana' +class Test_To_CSV: + def create_data(self): + keys = ['id', 'name', 'color'] + data = [ + [1, 'Banana', 'yellow'], + [2, 'Apple', 'green'], + [3, 'Blueberry', 'blue'], + [4, 'Kinnow', 'orange'], + [5, 'Kiwi', 'brown'] + ] + data_list = [CleverDict(zip(keys, i)) for i in data] + c_dict = CleverDict({i: v for i, v in enumerate(data_list)}) + return c_dict + + def test_invalid_params(self, tmpdir): + c_dict = self.create_data() + with pytest.raises(ValueError): + c_dict.to_csv() + with pytest.raises(TypeError): + c_dict.to_csv(f'{tmpdir}/test.csv', exclude='name', only='id') + + def test_bad_dicts(self, tmpdir): + c_dict = self.create_data() + c_dict._0.name2 = 'temp' + with pytest.raises(ValueError): + c_dict.to_csv(f'{tmpdir}/test.csv') + + c_dict = self.create_data() + c_dict[6] = CleverDict([('key1', 'temp'), ('key2', 'temp')]) + with pytest.raises(ValueError): + c_dict.to_csv(f'{tmpdir}/test.csv') + + c_dict = self.create_data() + c_dict[6] = dict(key1='temp', key2='temp') + with pytest.raises(TypeError): + c_dict.to_csv(f'{tmpdir}/test.csv') + + c_dict = self.create_data() + c_dict._1.color = ['green', 'red'] + with pytest.raises(TypeError): + c_dict.to_csv(f'{tmpdir}/test.csv') + + def test_file_creation(self, tmpdir): + c_dict = self.create_data() + file_path = c_dict.to_csv(f'{tmpdir}/test1.csv') + assert file_path.exists() + assert file_path.suffix == '.csv' + assert file_path.name == 'test1.csv' + + def test_written_file(self, tmpdir): + c_dict = self.create_data() + file_path = c_dict.to_csv(f'{tmpdir}/test2.csv') + data = CleverDict.from_csv(file_path) + assert data._0.name == 'Banana' + assert len(data.keys()) == 5 + + def test_delimiter(self, tmpdir): + c_dict = self.create_data() + delimiter = '|' + file_path = c_dict.to_csv(f'{tmpdir}/test3.csv', delimiter=delimiter) + data = CleverDict.from_csv(file_path, delimiter=delimiter) + assert data._0.name == 'Banana' + assert len(data._0.keys()) == 3 + assert len(data.keys()) == 5 + + def test_ignore_only(self, tmpdir): + c_dict = self.create_data() + file_path = c_dict.to_csv(f'{tmpdir}/test4.csv', ignore='id') + data = CleverDict.from_csv(file_path) + assert 'id' not in data._0 + assert data._1.name == 'Apple' + assert list(data._1) == ['name', 'color'] + + file_path = c_dict.to_csv(f'{tmpdir}/test5.csv', only='id') + data = CleverDict.from_csv(file_path) + assert 'id' in data._0 + assert 'color' not in data._0 + assert data._1.id == '2' + assert list(data._1) == ['id'] + + class Test_Internal_Logic: def test_raises_error(self): """ From fb5c8e3e2110dad1973bcff5f9c81e75951da478 Mon Sep 17 00:00:00 2001 From: Gourav Kumar Date: Wed, 16 Feb 2022 13:08:00 +0530 Subject: [PATCH 11/11] Improved code using black and flake8 --- cleverdict/cleverdict.py | 97 ++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/cleverdict/cleverdict.py b/cleverdict/cleverdict.py index 249fe8d..31c1e73 100644 --- a/cleverdict/cleverdict.py +++ b/cleverdict/cleverdict.py @@ -1,15 +1,16 @@ +from __future__ import annotations + import csv -import os -import json import inspect -import keyword import itertools +import json +import keyword +import os +import types +from datetime import datetime from pathlib import Path from pprint import pprint -from datetime import datetime -import types -from typing import Union, Iterable, List -import inspect +from typing import Iterable, List, Union """ Change log @@ -309,7 +310,7 @@ def _preprocess_csv(file_path: Union[str, Path], delimiter: str): file_path = Path(file_path) if not file_path.exists(): raise ValueError("File not found") - + with open(file_path, "r", encoding="utf-8") as file: reader = csv.reader(file, delimiter=delimiter) csv_data = list(reader) @@ -319,6 +320,18 @@ def _preprocess_csv(file_path: Union[str, Path], delimiter: str): return csv_data +def _write_csv(file_path: Path, data: List[CleverDict], delimiter: str) -> Union[Path, None]: + """Write a list of CleverDict objects to a csv file""" + with open(file_path, 'w', newline='') as file: + writer = csv.DictWriter(file, fieldnames=data[0].keys(), delimiter=delimiter) + writer.writeheader() + writer.writerows(data) + + if file_path.exists(): + return file_path.absolute() + return None + + class Expand: def __init__(self, ok): """ @@ -1064,28 +1077,27 @@ def from_json( else: return cls(data, **kwargs) - @classmethod def from_csv( cls, - file_path: Union[Path, str] = None, + file_path: Union[Path, str] = None, skip_rows: int = None, nrows: int = None, - header: bool = True, - names: list = [], + header: bool = True, + names: list = [], delimiter: str = ',', ignore: Union[str, list] = None, exclude: Union[str, list] = None, only: Union[str, list] = None - ) -> dict: - """Creates a new CleverDict object from a CSV file. + ) -> CleverDict: + """Creates a new CleverDict object from a CSV file. Each row is also encoded as a CleverDict object with the key being the row number starting from zero Parameters: ----------- file_path: str | pathlib.Path The path to the csv file - + skip_rows: int Number of rows to skip from the beginning of the file Does not count the header if header is True @@ -1120,9 +1132,9 @@ def from_csv( Raises: ------- - TypeError: + TypeError: If more than one of ignore, exclude, and only are specified - + ValueError: - If file path is not provided or is invalid - If names are specified without headers=False @@ -1142,7 +1154,7 @@ def from_csv( >>> print(data) CleverDict( { - 0: CleverDict({'id': '1', 'name': 'Banana', 'color': 'yellow'}, _aliases={}, _vars={}), + 0: CleverDict({'id': '1', 'name': 'Banana', 'color': 'yellow'}, _aliases={}, _vars={}), 1: CleverDict({'id': '2', 'name': 'Apple', 'color': 'green'}, _aliases={}, _vars={}) }, _aliases={'_0': 0, '_False': 0, '_1': 1, '_True': 1}, _vars={} ) @@ -1162,20 +1174,21 @@ def from_csv( start_row = 1 if header else 0 else: start_row = skip_rows + (1 if header else 0) - + if nrows is None: end_row = None else: end_row = start_row + nrows if header and names: - raise ValueError("Names cannot be specified if header is True.\nHint: To specify custom names for CSV with headers, set header=False and skip_rows=1") + raise ValueError("Names cannot be specified if header is True.\n" + "Hint: To specify custom names for CSV with headers, set header=False and skip_rows=1") if not names: names = csv_data[0] if header else list(range(len(csv_data[0]))) - + if len(names) != len(csv_data[0]): - raise ValueError("Number of items in names does not match the number of columns") + raise ValueError("Number of items in names does not match the number of columns") if len(names) != len(set(names)): raise ValueError("Names contain one or more duplicate values") @@ -1184,17 +1197,17 @@ def from_csv( for idx, row in enumerate(csv_data[start_row:end_row]): current_row_dict = cls(dict(zip(names, row)), **kwargs) data[idx] = current_row_dict - + return cls(data) def to_csv( - self, - file_path: Path = None, + self, + file_path: Path = None, delimiter: str = ',', - ignore: Union[Iterable, str] = None, + ignore: Union[Iterable, str] = None, exclude: Union[Iterable, str] = None, only: Union[Iterable, str] = None - ) -> None: + ) -> Union[Path, None]: """Write a nested CleverDict object to a CSV file Only CleverDicts consisting of CleverDicts can be written to a CSV file The input object should have the same format as the output of from_csv @@ -1232,32 +1245,32 @@ def to_csv( Example ------- >>> my_list = [ - ... {'id': ''.join(random.sample(string.ascii_lowercase, 6)), - ... 'value': random.randint(10, 100)} + ... {'id': ''.join(random.sample(string.ascii_lowercase, 6)), + ... 'value': random.randint(10, 100)} for i in range(3)] >>> c_dict = CleverDict({i: CleverDict(j) for i, j in enumerate(my_list)}) >>> print(c_dict) CleverDict( { - 0: CleverDict({'id': 'argyso', 'value': 61}, _aliases={}, _vars={}), - 1: CleverDict({'id': 'xnsjcu', 'value': 70}, _aliases={}, _vars={}), + 0: CleverDict({'id': 'argyso', 'value': 61}, _aliases={}, _vars={}), + 1: CleverDict({'id': 'xnsjcu', 'value': 70}, _aliases={}, _vars={}), 2: CleverDict({'id': 'fabxvc', 'value': 91}, _aliases={}, _vars={}) }, _aliases={'_0': 0, '_False': 0, '_1': 1, '_True': 1, '_2': 2}, _vars={} ) >>> c_dict.to_csv('my_csv.csv') WindowsPath('C:/example/my_csv.csv') """ - + if file_path is None: raise ValueError("File path not provided") if isinstance(file_path, str): file_path = Path(file_path) ignore, only = _preprocess_options(ignore, exclude, only) - + if any(not isinstance(v, CleverDict) for _, v in self.items()): raise TypeError("Parent object should only contain CleverDict objects for CSV conversion.") - + data_list = [v._filtered_mapping(ignore, only) for _, v in self.items()] if any(v.keys() != self[0].keys() for _, v in self.items()): @@ -1267,10 +1280,10 @@ def to_csv( for _, val in i.items(): if (hasattr(val, '__iter__') or hasattr(val, '__getitem__')) and not isinstance(val, str): raise TypeError("Values to be written cannot be iterables") - + output_file = _write_csv(file_path, data_list, delimiter=delimiter) return output_file - + @classmethod def get_new_save_path(cls): """ @@ -1442,15 +1455,3 @@ def _auto_save_json(self, name=None, value=None, fullcopy=False): path = self.get_new_save_path().with_suffix(".json") self.setattr_direct("save_path", Path(path)) self.to_json(file_path=self.save_path, fullcopy=fullcopy) - - -def _write_csv(file_path: Path, data: List[CleverDict], delimiter: str): - """Write a list of CleverDict objects to a csv file""" - with open(file_path, 'w', newline='') as file: - writer = csv.DictWriter(file, fieldnames=data[0].keys(), delimiter=delimiter) - writer.writeheader() - writer.writerows(data) - - if file_path.exists(): - return file_path.absolute() - return None