-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLoad_data.py
More file actions
59 lines (52 loc) · 1.74 KB
/
Load_data.py
File metadata and controls
59 lines (52 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np
import pandas as pd
import yaml
import argparse
import typing
def read_params(config_path: str):
    """
    Reads the parameters from the .yaml file
    input: params.yaml location
    output: parsed YAML content (a dictionary for a mapping-style file)
    raises: yaml.YAMLError when the file is not valid YAML; the error is
            printed first and then re-raised
    """
    # YAML is read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(config_path, 'r', encoding='utf-8') as yaml_file:
        try:
            config = yaml.safe_load(yaml_file)
        except yaml.YAMLError as exc:
            # Report the parse problem, then propagate it. Falling through to
            # the return statement would fail with UnboundLocalError (config
            # was never assigned) and hide the real cause from the caller.
            print(exc)
            raise
    return config
def load_data(data_path: str, data_type='train',
              model_var: typing.Optional[typing.List[str]] = None):
    """
    Reads the data from the .csv file from given path
    input: data_path  - location of a comma-separated, UTF-8 encoded file
           data_type  - kind of data to load; only 'train' is implemented
           model_var  - column names to keep; None keeps every column
    output: data in a pandas dataframe
    raises: ValueError when data_type is not 'train'
            KeyError when a name in model_var is not a column of the file
    """
    # Reject unsupported kinds up front instead of reaching the return with
    # no dataframe assigned.
    if data_type != 'train':
        raise ValueError(
            "unsupported data_type {!r}; only 'train' is implemented".format(data_type)
        )

    data = pd.read_csv(data_path,
                       sep=',',
                       encoding='utf-8')

    # The default model_var=None means "no column selection"; indexing the
    # dataframe with None would raise KeyError.
    if model_var is None:
        return data
    return data[model_var]
def load_raw_data(config_path: str):
    """
    Copy the configured columns of the external CSV into the raw data CSV.

    The source path, destination path and column list are all taken from
    the parameters file.
    input: config_path - location of the parameters .yaml file
    output: nothing is returned; the selected data is written to the raw
            data path without the dataframe index
    """
    settings = read_params(config_path)

    # Where the data comes from.
    source_csv = settings['external_data_config']['external_data_csv']

    # Where it goes and which columns are kept.
    raw_section = settings['raw_data_config']
    target_csv = raw_section['raw_data_csv']
    columns = raw_section['model_var']

    frame = load_data(data_path=source_csv, data_type='train', model_var=columns)
    frame.to_csv(target_csv, index=False)
if __name__ == "__main__":
    # Command-line entry point: --config names the parameters file and
    # falls back to params.yaml when it is not given.
    cli = argparse.ArgumentParser()
    cli.add_argument('--config', type=str, default='params.yaml')
    load_raw_data(cli.parse_args().config)