-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpandas_loader.py
More file actions
52 lines (43 loc) · 1.79 KB
/
pandas_loader.py
File metadata and controls
52 lines (43 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""
Loads a CSV file as a pandas DataFrame and, optionally, computes first and second differences (discrete derivatives) of time series columns.
@author: bartulem
"""
import pandas as pd
def data_load(file_loc=None, file_separator=',',
              insert_derivatives=False, der_variables=None):
    """
    Load a CSV file as a DataFrame and optionally insert derivative columns.

    Derivatives are discrete differences between consecutive rows
    (``Series.diff()``), so the first row of every first-derivative column
    and the first two rows of every second-derivative column are NaN.

    Parameters
    ----------
    file_loc : str or file-like
        Absolute location of the weather data CSV file (anything accepted by
        ``pandas.read_csv``); defaults to None, which is rejected.
    file_separator : str
        File delimiter of choice; defaults to ",".
    insert_derivatives : bool
        Whether to insert the derivative columns; defaults to False.
    der_variables : list of str, or str
        Columns to calculate derivatives on; defaults to None, in which case
        the standard weather columns listed in the body are used. A single
        column name may be given as a plain string.

    Returns
    -------
    csv_data : pandas.DataFrame
        The loaded data. When `insert_derivatives` is True, each selected
        column ``X (unit)`` is immediately followed by ``X_1st_der`` and
        ``X_2nd_der``.

    Raises
    ------
    ValueError
        If `file_loc` is None, or (raised by pandas) if a derivative column
        name already exists in the data.
    KeyError
        If any of `der_variables` is not a column of the loaded data.
    """
    if file_loc is None:
        # pandas would raise ValueError here as well, but with an opaque message.
        raise ValueError('file_loc must be a path or file-like object, got None.')

    csv_data = pd.read_csv(filepath_or_buffer=file_loc, sep=file_separator)
    if not insert_derivatives:
        return csv_data

    if der_variables is None:
        der_variables = ['Min_temp (°C)', 'Max_temp (°C)',
                         'Mean_temp (°C)', 'Pressure (hPa)',
                         'Humidity (%)', 'Wind_speed (m/s)',
                         'Wind_deg (°)', 'Clouds (%)']
    elif isinstance(der_variables, str):
        # A bare string would otherwise be iterated character by character.
        der_variables = [der_variables]

    # Report every missing column at once instead of failing on the first one.
    missing = [var for var in der_variables if var not in csv_data.columns]
    if missing:
        raise KeyError(f'Columns not found in the CSV data: {missing}')

    for var in der_variables:
        # The derivative columns are named after the text before the first
        # space, i.e. the variable name without its unit suffix.
        base_name = var.split(" ")[0]
        first_der = csv_data.loc[:, var].diff()
        # Look the position up on every iteration: earlier insertions shift
        # the columns that come after them.
        var_position = csv_data.columns.get_loc(var)
        csv_data.insert(loc=var_position + 1,
                        column=f'{base_name}_1st_der',
                        value=first_der)
        csv_data.insert(loc=var_position + 2,
                        column=f'{base_name}_2nd_der',
                        value=first_der.diff())
    return csv_data