-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcleanteamdata.py
More file actions
86 lines (52 loc) · 2.01 KB
/
cleanteamdata.py
File metadata and controls
86 lines (52 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
import pandas as pd
import numpy as np
def clean_age(cell):
    """Convert an age string into fractional years.

    The first two characters of ``cell`` are read as whole years and
    everything from the fourth character onward as a number of days; the
    character in between is skipped. Days are scaled by 1/365.
    """
    whole_years, extra_days = int(cell[:2]), int(cell[3:])
    return whole_years + extra_days / 365
def clean_game_location(cell):
    """Return 'Away' when ``cell`` is exactly '@', otherwise 'Home'."""
    return 'Away' if cell == '@' else 'Home'
def clean_game_result(cell):
    """Return the first whitespace-delimited token of ``cell``."""
    # Only the leading token is needed, so stop splitting after it.
    leading_token = cell.split(maxsplit=1)[0]
    return leading_token
def engineer_all(df):
    """Run every feature-engineering step on ``df`` and return the result.

    Currently this adds, for each integer- or float-typed column, the
    per-team averages produced by ``engineer_previous_game_averages`` using
    the odd window sizes 1, 3, ..., 15.

    Args:
        df: Frame to augment; it is handed straight to
            ``engineer_previous_game_averages``.

    Returns:
        Whatever ``engineer_previous_game_averages`` returns for ``df``.
    """
    # Comparing a dtype against the builtins ``int``/``float`` only matches
    # the platform-default width (and misses int32/float32 entirely), so ask
    # pandas whether each dtype is an integer or float type instead. Bool,
    # datetime and object columns are still excluded.
    numeric_columns = [
        name
        for name, dtype in df.dtypes.items()
        if pd.api.types.is_integer_dtype(dtype)
        or pd.api.types.is_float_dtype(dtype)
    ]
    return engineer_previous_game_averages(
        df=df,
        columns=numeric_columns,
        previous_game_averages=range(1, 16, 2),
    )
def engineer_previous_game_averages(df, columns, previous_game_averages):
    """Add per-team trailing and season-to-date averages for each column.

    ``df`` is sorted in place by ``date_game`` then ``team_id``. For every
    name in ``columns`` and every window size ``n`` in
    ``previous_game_averages`` a column ``<column><n>game-avg`` is added
    holding the mean of that team's previous ``n`` rows (the current row is
    excluded; missing lags are skipped, and the result is NaN when no earlier
    row exists). A ``<column>seasonavg`` column is added holding the mean of
    all of that team's earlier rows.

    Args:
        df: Frame with ``date_game`` and ``team_id`` columns plus every
            column named in ``columns``. It is modified in place.
        columns: Names of the columns to average.
        previous_game_averages: Iterable of window sizes (numbers of
            previous rows). May be empty, in which case only the
            ``seasonavg`` columns are added.

    Returns:
        The same ``df`` object, sorted and with the new columns appended.
    """
    window_sizes = list(previous_game_averages)
    # Largest lag any window needs; 0 when no windows were requested.
    max_offset = max(window_sizes, default=0)

    # Sort once up front: the order does not depend on the column being
    # processed, and the per-team shifts below rely on chronological order.
    df.sort_values(by=['date_game', 'team_id'],
                   ascending=True,
                   inplace=True)

    for column in columns:
        by_team = df.groupby('team_id')[column]

        # Keep the lagged values in a separate frame (keyed by lag size)
        # rather than as temporary columns on ``df``, so an existing column
        # that happens to be called "<column>-<k>" is never clobbered.
        lags = pd.DataFrame(
            {
                offset: by_team.shift(periods=offset)
                for offset in range(1, max_offset + 1)
            },
            index=df.index,
        )

        # ``transform`` returns a result aligned to ``df``'s own index, and
        # ``expanding().mean()`` replaces the removed ``pd.expanding_mean``.
        # The trailing shift keeps the current row out of its own average.
        season_average = by_team.transform(
            lambda games: games.expanding().mean().shift())

        for average in window_sizes:
            recent = lags[list(range(1, average + 1))]
            df[column + str(average) + 'game-avg'] = recent.mean(axis=1)

        df[column + 'seasonavg'] = season_average
    return df
if __name__ == '__main__':
    # Per-column parsers handed to read_csv for the raw text columns.
    column_converters = {
        'age': clean_age,
        'game_location': clean_game_location,
        'game_result': clean_game_result,
    }
    teamDF = pd.read_csv(
        'team_stats.csv',
        index_col=0,
        parse_dates=['date_game'],
        infer_datetime_format=True,
        converters=column_converters,
    )

    # Fill gaps in the raw data with 0 before engineering features, then
    # drop any row that still has a missing value afterwards.
    teamDF = engineer_all(teamDF.fillna(0))
    teamDF = teamDF.dropna(how='any')

    teamDF.to_csv('clean_team_stats.csv')