-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess.py
More file actions
110 lines (88 loc) · 3.94 KB
/
preprocess.py
File metadata and controls
110 lines (88 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
__author__ = 'Thurston Sexton'
from sklearn.externals import joblib
from sklearn.decomposition import TruncatedSVD, FastICA
import numpy as np
import json
import pandas as pd
import os
class Preprocess:
    """Load recorded plays and turn them into (features, target) arrays.

    The class keeps three pieces of state:

    * ``all_dat``  -- 2-D array with one dense control vector per play
      (built by :meth:`get_json` or loaded from a joblib dump),
    * ``full_tab`` -- pandas table read from ``data_path``; a ``rem_nrg``
      column derived from its ``score`` column is added on load,
    * ``pca``      -- a fitted decomposition exposing ``transform``
      (built by :meth:`train_pca` or loaded from a joblib dump).

    The selection methods (:meth:`ready_player_one`,
    :meth:`ready_bad_player`, :meth:`prep_by_id`) each return a tuple
    ``(X, y)`` where ``X`` is the transformed control vectors with the
    ``finaldrive`` column appended as the last feature and ``y`` is the
    matching ``rem_nrg`` values.
    """

    # Raw string: the value is identical to the historical literal, but a
    # non-raw literal containing "\u" does not even parse on Python 3.
    DEFAULT_DATA_PATH = r"c:\users\p2admin\documents\max\projects\ecoracer2016/data.json"

    # User ids of the six best players, in ranking order.
    TOP_PLAYERS = (78, 122, 166, 70, 67, 69)

    # User id that gets zero score (~12 plays); user 1 is a mediocre
    # alternative (~12 plays).
    BAD_PLAYER = 79

    def __init__(self, pca_model=None, all_dat=None, data_path=None):
        """Optionally load a fitted model and/or the dense play matrix.

        Args:
            pca_model: path of a joblib dump of a fitted decomposition
                (try 'eco_full_pca.pkl'), or None to skip loading.
            all_dat: path of a joblib dump of the play matrix
                (try 'all_games.pkl'), or None to skip loading.
            data_path: path of the JSON play table; defaults to
                ``DEFAULT_DATA_PATH``.

        When ``all_dat`` is given, rows of the matrix that are entirely
        zero are dropped and the same rows are dropped from the table, so
        the table must have one row per matrix row. When only
        ``pca_model`` is given the table is loaded unfiltered. When
        neither is given no file is read.
        """
        self.data_path = self.DEFAULT_DATA_PATH if data_path is None else data_path
        # Always define the attributes so later code can test for None.
        self.pca = None
        self.all_dat = None
        self.full_tab = None
        self.proj = None

        # NOTE: joblib.load unpickles its input; only load trusted files.
        if pca_model is not None:
            self.pca = joblib.load(pca_model)

        if all_dat is not None:
            games = joblib.load(all_dat)
            keep = np.any(games, axis=1)  # False for all-zero rows
            self.all_dat = games[keep]
            self.full_tab = self._load_table(keep)
        elif pca_model is not None:
            self.full_tab = self._load_table()

    def _load_table(self, keep=None):
        """Read the play table, optionally filter rows, add ``rem_nrg``."""
        table = pd.read_json(self.data_path)
        if keep is not None:
            # Copy so that adding a column does not write into a slice.
            table = table[keep].copy()
        table["rem_nrg"] = table["score"].map(self.remaining_energy)
        return table

    @staticmethod
    def remaining_energy(consumption, max_batt=0.55):
        """Return ``100 - consumption / 36000 / max_batt``, or 0 for -1.

        A ``consumption`` of -1 is treated as a sentinel and maps to 0.
        The result is presumably a percentage of battery left; the units
        of ``consumption`` and ``max_batt`` are not visible here -- TODO
        confirm against the data source.

        Args:
            consumption: value taken from the table's ``score`` column.
            max_batt: battery capacity constant used by the formula.
        """
        if consumption == -1:
            return 0
        # Float divisor forces true division even for integer input on
        # interpreters where "/" between ints truncates.
        return 100 - (consumption / 36000.0 / max_batt)

    def totuple(self, a):
        """Recursively convert a nested iterable into nested tuples.

        Non-iterable values are returned unchanged. Strings are also
        returned unchanged: iterating a one-character string yields
        itself, so recursing into strings would never terminate.
        """
        if isinstance(a, str):
            return a
        try:
            return tuple(self.totuple(item) for item in a)
        except TypeError:
            return a

    def full_vec(self, pos, sig, size):
        """Expand change-points into a dense integer vector.

        For every consecutive pair ``pos[i], pos[i + 1]`` the slice
        ``[pos[i]:pos[i + 1]]`` of the result is set to ``sig[i]``;
        everything else stays 0.

        Filling is best-effort: a malformed record (missing or
        non-sliceable ``pos``, ``sig`` shorter than needed, values that
        cannot be stored as integers) stops the fill and whatever was
        written so far is returned.

        Args:
            pos: sequence of integer positions.
            sig: sequence of values, one per interval.
            size: length of the returned vector.

        Returns:
            1-D integer ``numpy.ndarray`` of length ``size``.
        """
        series = np.zeros((size,), dtype=int)
        try:
            for i, start in enumerate(pos[:-1]):
                series[start:pos[i + 1]] = sig[i]
        except (IndexError, KeyError, TypeError, ValueError, OverflowError):
            # Deliberately tolerant: keep the partially filled vector.
            pass
        return series

    def get_json(self, file, size=18160, out_path='../all_games.pkl'):
        """Build the dense play matrix from a JSON export and dump it.

        Reads ``data['alluser_control']`` from ``file`` into ``self.dat``,
        adds a ``series`` column holding each play's dense vector as a
        tuple, stores the vectors as rows of ``self.all_dat`` (float
        array of shape ``(number of plays, size)``) and writes that array
        to ``out_path`` with joblib.

        Args:
            file: path of the JSON file to read.
            size: length of each dense vector.
            out_path: destination of the joblib dump.
        """
        with open(file) as json_data:
            data = json.load(json_data)
        self.dat = pd.DataFrame.from_dict(data['alluser_control'])

        # Build each vector once and reuse it for both outputs.
        vectors = [self.full_vec(pos, sig, size)
                   for pos, sig in zip(self.dat["x"], self.dat["sig"])]
        self.dat["series"] = pd.Series([self.totuple(vec) for vec in vectors],
                                       index=self.dat.index, dtype=object)

        # Size the matrix from the data so no row is left uninitialised.
        self.all_dat = np.zeros((len(vectors), size))
        for row, vec in enumerate(vectors):
            self.all_dat[row, :] = vec
        joblib.dump(self.all_dat, out_path)

    def train_pca(self, ndim=30, out_path='../eco_full_pca.pkl'):
        """Fit a FastICA model on the complete ``all_dat`` and dump it.

        Despite the attribute name, the fitted model is a ``FastICA``
        (``TruncatedSVD`` is the commented-out alternative in the file's
        history).

        Args:
            ndim: number of components to extract.
            out_path: destination of the joblib dump, for later loading.
        """
        self.pca = FastICA(n_components=ndim)
        self.pca.fit(self.all_dat)
        joblib.dump(self.pca, out_path)

    def _features_for(self, mask):
        """Return ``(X, y)`` for the plays selected by a boolean mask.

        ``X`` is ``self.pca.transform`` of the selected rows of
        ``all_dat`` with the selected ``finaldrive`` values appended as
        the last column; ``y`` is the selected ``rem_nrg`` values.
        """
        X = self.all_dat[mask]
        y = self.full_tab["rem_nrg"].values[mask]
        X_pca = self.pca.transform(X)
        final_drive = self.full_tab["finaldrive"].values[mask]
        return (np.column_stack((X_pca, final_drive)), y)

    def ready_player_one(self, place):
        """Return ``(X, y)`` for the player ranked ``place`` in the top six.

        Args:
            place: 1-based rank, from 1 to ``len(TOP_PLAYERS)``.

        Raises:
            IndexError: if ``place`` is outside that range (including 0
                and negative values, which would otherwise wrap around
                and silently select a different player).
        """
        if not 1 <= place <= len(self.TOP_PLAYERS):
            raise IndexError("place must be between 1 and %d, got %r"
                             % (len(self.TOP_PLAYERS), place))
        mask = self.full_tab.userid.values == self.TOP_PLAYERS[place - 1]
        return self._features_for(mask)

    def ready_bad_player(self):
        """Return ``(X, y)`` for the plays of user ``BAD_PLAYER``."""
        # A plain boolean array: wrapping it in a list is rejected as an
        # index by current NumPy.
        mask = self.full_tab.userid.values == self.BAD_PLAYER
        return self._features_for(mask)

    def prep_by_id(self, play_no):
        """Return ``(X, y)`` for one user's plays up to a given play.

        Looks up the user who owns the play whose ``id`` equals
        ``play_no`` and selects all of that user's plays with
        ``id <= play_no``.

        Raises:
            IndexError: if no play has ``id == play_no``.
        """
        table = self.full_tab
        owners = table["userid"][table["id"] == play_no].values
        if len(owners) == 0:
            raise IndexError("no play with id %r in full_tab" % (play_no,))
        same_user = table["userid"].values == owners[0]
        up_to_play = table["id"].values <= play_no
        return self._features_for(np.logical_and(same_user, up_to_play))