-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpredict.py
More file actions
110 lines (91 loc) · 3.52 KB
/
predict.py
File metadata and controls
110 lines (91 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import logging
import sys
from math import sqrt

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_regression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
# Logging configuration for this module.
# The "requests" logger is raised to WARNING so only warnings and errors
# from it are emitted.
logging.getLogger("requests").setLevel(logging.WARNING)

# Every record carries a timestamp, the module name, the process id, the
# severity and the message text.
_LOG_FORMAT = '%(asctime)s - %(module)s - %(process)d - %(levelname)s - %(message)s'
formatter = logging.Formatter(_LOG_FORMAT)

# Handler that writes DEBUG-and-above records to standard output.
ch = logging.StreamHandler(stream=sys.stdout)
ch.setFormatter(formatter)
ch.setLevel(logging.DEBUG)

# Module logger: accepts DEBUG and above and forwards to the stdout handler.
logger = logging.getLogger(__name__)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)
def predictWinners(predictions):
    """
    Turn per-team scores into a binary win/lose standing for each game.

    ``predictions`` is a DataFrame that must provide these columns:
        id        row identifier used to address the row being updated
        gameId    identifier shared by the teams of one game
        standing  numeric score; the larger value within a game is
                  treated as the loser
    (Callers also pass a ``team`` column; it is not read here.)

    For every game:
      * a game with a single row is assigned standing 0 (winner);
      * otherwise the first two rows of the game are compared: the row
        with the strictly larger ``standing`` becomes 1 (loser) and the
        other becomes 0 (winner). On a tie the first row is the winner.
      * rows beyond the first two of a game are left unchanged.

    The frame is modified in place, written to ``predictions_binomial.csv``
    in the current working directory, and returned.
    """
    logger.info("Converting probabilities to 0s and 1s")

    # Iterate the groups directly instead of looking each one up by key.
    for _, group in predictions.groupby("gameId"):
        ids = group['id'].values

        if len(group) == 1:
            # A lone team is automatically the winner (0 = win, 1 = loss).
            predictions.loc[predictions['id'] == ids[0], 'standing'] = 0
            continue

        # Decide the outcome before writing anything, so the comparison
        # always sees the original scores.
        scores = group['standing'].values
        first_loses = bool(scores[0] > scores[1])

        # ``.loc`` replaces the ``.ix`` indexer, which no longer exists in
        # current pandas releases.
        predictions.loc[predictions['id'] == ids[0], 'standing'] = 1 if first_loses else 0
        predictions.loc[predictions['id'] == ids[1], 'standing'] = 0 if first_loses else 1

    predictions.to_csv("predictions_binomial.csv")
    return predictions
if __name__ == "__main__":
    # Create training and testing datasets by splitting the data in half.
    teamData = pd.read_csv("datafiles/teamData.csv")

    # Floor division keeps the split point an integer, and ``.iloc``
    # replaces the removed ``.ix`` indexer. The training half keeps row
    # ``half`` itself, matching the original label-inclusive slice; the
    # test half starts on the row after it.
    half = len(teamData) // 2
    train = teamData.iloc[:half + 1]
    test = teamData.iloc[half + 1:]

    # Columns of teamData.csv used as model inputs.
    features = [
        'characterLevel',
        'combatRating',
        'combatRatingStd',
        'killsDeathsRatio',
        'killsDeathsAssists',
        'defensiveKills',
        'offensiveKills',
        'objectivesCompleted',
        'refrencedId',
        'team',
        'hasHighestScoringPlayer',
        'hasLowestScoringPlayer',
        'numberOfFireTeams',
        'weaponKillsHeavy',
        'players',
        'averageScorePerKill',
        'longestKillSpree',
        'dominationKills',
    ]

    logger.info("Building random forest!")
    randomForest = RandomForestClassifier(n_estimators=1000)
    randomForest.fit(train[features], train['standing'])

    # Per-class probabilities for every test row.
    # NOTE(review): assumes 'standing' is labelled 0 (win) / 1 (loss), so
    # column 0 is P(win) and column 1 is P(loss) -- confirm against
    # randomForest.classes_.
    predictions = randomForest.predict_proba(test[features])

    submission = pd.DataFrame({
        "id": test.index.values,
        "standing": [row[1] for row in predictions],
        "gameId": test['gameId'],
        "team": test['team'],
        "probabilityOfVictory": [row[0] for row in predictions],
    })

    # Turn the probabilities of victory (and defeat) into 0s and 1s.
    submission = predictWinners(submission)

    rms = sqrt(mean_squared_error(test['standing'], submission['standing']))
    logger.info("RMSE: {0}".format(rms))
    submission.to_csv("datafiles/submission.csv")