-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_json.py
More file actions
354 lines (303 loc) · 14.1 KB
/
parse_json.py
File metadata and controls
354 lines (303 loc) · 14.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
## This file parses the raw JSON from the API and builds the per-player match, timeline and rank tables.
import pandas as pd
import json
import os
import pandasql
import glob
from measure_pressure import ParseTimelinePressure
## Takes in: RAW Json Formatted file for the high level match data
## A lot of the variables in here are NOT used downstream, but they are kept just in case - no harm
def parseMatch(matchData):
    """Flatten a raw match JSON payload into a DataFrame with one row per participant.

    matchData: the decoded match JSON (expects 'metadata' and 'info' keys).
    Returns a pandas DataFrame whose columns are the game-level fields
    (gameId, gameDuration in minutes) followed by the per-participant stats.
    """
    gameInfo = matchData['info']
    matchId = matchData['metadata']['matchId']
    # post patch 11.20, gameDuration is returned in seconds and not milliseconds
    durationMinutes = gameInfo['gameDuration'] / 60

    # Fields copied 1:1 from each participant, kept in the original column order.
    directFieldsBeforeMonsters = (
        'puuid', 'summonerId', 'teamId', 'win', 'teamPosition',
        'champExperience', 'kills', 'assists', 'turretKills',
    )
    directFieldsAfterMonsters = (
        'visionScore', 'visionWardsBoughtInGame', 'magicDamageDealtToChampions',
        'deaths', 'totalMinionsKilled', 'timeCCingOthers', 'totalDamageTaken',
        'totalHealsOnTeammates', 'totalTimeSpentDead', 'goldEarned',
        'objectivesStolen', 'objectivesStolenAssists',
    )

    rows = []
    # Iterate over the actual participant list instead of a hard-coded range(10):
    # identical behaviour for standard 10-player games, but no longer crashes
    # (or silently truncates) on payloads with a different participant count.
    for participant in gameInfo['participants']:
        row = {'gameId': matchId, 'gameDuration': durationMinutes}
        for field in directFieldsBeforeMonsters:
            row[field] = participant[field]
        # Combined objective metric: barons + dragons
        row['epicMonsterKills'] = participant['baronKills'] + participant['dragonKills']
        for field in directFieldsAfterMonsters:
            row[field] = participant[field]
        rows.append(row)
    return pd.DataFrame(rows)
## Function to join the tables generated by parsing the Timeframe json
def joinTimelineTables(dfParticipants, dfAssist, dfTower, dfMonster, dfPressure):
    """Inner-join the per-participant timeline DataFrames into one table.

    pandasql.sqldf resolves the table names in the query against locals(),
    so the parameter names here MUST match the names used in the SQL text.
    NOTE(review): assumes dfParticipants exposes `puuid` and `participantId`
    columns and that each other frame has a `participant` column — verify
    against parseTimeline(), which produces these frames.

    Returns a DataFrame with one row per participant: puuid, participantId,
    participantsAssisted, towerKillsAssisted, monsterKillsAssisted,
    participantsAssistedWithPressure.
    """
    sql = '''
    SELECT puuid, participantId, participantsAssisted, towerKillsAssisted, monsterKillsAssisted, participantsAssistedWithPressure
    FROM dfParticipants
    INNER JOIN dfAssist
    ON dfParticipants.participantId = dfAssist.participant
    INNER JOIN dfTower
    ON dfParticipants.participantId = dfTower.participant
    INNER JOIN dfMonster
    ON dfParticipants.participantId = dfMonster.participant
    INNER JOIN dfPressure
    ON dfParticipants.participantId = dfPressure.participant
    '''
    dfFinal = pandasql.sqldf(sql, locals())
    return dfFinal
## Parse the timeline data
## Takes in: RAW Json Formatted file for the timeline
def _recordAssist(assistDict, event):
    """Tally the assisting participants of one kill event into assistDict.

    assistDict maps killer id (str '1'..'10') -> list of assisting ids.
    Kills with killerId == 0 (executions / minion tower kills) are ignored;
    a kill with no 'assistingParticipantIds' key is a solo kill, marked -1.
    """
    killerId = event['killerId']
    if killerId == 0:
        # Nobody gets credit: the champion was executed / the tower fell to minions
        return
    if 'assistingParticipantIds' in event:
        assistDict[str(killerId)].extend(event['assistingParticipantIds'])
    else:
        assistDict[str(killerId)].append(-1)  # -1 marks a solo kill


def _invertAssistDict(assistDict):
    """Invert killer -> assisters into assister -> killers they assisted.

    Returns {participantId(str): str(list-of-killer-ids)}; the values are
    stringified lists because the downstream SQL join stores them as text.
    """
    inverted = {str(i): [] for i in range(1, 11)}
    for killer in range(1, 11):
        for assister in assistDict[str(killer)]:
            if assister > 0:  # skip the -1 solo-kill markers
                inverted[str(assister)].append(killer)
    return {participant: str(killers) for participant, killers in inverted.items()}


def parseTimeline(match_timeline):
    """Parse a raw timeline JSON into one joined per-participant DataFrame.

    Walks every event in every frame (1 minute intervals), tallying who
    assisted champion kills, building kills and elite monster kills, builds
    one DataFrame per metric, then joins them with the participant list and
    the custom pressure metric from measure_pressure.ParseTimelinePressure().
    """
    ## Get the participant ID by puuid
    dfParticipants = pd.DataFrame(match_timeline['info']['participants'])

    # killer id (str) -> ids of the players who assisted that killer
    killAssistDict = {str(i): [] for i in range(1, 11)}
    towerAssistDict = {str(i): [] for i in range(1, 11)}
    monsterAssistDict = {str(i): [] for i in range(1, 11)}
    # One tally per event type we care about; every other event type is skipped
    assistDictByEventType = {
        'CHAMPION_KILL': killAssistDict,
        'BUILDING_KILL': towerAssistDict,
        'ELITE_MONSTER_KILL': monsterAssistDict,
    }

    for frame in match_timeline['info']['frames']:
        for event in frame['events']:
            assistDict = assistDictByEventType.get(event['type'])
            if assistDict is not None:
                _recordAssist(assistDict, event)

    # Rearrange to: participant (key) -> the kills they assisted (value)
    finalAssistDict = _invertAssistDict(killAssistDict)
    finalTowerAssistDict = _invertAssistDict(towerAssistDict)
    finalMonsterAssistDict = _invertAssistDict(monsterAssistDict)

    ## Convert each dict to a one-column DataFrame keyed by participant id
    # TODO: the column `participantsAssisted` should be renamed to `killsAssisted` makes better sense
    dfAssist = pd.DataFrame.from_dict(finalAssistDict, orient='index', columns=['participantsAssisted'])
    dfAssist.index.name = 'participant'
    dfTower = pd.DataFrame.from_dict(finalTowerAssistDict, orient='index', columns=['towerKillsAssisted'])
    dfTower.index.name = 'participant'
    dfMonster = pd.DataFrame.from_dict(finalMonsterAssistDict, orient='index', columns=['monsterKillsAssisted'])
    dfMonster.index.name = 'participant'

    ## Call the function from measure_pressure.py to get the dataframe of the custom pressure metric!
    dfPressure = ParseTimelinePressure(match_timeline)
    return joinTimelineTables(dfParticipants, dfAssist, dfTower, dfMonster, dfPressure)
## Join files together!
def joinMatchAndTimeline(matchDF, timelineDF):
    """Inner-join the match-level and timeline-level DataFrames on puuid,
    drop duplicated columns, and filter/sort the result.

    pandasql.sqldf resolves table names against locals(), so the parameter
    names must match the names in the SQL text. gameDuration is in minutes
    (parseMatch divides the raw seconds by 60), so the filter keeps only
    games longer than 23 minutes — presumably to drop remakes/short games.
    """
    sql = '''
    SELECT *
    FROM matchDF
    INNER JOIN timelineDF
    ON matchDF.puuid = timelineDF.puuid
    '''
    dfFinal = pandasql.sqldf(sql, locals())
    # The join produces `puuid` twice (once per table); keep the first occurrence
    dfFinal = dfFinal.loc[:, ~dfFinal.columns.duplicated()]
    ## APPLY FILTERS
    sql = '''
    SELECT *
    FROM dfFinal
    WHERE gameDuration > 23
    ORDER BY gameId asc
    '''
    dfFinal = pandasql.sqldf(sql, locals())
    return dfFinal
# Requires a list of rows where row[indexOfRankCol] is a 'TIER-DIVISION' string (or 'NA')
## assigning a numerical value to the ranks
def calculateAvgRank(listOfRanks, indexOfRankCol):
    """Compute the average numeric rank per game (each game = 10 consecutive rows).

    listOfRanks: flat list of player rows, 10 per game, in game order.
    indexOfRankCol: index of the rank string within each row.
    'NA' players map to 0 and are excluded from the average's denominator.
    Returns a flat list aligned with listOfRanks: each game's (rounded)
    average rank repeated once per player.
    """
    rankedDict = {
        'NA': 0,
        'IRON-IV': 1,
        'IRON-III': 2,
        'IRON-II': 3,
        'IRON-I': 4,
        'BRONZE-IV': 5,
        'BRONZE-III': 6,
        'BRONZE-II': 7,
        'BRONZE-I': 8,
        'SILVER-IV': 9,
        'SILVER-III': 10,
        'SILVER-II': 11,
        'SILVER-I': 12,
        'GOLD-IV': 13,
        'GOLD-III': 14,
        'GOLD-II': 15,
        'GOLD-I': 16,
        'PLATINUM-IV': 17,
        'PLATINUM-III': 18,
        'PLATINUM-II': 19,
        'PLATINUM-I': 20,
        'DIAMOND-IV': 21,
        'DIAMOND-III': 22,
        'DIAMOND-II': 23,
        'DIAMOND-I': 24,
        'MASTER-I': 25,
        'GRANDMASTER-I': 26,
        'CHALLENGER-I': 27
    }
    playerCounter = 0
    totalRank = 0
    naCount = 0
    finalList = []
    for player in listOfRanks:
        playerRank = player[indexOfRankCol]
        if playerRank == 'NA':
            naCount += 1
        totalRank += rankedDict[playerRank]  # 'NA' maps to 0, so it adds nothing
        playerCounter += 1
        if playerCounter == 10:  # games ALWAYS have 10 players
            rankedPlayers = 10 - naCount
            # Guard against a game where every player is unranked ('NA'):
            # the original divided by zero here. Such games default to 0
            # and are dropped later by removeNaValues anyway.
            avgRank = int(round(totalRank / rankedPlayers, 0)) if rankedPlayers else 0
            # append it for each player - not efficient, but works
            finalList.extend([avgRank] * 10)
            ## reset all values for the next game
            playerCounter = 0
            naCount = 0
            totalRank = 0
    return finalList
## Used to get a single player's summId from a game ID and participant index
def getPlayerIdFromGameIdIndex(pathToFile, index):
    """Return the summonerId of the participant at `index` in a match json file.

    pathToFile: path to a match json file (same shape parseMatch consumes).
    index: 0-based participant index; may be an int or a numeric string.
    """
    index = int(index)
    # `with` guarantees the handle is closed (the original leaked it)
    with open(pathToFile) as f:
        gameInfo = json.load(f)
    return gameInfo['info']['participants'][index]['summonerId']
## Process the players ranks json files
def ProcessPlayerRanksJson(pathToJsonFiles):
    """Build playerRanks.csv from the per-player rank json files.

    For every '<gameId>_<index>.json' file under pathToJsonFiles, extract the
    player's RANKED_SOLO_5x5 rank (falling back to another queue, or 'NA'),
    compute each game's average rank, drop EUN1 games and games containing
    'NA' players, then write the result to playerRanks.csv.
    """
    dfData = []
    counter = 0
    pathToMatchJson = os.path.join(os.getcwd(), 'match json files/')
    ## For every file in the ranked folder
    for filename in glob.glob(os.path.join(os.getcwd(), pathToJsonFiles, '*.json')):
        f = open(filename)  # NOTE(review): handle never closed — consider `with open(...)`
        playerInfo = json.load(f)
        ## look through each option in the array to make sure were looking at solo ranked games only
        # Get the game ID from the filename: '<region>_<match>_<playerIndex>.json'
        # NOTE(review): rsplit('/') assumes POSIX separators — breaks on Windows paths
        ogGameId = filename.rsplit('/')[-1].rsplit("_")
        gameId = ogGameId[0] + "_" + ogGameId[1]
        playerIndex = ogGameId[2].split('.')[0]
        if len(playerInfo) < 1:  # the player has no ranked info at all
            # Recover the summonerId from the match file (playerIndex is 1-based here)
            dirToMatchFile = pathToMatchJson + gameId + "_match.json"
            summId = getPlayerIdFromGameIdIndex(dirToMatchFile, int(playerIndex) - 1)
            dfData.append([gameId, summId, "NA"])  # append to data list
            counter += 1
            continue  # since we have appended to the list, we are finished with the player, so continue
        cnt = 0  # counts entries that are NOT solo queue
        for queue in playerInfo:
            if queue['queueType'] == 'RANKED_SOLO_5x5':  # this is the correct queue type
                rank = f"{queue['tier']}-{queue['rank']}"  # concat these
                dfData.append([gameId, queue['summonerId'], rank])  # append to data list
                continue  # skips cnt += 1, so cnt stays a pure non-solo count
            cnt += 1
        ## Catch the rest of the items that dont fit the usual criteria:
        ## the player has ranked entries but none of them is solo queue, so
        ## fall back to the last queue seen (loop variable persists after the loop)
        if len(playerInfo) > 0 and queue['queueType'] != 'RANKED_SOLO_5x5' and cnt == len(playerInfo):
            rank = f"{queue['tier']}-{queue['rank']}"  # concat these
            dfData.append([gameId, queue['summonerId'], rank])  # append to data list
            counter += 1
    avgRankList = calculateAvgRank(dfData, 2)
    df = pd.DataFrame(data=dfData, columns=['gameId', 'summonerId', 'rank'])
    ## DEBUG PURPOSES ONLY
    # print("counter: ", counter)
    # print("avg rank list len: ", len(avgRankList))
    # print("df shape: ", df.shape[0])
    # print("df data list len: ", len(dfData))
    ## Attach the per-game average rank (aligned row-for-row with dfData)
    df['avgrank'] = avgRankList
    # presumably excludes the EUN1 (EUNE) region — TODO confirm intent
    df = df[~df.gameId.str.contains("EUN1")]
    newDF = removeNaValues(df)  # remove games with any NA-ranked player
    newDF.to_csv("playerRanks.csv", index=False)  ## write to CSV
## Remove bad data
def removeNaValues(df):
    """Drop every game that contains at least one player with rank 'NA'.

    df: DataFrame with at least 'gameId' and 'rank' columns.
    Returns the filtered DataFrame (original row order and index preserved).
    """
    # Vectorized replacement for the original iterrows() + manual-dedup loop:
    # collect the gameIds that have any 'NA' rank, then filter them all out.
    gamesToRemove = df.loc[df['rank'] == "NA", 'gameId'].unique()
    return df[~df['gameId'].isin(gamesToRemove)]
## If the file is called directly. Mainly for debugging and testing.
## If you are not calling this file directly, IGNORE THIS.
if __name__ == '__main__':
    pass
    # NOTE(review): the commented-out example below uses outdated names
    # (Parse_match / Parse_Timeline / JoinMatchAndTimeline); the functions
    # in this file are parseMatch / parseTimeline / joinMatchAndTimeline.
    # json_file = open("match.json")
    # variables = json.load(json_file)
    # matchDF = Parse_match(variables)
    # json_file = open("timeline.json")
    # match_timeline = json.load(json_file)
    # timelineDF = Parse_Timeline(match_timeline)
    # joinedDF = JoinMatchAndTimeline(matchDF,timelineDF)
    # joinedDF.to_csv('joined.csv',index=False, mode='a')
    # ProcessPlayerRanksJson("rank json files")