-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic_logParser.py
More file actions
59 lines (50 loc) · 2.04 KB
/
basic_logParser.py
File metadata and controls
59 lines (50 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
'''
The parser result should contain:
- How many stages does the whole application have?
- In each stage, how many seconds did each operation take?
- In each stage, how many partitions are there?
'''
import sys, getopt
from collections import defaultdict
from datetime import datetime, timedelta
# Path of the log to read and path of the report file to write.
filename = "../Desktop/result.log"
output = "output"

# Stage id of the records currently being accumulated by the parsing loop.
old_stage_id = 0

# Mapping whose missing keys default to an empty list.
op_info = defaultdict(list)


def _new_partition_ops():
    """Return an empty mapping of operation name -> list of timestamps."""
    return defaultdict(list)


# Nested defaultdict: partition id -> operation name -> list of timestamps.
stage_info = defaultdict(_new_partition_ops)
def nano2Readable(ns):
    """Convert a nanosecond count into a ``timedelta``.

    The value is first rounded to the nearest thousand nanoseconds and then
    expressed in microseconds, which is the unit handed to ``timedelta``.
    """
    rounded_ns = round(ns, -3)
    micros = rounded_ns // 1000
    return timedelta(microseconds=micros)
def output_stage_info(stage_id, stage_info, output):
    """Write the timing report of one stage to ``output``.

    Args:
        stage_id: Identifier of the stage; only used in the header line.
        stage_info: Mapping of partition id -> mapping of operation name ->
            list of nanosecond timestamps. The first two entries of each
            list are reported as the two ends of the operation's interval.
        output: Object exposing ``write(str)``, e.g. an open text file.

    An operation with no timestamps is skipped, and one with a single
    timestamp is reported as incomplete, so that a truncated log does not
    abort the whole report with an ``IndexError``.
    """
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("printing...")
    output.write("Number of Partitions(tasks):" + str(len(stage_info))
                 + " in stage Id: " + str(stage_id) + "\n")
    # ``items()`` (rather than ``iteritems()``) exists on both Python 2 and 3.
    for par_id, op_info in stage_info.items():
        output.write("Partition " + str(par_id) + "\n")
        for op_name, time_interval in op_info.items():
            if not time_interval:
                # Nothing was recorded for this operation.
                continue
            start = time_interval[0]
            if len(time_interval) < 2:
                # Only one end of the interval was logged.
                output.write("\t" + op_name + ":" + str(nano2Readable(start))
                             + " -----> (missing), lasting:(unknown)\n")
                continue
            end = time_interval[1]
            output.write("\t" + op_name + ":" + str(nano2Readable(start))
                         + " -----> " + str(nano2Readable(end))
                         + ", lasting:" + str(nano2Readable(end - start))
                         + "\n")
    # NOTE(review): the original indentation was lost; the blank-line
    # separator is assumed to be written once per stage -- confirm.
    output.write("\n\n")
# Stream the log once, collecting timestamps per partition and operation, and
# write a report each time the stage id of the incoming records changes.
# Both files are closed by the ``with`` statement, so no explicit close()
# calls are needed.
with open(output, 'w') as f, open(filename, 'r') as fd:
    for line in fd:
        # Drop the fixed-width prefix of the log line.
        # NOTE(review): the original comment said "first 33 characters" while
        # the code drops 34 -- confirm against the actual log format.
        line = line[34:]
        if not line.strip():
            # Blank or prefix-only line: skip it instead of failing on the
            # four-way unpack below and losing the final stage report.
            continue
        rdd_type, task_attmpt_id, par_id, stage_id = line.split(",")
        stage_id = int(stage_id.split(":")[1])
        par_id = int(par_id[(par_id.find(":") + 1):])
        task_attmpt_id = int(task_attmpt_id[(task_attmpt_id.find(":") + 1):])
        op_name = rdd_type.split("]")[0]
        # Start and end records are not told apart; each operation is simply
        # expected to contribute two timestamps to its interval list.
        # ``int`` grows to arbitrary size on both Python 2 and 3, so the
        # Python-2-only ``long`` is not needed.
        timestamp = int(rdd_type.split("]")[1].split(":")[1])
        if old_stage_id != stage_id:
            # A new stage begins: flush what was collected for the previous
            # one. Nothing is written when no records were collected, which
            # avoids a bogus empty report when the first stage id in the log
            # differs from the initial ``old_stage_id``.
            if stage_info:
                output_stage_info(old_stage_id, stage_info, f)
            old_stage_id = stage_id
            stage_info.clear()
        stage_info[par_id][op_name].append(timestamp)
    # Flush the last stage, if any records were read at all.
    if stage_info:
        output_stage_info(old_stage_id, stage_info, f)