-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_data.py
More file actions
135 lines (112 loc) · 4.38 KB
/
parse_data.py
File metadata and controls
135 lines (112 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import matplotlib
# headless server: select the non-interactive Agg backend, which needs no display
matplotlib.use('Agg')
import pylab
import pprint
import json
import datetime
import matplotlib.dates as mdates
import numpy
def init_month():
    """Return a fresh per-month tally mapping every known language to 0.

    The language names come from get_languages(); a new dict is built on
    every call so that months never share state.
    """
    return dict.fromkeys(get_languages(), 0)
def get_languages():
    """Return the language names listed in ``languages.json``.

    The file is looked up relative to the current working directory and
    must contain a JSON array of objects that each have a ``"name"`` key.
    Names are returned in file order.

    Raises:
        OSError: if ``languages.json`` cannot be opened.
        ValueError: if the file is not valid JSON.
        KeyError: if an entry has no ``"name"`` key.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open("languages.json", encoding="utf-8") as fi:
        return [item['name'] for item in json.load(fi)]
def get_data():
    """Read ``results.txt`` and return the Python literal it contains.

    The file is looked up relative to the current working directory.
    build_data() iterates over the result and calls ``.items()`` on each
    element, so the file is expected to hold a list of dicts.

    SECURITY NOTE: this used to call ``eval`` on the file contents, which
    would execute arbitrary code placed in ``results.txt``.  It now uses
    ``ast.literal_eval``, which only accepts literals (numbers, strings,
    tuples, lists, dicts, sets, booleans, None).
    NOTE(review): assumes results.txt never contained non-literal
    expressions -- confirm against whatever writes the file.

    Raises:
        OSError: if ``results.txt`` cannot be opened.
        ValueError, SyntaxError: if the contents are not a plain literal.
    """
    import ast  # local import: the module header does not import ast

    with open("results.txt") as fi:
        return ast.literal_eval(fi.read())
def _aggregate_months(start_year):
    """Sum the daily scores from get_data() into per-month buckets.

    Returns ``(month_data, monthly_totals)`` where ``month_data`` is a list
    of ``{(month, year): {language: score}}`` dicts in input order and
    ``monthly_totals`` maps ``(month, year)`` to the sum over all languages.
    """
    import ast  # local import: the module header does not import ast

    month_data = []
    monthly_totals = {}
    key = None
    this_month = None
    for item in get_data():
        for date, values in item.items():
            # Each key is a stringified "(day, month, year)" tuple.
            # literal_eval parses it without executing arbitrary code.
            _day, month, year = ast.literal_eval(date)
            if year < start_year:
                # Skip only this entry; later entries may still be in range.
                continue
            if (month, year) != key:
                # Month boundary.  Compare month AND year so that the same
                # month number in a different year starts a new bucket.
                if this_month is not None:
                    if monthly_totals[key] == 0:
                        print("empty month: %s" % str(key))
                    month_data.append({key: this_month})
                key = (month, year)
                this_month = init_month()
                monthly_totals[key] = 0
            for language, score in values.items():
                this_month[language] += score
                monthly_totals[key] += score
    # NOTE(review): the month still being accumulated when the input ends is
    # never appended, so it is absent from the plots.  Kept as in the
    # original; confirm whether dropping that month is intended.
    return month_data, monthly_totals


def _series_by_language(month_data, monthly_totals):
    """Turn the monthly buckets into per-language, plot-ready series.

    Returns ``{language: {'date': [...], 'ratio': [...], 'score': [...],
    'total': n}}``.  Months whose overall total is 0 are skipped entirely
    so the 'date', 'ratio' and 'score' lists always have equal length.
    """
    by_language = {
        language: {'date': [], 'ratio': [], 'score': [], 'total': 0}
        for language in get_languages()
    }
    for info in month_data:
        for (month, year), scores in info.items():
            month_total = monthly_totals[(month, year)]
            if month_total == 0:
                continue  # no ratio can be computed for an empty month
            # Every month is plotted at its 15th day.
            date_obj = datetime.date(year, month, 15)
            for language, score in scores.items():
                series = by_language[language]
                series['ratio'].append(float(score) / float(month_total))
                series['date'].append(date_obj)
                series['total'] += score
                series['score'].append(score)
    return by_language


def _top_contenders(by_language, exclude, limit):
    """Return up to ``limit`` non-excluded languages, highest total first."""
    candidates = [name for name in by_language if name not in exclude]
    # sort() is stable, so languages with equal totals keep their input order.
    candidates.sort(key=lambda name: by_language[name]['total'], reverse=True)
    return candidates[:limit]


def _plot_series(by_language, contenders, field, filename):
    """Plot ``field`` against date for each contender and save to ``filename``."""
    colormap = pylab.cm.gist_ncar
    colors = [colormap(i) for i in numpy.linspace(0, 0.9, len(contenders))]
    fig, ax = pylab.subplots()
    if colors:
        # Must be set on the axes that is actually plotted on.
        ax.set_prop_cycle(color=colors)
    # A locator instance must not be shared between axes: one per figure.
    ax.xaxis.set_major_locator(mdates.YearLocator())
    for language in contenders:
        data = by_language[language]
        ax.plot(data['date'], data[field], '-', label=language)
    ax.legend(ncol=1, loc='upper left', fontsize='x-small')
    fig.savefig(filename)
    pylab.close(fig)  # release the figure once it is written


def build_data(start_year=2010, exclude=('c',), top_n=5):
    """Aggregate the scores by month and plot the top languages.

    Reads the raw records via get_data(), sums them per (month, year),
    derives each language's monthly score and its share of the monthly
    total, pretty-prints the result, and writes ``ratio.png`` and
    ``score.png`` for the highest-scoring languages.

    Args:
        start_year: entries dated before this year are ignored.
        exclude: language names that are never plotted.
        top_n: maximum number of languages to plot.

    Raises:
        KeyError: if the records mention a language that get_languages()
            does not list.
    """
    month_data, monthly_totals = _aggregate_months(start_year)
    by_language = _series_by_language(month_data, monthly_totals)
    pprint.pprint(by_language)

    contenders = _top_contenders(by_language, exclude, top_n)
    print("Final list: %s" % contenders)

    _plot_series(by_language, contenders, 'ratio', 'ratio.png')
    _plot_series(by_language, contenders, 'score', 'score.png')
# Script entry point: build the monthly data and plots only when this file
# is executed directly, not when it is imported.
if __name__ == '__main__':
    build_data()