forked from legis-graph/legis-graph
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparse_legislators.py
More file actions
114 lines (98 loc) · 3.88 KB
/
parse_legislators.py
File metadata and controls
114 lines (98 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import csv
import yaml
OUTPUT_COLUMNS = [
'thomasID',
'govtrackID',
'bioguideID',
'opensecretsID',
'lisID',
'votesmartID',
'fecIDs',
'icpsrID',
'wikipediaID',
'cspanID',
'washpostID',
'firstName',
'lastName',
'birthday',
'gender',
'religion',
'party',
'democratCount',
'republicanCount',
'otherCount',
'state',
'district',
'type',
'currentParty'
]
def load_legistors(kind):
if kind not in ['current', 'historical']:
raise Exception('Legislator kind must be either "current" or "historical"')
inpath = 'data/congress-legislators/legislators-{}.yaml'.format(kind)
with open(inpath, 'r') as f:
current = yaml.load(f)
outpath = 'outputs/legislators-{}.csv'.format(kind)
with open(outpath, 'w') as f:
writer = csv.DictWriter(f, OUTPUT_COLUMNS, extrasaction='ignore')
writer.writeheader()
for person in current:
record = {}
record['thomasID'] = person['id'].get('thomas', '')
record['govtrackID'] = person['id'].get('govtrack', '')
record['bioguideID'] = person['id'].get('bioguide', '')
record['opensecretsID'] = person['id'].get('opensecrets', '')
record['lisID'] = person['id'].get('lis', '')
record['votesmartID'] = person['id'].get('votesmart', '')
record['fecIDs'] = person['id'].get('fec', [])
record['icpsrID'] = person['id'].get('icpsr', '')
record['wikipediaID'] = person['id'].get('wikipedia', '')
record['cspanID'] = person['id'].get('cspan', '')
record['washpostID'] = person['id'].get('washington_post', '')
record['firstName'] = person['name']['first']
record['lastName'] = person['name']['last']
record['fullName'] = person['name']['last'] + ', ' + person['name']['first']
if 'bio' in person:
record['birthday'] = person['bio'].get('birthday', '')
record['gender'] = person['bio'].get('gender', '')
record['religion'] = person['bio'].get('religion', '')
if 'terms' in person:
demct = 0
repct = 0
othct = 0
type = ''
currentParty = ''
for term in person['terms']:
# just use the most recent state / district
# FIXME: incorporate possible multiple state / districts into the data model
record['state'] = term.get('state', '')
# NOTE: districts are not relevant for Senators
record['district'] = term.get('district', '')
party = term.get("party", '')
if party == 'Democrat':
demct += 1
elif party == 'Republican':
repct += 1
else:
othct += 1
type = term['type']
currentParty = party
record['democratCount'] = demct
record['republicanCount'] = repct
record['otherCount'] = othct
record['type'] = type
record['currentParty'] = currentParty
maxct = max(demct, repct, othct)
if demct == maxct:
record['party'] = 'democrat'
if repct == maxct:
record['party'] = 'republican'
if othct == maxct:
record['party'] = 'other'
if record['type'] == 'rep':
record['type'] = 'House'
elif record['type'] == 'sen':
record['type'] = 'Senate'
writer.writerow(record)
load_legistors('current')
load_legistors('historical')