-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix-tags.py
More file actions
156 lines (126 loc) · 5.4 KB
/
fix-tags.py
File metadata and controls
156 lines (126 loc) · 5.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""This script was written for the express purpose of fixing the tags in a
particular OLD after the lingsync2old.py migration script failed to do so.
It SHOULD NOT BE RUN on an existing OLD without careful inspection first since
doing so may cause data corruption.
"""
import json, pprint, re
from old_client import OLDClient
def get_correct_tags(path='tag-fix-data.json'):
    """Read the tag-fix data file and work out which tags should exist.

    The JSON document at ``path`` must be an object whose keys are the
    original comma-delimited tag strings and whose values are objects with a
    ``datum_ids`` list, e.g. ``{"a, b": {"datum_ids": ["abc123"]}}``.

    :param path: location of the JSON file. Defaults to the file name this
        script has always read, so existing zero-argument calls still work.
    :returns: a 2-tuple ``(correct_tags, datum_ids2tag_set)``.
        ``correct_tags`` is the set of every individual (whitespace-stripped,
        non-empty) tag name. ``datum_ids2tag_set`` maps each datum id to a
        sorted list of the distinct tag names listed for that datum.
    """
    with open(path) as f:
        tag_fix_data = json.load(f)

    correct_tags = set()
    names_by_datum = {}
    # .items() (not .iteritems()) so the function runs on Python 2 and 3.
    for tag_original, tag_meta in tag_fix_data.items():
        # One key may hold several comma-separated tag names; blank pieces
        # (e.g. from a trailing comma) are dropped.
        names = set()
        for piece in tag_original.split(','):
            name = piece.strip()
            if name:
                names.add(name)
        correct_tags |= names
        for datum_id in tag_meta['datum_ids']:
            # A datum id can appear under several keys; merge all of them.
            names_by_datum.setdefault(datum_id, set()).update(names)

    # Sort so the result is deterministic from run to run.
    datum_ids2tag_set = {
        datum_id: sorted(names)
        for datum_id, names in names_by_datum.items()
    }
    return correct_tags, datum_ids2tag_set
def get_current_tags(c):
    """Fetch the ``tags`` resource through the client ``c``.

    :param c: an object exposing ``get(path)``.
    :returns: whatever ``c.get('tags')`` returns, unmodified.
    """
    current_tags = c.get('tags')
    return current_tags
def login(old_url='<URL>', old_username='<USERNAME>',
          old_password='<PASSWORD>'):
    """Create an ``OLDClient`` for ``old_url`` and log in with it.

    The defaults are the same placeholder values the script has always
    passed, so a zero-argument call behaves as before.

    :param old_url: URL handed to ``OLDClient``.
    :param old_username: username handed to ``OLDClient.login``.
    :param old_password: password handed to ``OLDClient.login``.
    :returns: the logged-in client.
    :raises SystemExit: if ``OLDClient.login`` returns a false value.
    """
    # Local import: the module's import line does not bring in ``sys``.
    import sys

    # Standard ANSI SGR sequences (bright red / reset). The original failure
    # branch used undefined ANSI_FAIL / ANSI_ENDC names, so a failed login
    # raised NameError instead of printing this message.
    ansi_fail = '\033[91m'
    ansi_endc = '\033[0m'

    c = OLDClient(old_url)
    logged_in = c.login(old_username, old_password)
    if not logged_in:
        # The password is deliberately left out of the message so that it
        # does not end up in terminal scrollback or logs.
        sys.exit(u'%sUnable to log in to %s with username %s. Aborting.%s' % (
            ansi_fail, old_url, old_username, ansi_endc))
    return c
def delete_current_tags(current_tags, c, max_protected_id=3):
    """Delete every tag in ``current_tags`` whose id exceeds a threshold.

    :param current_tags: iterable of tag dicts with ``id`` and ``name`` keys.
    :param c: client exposing ``delete(path)``; its return value must have a
        ``get`` method (a dict-like response).
    :param max_protected_id: tags with ``id <= max_protected_id`` are left
        untouched. Defaults to 3, the value that used to be hard-coded.
        NOTE(review): the caller's comment says this step keeps "the import
        tag"; presumably ids 1-3 are the tags to preserve -- confirm for the
        target OLD before running.
    :returns: None. Progress and failures are printed.
    """
    for tag in current_tags:
        tag_id = tag['id']
        if tag_id <= max_protected_id:
            continue
        r = c.delete('tags/%d' % tag_id)
        # A delete counts as successful when the response echoes the tag id.
        if r.get('id') == tag_id:
            print('Deleted tag %d: %s' % (tag_id, tag['name']))
        else:
            # Report failures in the same layout create_correct_tags uses,
            # instead of silently moving on.
            print('')
            print('FAIL: failed to delete tag %d: %s' % (tag_id, tag['name']))
            print(r)
            print('')
def create_correct_tags(correct_tags, c):
    """Create one tag per name in ``correct_tags`` via ``c.create``.

    Each tag is created with an empty description. A creation whose response
    carries no truthy ``id`` is reported on stdout and left out of the result.

    :param correct_tags: iterable of tag names.
    :param c: client exposing ``create(path, payload)`` that returns a
        dict-like response.
    :returns: dict mapping each successfully created tag name to its id.
    """
    created = {}
    for name in correct_tags:
        response = c.create('tags', {'name': name, 'description': u''})
        new_id = response.get('id')
        if not new_id:
            # print('') emits the same single newline as a bare Python 2
            # ``print`` statement.
            print('')
            print('FAIL: failed to create a tag named %s' % name)
            print(response)
            print('')
            continue
        created[name] = new_id
    return created
def get_tag_name2id(c):
    """Build a ``{tag name: tag id}`` lookup from ``get_current_tags(c)``.

    If two tags share a name, the one that comes later in the returned
    sequence wins.
    """
    return {tag['name']: tag['id'] for tag in get_current_tags(c)}
# Matches the provenance note in a form's comments and captures the
# hexadecimal LingSync datum id that follows it.
_DATUM_ID_PATTERN = re.compile(
    r'This form was created from LingSync datum ([0-9a-fA-F]+)')

# Form attributes that hold one related object (or a false value) in a fetched
# form and are replaced by that object's id before the form is sent back.
_SINGLE_RELATION_FIELDS = (
    'elicitation_method',
    'syntactic_category',
    'speaker',
    'elicitor',
    'verifier',
    'source',
)


def _extract_datum_id(comments):
    """Return the LingSync datum id named in ``comments``, or None."""
    match = _DATUM_ID_PATTERN.search(comments or u'')
    if match is None:
        return None
    return match.group(1)


def _flatten_form_for_update(form):
    """Rewrite ``form`` in place into the shape passed to ``c.update``.

    Related objects become their ids, ``files`` becomes a list of ids, and a
    ``Y-M-D`` ``date_elicited`` becomes ``M/D/Y``. Empty or false values are
    left exactly as they are.
    """
    for field in _SINGLE_RELATION_FIELDS:
        if form[field]:
            form[field] = form[field]['id']
    if form['files']:
        form['files'] = [file_['id'] for file_ in form['files']]
    date_elicited = form['date_elicited']
    if date_elicited:
        parts = date_elicited.split('-')
        if len(parts) == 3:
            year, month, day = parts
            form['date_elicited'] = u'%s/%s/%s' % (month, day, year)


def main():
    """Attach the correct tags to every form that came from a LingSync datum.

    The numbered steps below record the full one-off procedure. Steps 1, 2
    and 4 are disabled because they were only meant to be run once.
    """
    c = login()

    # 1. Get all current tags.
    # current_tags = get_current_tags(c)
    # print '\n\n\n'
    # print 'Current Tags'
    # print '\n'.join(sorted([t['name'] for t in current_tags]))
    # print '\n\n\n'

    # 2. Delete all current tags, except the import tag.
    # delete_current_tags(current_tags, c)

    # 3. Get the correct tags and a mapper from datum ids to lists of tags.
    correct_tags, datum_ids2tag_set = get_correct_tags()
    # print '\n'.join(sorted(list(correct_tags)))
    # print '\n\n\n'
    # for datum_id, tag_list in datum_ids2tag_set.iteritems():
    #     print '%s\n    %s' % (datum_id, ' | '.join(tag_list))

    # 4. Create all of the correct tags.
    # WARNING: don't call this twice!
    # tag_name2id = create_correct_tags(correct_tags, c)
    tag_name2id = get_tag_name2id(c)
    # pprint.pprint(tag_name2id)

    # 5. Get all forms.
    forms = c.get('forms')

    # 6. Update forms with correct tags, based on datum id.
    for form in forms:
        datum_id = _extract_datum_id(form['comments'])
        if datum_id is None:
            continue
        tag_set = datum_ids2tag_set.get(datum_id)
        if not tag_set:
            continue

        # Refuse to write a partial tag set: report the form and move on, so
        # one unknown tag name cannot abort the whole batch with a KeyError.
        missing = [name for name in tag_set if name not in tag_name2id]
        if missing:
            print('\n\nFailed to update form %d' % form['id'])
            print('No tag id known for: %s' % ', '.join(missing))
            print('\n\n')
            continue

        # Keep the tags the form already has and add the new ones, skipping
        # ids that are already present so a re-run does not duplicate them.
        tags = [tag['id'] for tag in form['tags']]
        for tag_name in tag_set:
            tag_id = tag_name2id[tag_name]
            if tag_id not in tags:
                tags.append(tag_id)
        form['tags'] = tags

        # NOTE(review): flattening and updating are assumed to apply only to
        # forms that receive tags; confirm against the original indentation.
        _flatten_form_for_update(form)
        r = c.update('forms/%d' % form['id'], form)
        if not r.get('id'):
            print('\n\nFailed to update form %d' % form['id'])
            print(r)
            print('\n\n')
# Run the tag fix only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()