-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocalisation.py
More file actions
467 lines (423 loc) · 19.6 KB
/
localisation.py
File metadata and controls
467 lines (423 loc) · 19.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
# utf-8 encoding
from statistics import stdev, mean
from matplotlib import pyplot as pl
import numpy as np
from math import sqrt
class Localisation(object):
    """Classify cells by polar localisation and bin their intensity profiles.

    Each cell is stored in ``dict1`` as ``name -> [intensities, distances]``
    where both lists are floats and the distances are expected to run from
    0 to 1 along the cell.  The class can:

    * read that data from a csv file (``open_input``),
    * sort cells into "no pole", "one pole" and "two pole" groups
      (``ask_organise`` and helpers),
    * pool all cells into ``NL_split`` distance bins and write/plot the
      per-bin mean and standard deviation.
    """

    def __init__(
        self, input_name, operating_system, dict1, dict_main,
        NL_split, sorter, local0, local1, local2, dcutoff, pdiff
    ):
        self.input_name = input_name  # name of csv file with cell data
        self.operating_system = operating_system  # running on windows/linux
        self.dict1 = dict1  # dictionary containing all cell data
        self.dict_main = dict_main  # dict for bins containing each cell
        self.NL_split = NL_split  # number of data points for graph
        self.sorter = sorter  # True (do localisation), False (don't)
        self.local0 = local0  # dict of cells with no localisation
        self.local1 = local1  # dict of cells with 1 pole localisation
        self.local2 = local2  # dict of cells with 2 pole localisation
        self.dcutoff = dcutoff  # distance cutoff
        self.pdiff = pdiff  # percent difference from middle

    def normalise(self, values):
        """Return ``values`` min-max normalised: (x - min) / (max - min).

        An empty input gives an empty list.  When every value is identical
        the range is zero, so each value is mapped to 0.0 instead of
        raising ZeroDivisionError.
        """
        if len(values) == 0:
            return []
        lowest = min(values)
        span = max(values) - lowest
        if span == 0:
            return [0.0 for _ in values]
        return [(item - lowest) / span for item in values]

    @staticmethod
    def _cut_at_blank(fields):
        """Return ``fields`` truncated at the first empty string, if any."""
        try:
            return fields[:fields.index('')]
        except ValueError:
            return fields

    def open_input(self):
        """Read the csv input file into ``self.dict1``.

        Rows that do not start with a comma are intensity rows: the text
        before the first comma is the cell name and the values follow the
        "I" header.  Rows that start with a comma are the distance row for
        the most recently read cell; the values start at the first "0".

        Result: dict1 = { cell: [[normalised intensities], [distances]] }
        with all values stored as floats.  Blank lines are skipped.

        Raises:
            ValueError: if a distance row appears before any intensity row,
                or a row is missing its expected header/values.
        """
        name = None
        with open(self.input_name, "r") as f:
            for line in f:
                line = line.strip()  # get rid of \n
                if not line:
                    continue  # tolerate blank lines
                if line[0] != ',':  # intensity row
                    name = line[:line.index(',')]
                    intensity = line[line.index("I") + 2:]  # drop header
                    intensity_list = self._cut_at_blank(intensity.split(","))
                    NI_list = self.normalise(
                        [float(i) for i in intensity_list]
                    )
                    # distances are filled in by the following row
                    self.dict1[name] = [NI_list, '']
                else:  # distance row
                    if name is None:
                        raise ValueError(
                            "Distance row found before any intensity row in "
                            + str(self.input_name)
                        )
                    dist = line[line.index('0'):]  # drop header
                    dist_list = self._cut_at_blank(dist.split(","))
                    self.dict1[name][1] = [float(i) for i in dist_list]

    def ask_organise(self):
        """Ask whether to sort the cells by localisation pattern.

        On "Y": prompts for the cut-off values, sorts the cells (asking
        again for the values for as long as sorting reports 'error'),
        writes the summary csv and returns True.
        On "n": returns False.  Any other answer re-prompts.
        """
        query = 'blank'
        while query != "Y" and query != "n":
            query = input("Sort files by their localisation patterns? (Y/n) ")
        if query == 'n':
            self.sorter = False
            return False
        self.sorter = True
        self.get_local_values()  # determine pdiff and dcutoff
        while self.organise_cells() == 'error':  # ill-defined ends
            # start again from empty groups with new cut-off values
            self.local0 = {}
            self.local1 = {}
            self.local2 = {}
            self.get_local_values()
        self.write_cell_local()  # write data to a csv file
        return True

    @staticmethod
    def _prompt_float(prompt):
        """Prompt until the user enters something ``float`` can parse."""
        while True:
            raw = input(prompt)
            try:
                return float(raw)
            except ValueError:
                print(
                    "Please enter numbers only.",
                    "No special characters or letters."
                )

    def get_local_values(self):
        """Prompt for ``dcutoff`` and ``pdiff`` until both are valid.

        ``dcutoff`` must satisfy 0 < dcutoff < 0.5 and ``pdiff`` must be
        larger than 0.
        """
        # set dcutoff
        while True:
            dcutoff = self._prompt_float("Choose a distance cut-off: ")
            # the previous test (0.5 <= x <= 0) could never be true, so
            # out-of-range values were silently accepted
            if not 0 < dcutoff < 0.5:
                print(
                    "Value must be larger than 0 and smaller than 0.5.",
                    "Please try again."
                )
                continue
            self.dcutoff = dcutoff
            break
        # set pdiff
        while True:
            pdiff = self._prompt_float(
                "Choose a percentage difference from average: "
            )
            if not pdiff > 0:
                print("Value must be larger than 0. Please try again.")
                continue
            self.pdiff = pdiff
            break

    def organise_cells(self):
        """Assign every cell in ``dict1`` to a localisation group.

        For each cell:
          (1) find the distance values between dcutoff and 1-dcutoff
          (2) average the intensity values in that middle region
          (3) compare the average intensity of each end region with
              (100 + pdiff)% of the middle average
          (4) group accordingly (no pole, one pole, two pole)

        Cells localised at the left pole are collected and reversed once
        the loop has finished, so that every cell in ``local1`` is
        right-localised.

        Returns 'error' as soon as ``sort_middle`` reports an error for a
        cell; otherwise returns None.
        """
        # must live outside the loop: resetting it per cell loses every
        # left-localised cell except possibly the last one
        flagged_keys = []
        for key in self.dict1:
            middle_values = self.sort_middle(key)
            if middle_values == 'error':
                return 'error'
            det_localisation = self.sort_cell(key, middle_values)
            if det_localisation == 0:
                self.local0[key] = self.dict1[key]
            elif det_localisation == 2:
                self.local2[key] = self.dict1[key]
            elif det_localisation == 'r1':
                self.local1[key] = self.dict1[key]
            elif det_localisation == 'l1':
                flagged_keys.append(key)
        if flagged_keys:
            self.fix_left_cells(flagged_keys)

    def fix_left_cells(self, flagged):
        """Mirror left-localised one-pole cells so they are right-localised.

        ``flagged`` is a list of cell names.  For each one the intensity
        list is reversed and the distance list is reversed and replaced by
        1 - distance, in ``dict1``; the cell is then added to ``local1``.
        """
        for key in flagged:
            intensities, distances = self.dict1[key]
            self.dict1[key][0] = intensities[::-1]
            self.dict1[key][1] = [1 - value for value in reversed(distances)]
            self.local1[key] = self.dict1[key]

    def write_cell_local(self):
        """Write the localisation summary to csv and print the counts.

        The output file is the input name without its last four characters
        plus '_localisation_data.csv'.
        """
        g = str(self.input_name[:-4])
        groups = [
            ("delocalised", self.local0),
            ("one pole", self.local1),
            ("two pole", self.local2),
        ]
        with open(g + '_localisation_data.csv', 'w') as out:
            out.write("Chosen distance cutoff, " + str(self.dcutoff) + '\n')
            out.write("Chosen percent difference, " + str(self.pdiff) + '\n')
            for label, cells in groups:
                out.write(g + " " + label + " count, " + str(len(cells)) + '\n')
            out.write('\n')
            for label, cells in groups:
                names = ', '.join(str(name) for name in sorted(cells))
                out.write(g + " " + label + " cells, " + names + '\n')
            out.write('\n')
        print()
        print('Found '+str(len(self.local0))+' delocalised cells in '+str(self.input_name)+'.')
        print('Found '+str(len(self.local1))+' cells with polar localisation at one pole in '+str(self.input_name)+'.')
        print('Found '+str(len(self.local2))+' cells with polar localisation at both poles in '+str(self.input_name)+'.')
        print()

    def sort_middle(self, key):
        """Locate the middle region (dcutoff, 1-dcutoff) of a cell.

        Returns a list containing:
          [0] starting middle value index
          [1] ending middle value index
          [2] average of the middle intensity values
        or the string 'error' (after printing a message) when the chosen
        cut-off leaves an end with a single data point or leaves the middle
        empty, so that the caller can ask for a new cut-off.

        Raises:
            ValueError: if the cell has fewer than two data points or its
                distances do not run from 0 to 1; no cut-off can fix that.
        """
        key_intens, key_dists = self.dict1[key][0], self.dict1[key][1]
        if len(key_dists) < 2:
            raise ValueError(
                "Cell " + str(key) + " has fewer than two data points."
            )
        if key_dists[1] > self.dcutoff or key_dists[-2] < 1 - self.dcutoff:
            print("Error. Cell "+str(key)+" only has one data point to define its end. Please increase the value for distance cut-off.")
            return 'error'
        if key_dists[0] != 0 or key_dists[-1] != 1.0:
            # previously fell through to an UnboundLocalError
            raise ValueError(
                "Cell " + str(key)
                + " does not have normalised distances running from 0 to 1."
            )
        total = len(key_dists)
        # first index past the left cut-off
        middle_start_index = next(
            (i for i, value in enumerate(key_dists) if value > self.dcutoff),
            total
        )
        # last index before the right cut-off
        middle_end_index = next(
            (i for i, value in enumerate(key_dists)
             if value > 1 - self.dcutoff),
            total
        ) - 1
        middle_int_values = key_intens[middle_start_index:middle_end_index+1]
        # an empty middle means the localisation dictionaries have to be
        # filled again with a different cut-off
        if not middle_int_values:
            print("The cell "+str(key)+' has an empty set of middle values, which has caused a division by zero. Please re-enter the distance cut-off.')
            return 'error'
        av_middle = sum(middle_int_values) / len(middle_int_values)
        return [middle_start_index, middle_end_index, av_middle]

    def sort_cell(self, key, middle_values):
        """Decide where a cell is localised.

        ``middle_values`` is the list returned by ``sort_middle``.

        Returns 0 if no localisation at the poles is found,
        'l1' if localisation is found at the left pole ONLY,
        'r1' if localisation is found at the right pole ONLY,
        2 if localisation is found at both poles.

        Localisation means that the AVERAGE intensity of an end region is
        higher than the middle average by more than pdiff percent.
        """
        middle_start, middle_end, av_int = middle_values[:3]
        int_to_beat = (1 + (self.pdiff / 100)) * av_int
        intensities = self.dict1[key][0]
        first_segment = intensities[:middle_start]
        last_segment = intensities[middle_end + 1:]
        left_high = sum(first_segment) / len(first_segment) > int_to_beat
        right_high = sum(last_segment) / len(last_segment) > int_to_beat
        if left_high and right_high:
            return 2
        if left_high:
            return 'l1'
        if right_high:
            return 'r1'
        return 0

    def choose_NL_split(self):
        """Find the largest bin count that leaves no bin empty for any cell.

        For each cell the bin count is raised until ``compare_checklist``
        fails; the last passing count is recorded.  The minimum over all
        cells is stored in ``self.NL_split`` and returned.

        Raises:
            ValueError: if ``dict1`` contains no cells.
        """
        if not self.dict1:
            raise ValueError("No cells available to choose a bin count from.")
        max_bins_list = []
        for key in self.dict1:
            counter = 1
            while self.compare_checklist(key, counter):
                counter += 1
            max_bins_list.append(counter - 1)  # last count that passed
        self.NL_split = min(max_bins_list)
        return self.NL_split

    def make_checklist(self, NL_split):
        """Return the upper edges of ``NL_split`` equal bins over 0..1.

        Each edge is rounded to at most 3 decimal places.
        """
        check_list = [
            "%.3f" % ((1.0 / NL_split) * x) for x in range(1, NL_split + 1)
        ]
        return [float(i) for i in check_list]

    def compare_checklist(self, key, counter):
        """Check that cell ``key`` has a distance value in each of
        ``counter`` bins.

        The first bin is [0, edge]; every later bin is (previous edge,
        edge].  Returns True if every bin holds at least one value and
        False as soon as a bin is found empty.
        """
        distances = self.dict1[key][1]
        lower = None
        for upper in self.make_checklist(counter):
            if lower is None:
                filled = any(0 <= value <= upper for value in distances)
            else:
                filled = any(lower < value <= upper for value in distances)
            if not filled:
                return False
            lower = upper
        return True

    def make_dict_main(self):
        """Create one empty bin per split in ``dict_main`` and return it.

        Keys are 'dict_1' ... 'dict_<NL_split>'; each value is a pair of
        empty lists (intensities, distances).
        """
        for i in range(1, self.NL_split + 1):
            self.dict_main["dict_" + str(i)] = [[], []]
        return self.dict_main

    def analyse_dict(self):
        """Pool every cell's data points into the bins of ``dict_main``.

        Each (intensity, distance) point goes into the first bin whose
        upper edge is >= the distance; points beyond the last edge are not
        stored.  Returns ``dict_main``.
        """
        check_list = self.make_checklist(self.NL_split)
        for key in self.dict1:
            intensities, distances = self.dict1[key][0], self.dict1[key][1]
            for position, value in enumerate(distances):
                for bin_number, check in enumerate(check_list, start=1):
                    if value <= check:
                        target = self.dict_main['dict_' + str(bin_number)]
                        target[0].append(intensities[position])
                        target[1].append(value)
                        break
        return self.dict_main

    def write_dict_main(self):
        """Write per-bin statistics to '<input name>_output.csv'.

        Each row holds the mean and standard deviation of the intensities
        followed by the mean and standard deviation of the distances for
        one bin.

        Returns 'onecell' without writing anything when ``dict1`` holds a
        single cell, because the standard deviation needs at least two
        data points per bin; otherwise returns None.
        """
        if len(self.dict1) == 1:
            key_name = str(next(iter(self.dict1)))
            print("Only one cell ("+key_name+")"+" available in "+str(self.input_name)+". Unable to plot.")
            print()
            return 'onecell'
        output_name = self.input_name[:-4] + '_output.csv'
        with open(output_name, "w") as g:
            g.write("av_intensity,stdev_intensity,av_distance,stdev_distance\n")
            for i in range(1, self.NL_split + 1):
                intensities, distances = self.dict_main["dict_" + str(i)]
                row = [
                    mean(intensities), stdev(intensities),
                    mean(distances), stdev(distances),
                ]
                g.write(','.join(str(item) for item in row) + '\n')

    def plot_data(self):
        """Plot mean intensity against mean distance with a shaded band.

        The band is the mean +/- one standard deviation of the intensity
        in each bin.  The figure is saved next to the input file with the
        extension replaced by 'png'.

        Shading approach from http://stackoverflow.com/questions/12957582/
        """
        # stops the axis labels being cropped off
        # http://stackoverflow.com/questions/6774086/
        pl.rcParams.update({'figure.autolayout': True})
        x = []
        y = []
        y_error = []
        for i in range(1, self.NL_split + 1):
            intensities, distances = self.dict_main["dict_" + str(i)]
            y.append(mean(intensities))
            y_error.append(stdev(intensities))
            x.append(mean(distances))
        y_upper = [centre + spread for centre, spread in zip(y, y_error)]
        y_lower = [centre - spread for centre, spread in zip(y, y_error)]
        pl.rc('font', family='arial')
        pl.tick_params(axis='y', which='major', labelsize=18)
        pl.tick_params(axis='x', which='major', labelsize=18)
        pl.yticks(np.arange(-0.2, 1.4, 0.2))
        pl.ylim(ymin=-0.2, ymax=1.2)
        # line style only in the format string; the colour comes from the
        # keyword (the old 'k-' also asked for black, which conflicted)
        pl.plot(x, y, '-', lw=5, color='red')
        pl.fill_between(
            x, y_lower, y_upper, facecolor='red', edgecolor='none', alpha=0.2
        )
        oname = self.input_name[:-3] + "png"
        # axis label font size depends on the platform
        label_size = 20 if self.operating_system == 'linux' else 16
        csfont = {'fontname': 'Arial', 'size': label_size, 'weight': 'bold'}
        pl.xlabel("Normalized Cell Length", **csfont)
        pl.ylabel("Normalized Fluorescence Intensity", **csfont)
        pl.savefig(oname, dpi=300)
        pl.close()