Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions brkpoints/fragile_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

####################################################
# where the local biodata lives on the host system
BIODATA = os.environ('BIODATA', '/opt/biodata')
BIODATA = os.environ.get('BIODATA', '/opt/biodata')
DEFAULT_DB = '/usr/local/share/Blast/db/human_genomic'


Expand Down Expand Up @@ -133,10 +133,11 @@ def remove_duplicates(fragile_list, distance):
gene = name[1]
elif name[4] == 'b':
gene = name[2]
if by_chr.get(gene + '_' + site[1]):
by_chr[gene + '_' + site[1]].append(site)
chr_key = gene + '_' + site[1]
if by_chr.get(chr_key):
by_chr[chr_key].append(site)
else:
by_chr[gene + '_' + site[1]] = [site]
by_chr[chr_key] = [site]
# find duplicates, one chromosome at a time
dups = []
for chr in by_chr:
Expand All @@ -148,9 +149,12 @@ def remove_duplicates(fragile_list, distance):
# add to positions list if midpoint of j is less than Xbp from i
# if duplicates are found they will make identical lists for each
# site in i
positions = [j for j in by_chr[chr] if
(int(i[2])+int(i[3]))/2 >= (int(j[2])+int(j[3]))/2 - X and
(int(i[2])+int(i[3]))/2 <= (int(j[2])+int(j[3]))/2 + X]
positions = []
for j in by_chr[chr]:
i_mid = (int(i[2]) + int(i[3])) / 2
j_mid = (int(j[2]) + int(j[3])) / 2
if i_mid >= j_mid - X and i_mid <= j_mid + X:
positions.append(j)
# form one entry from each list covering full range of site
# k = each site in the duplicated positions list.
# obtain minimum and maximum genomic coordinates across all
Expand Down
16 changes: 16 additions & 0 deletions notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Good
- The code might work?

# Bad
- No packaging integration
- Can't run tests without 10GB of data?
- Running tests as described in docs fails with `ImportError`
- Code fails at import time with `AttributeError: _Environ instance has no __call__ method` cf7140f
- Code doesn't pass PEP8
- [Illegible syntax](https://github.com/DeskGen/brkpoints/pull/5#discussion_r24162577)
- Single-character variable names in nested loops aren't readable, even when
they're not in a convoluted list comprehension
- "Predictably" formatted strings used instead of
[data](https://docs.python.org/2/library/stdtypes.html#dict)
[structures](https://docs.python.org/2/library/stdtypes.html#typesseq)!?
- Reluctant to assign intermediate values to variables? 438f80a