From cf7140f9826f8df37645a7977216ec61532d303a Mon Sep 17 00:00:00 2001 From: bmcorser Date: Thu, 5 Feb 2015 15:51:20 +0000 Subject: [PATCH 1/3] Make code compile --- brkpoints/fragile_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brkpoints/fragile_finder.py b/brkpoints/fragile_finder.py index 281576b..07ea63e 100644 --- a/brkpoints/fragile_finder.py +++ b/brkpoints/fragile_finder.py @@ -16,7 +16,7 @@ #################################################### # where the local biodata lives on the host system -BIODATA = os.environ('BIODATA', '/opt/biodata') +BIODATA = os.environ.get('BIODATA', '/opt/biodata') DEFAULT_DB = '/usr/local/share/Blast/db/human_genomic' From 438f80ac3d1654df73e7b6f83fe8641f3ece615b Mon Sep 17 00:00:00 2001 From: bmcorser Date: Thu, 5 Feb 2015 15:53:11 +0000 Subject: [PATCH 2/3] Assignments aren't that expensive --- brkpoints/fragile_finder.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/brkpoints/fragile_finder.py b/brkpoints/fragile_finder.py index 07ea63e..2cf3d71 100644 --- a/brkpoints/fragile_finder.py +++ b/brkpoints/fragile_finder.py @@ -133,10 +133,11 @@ def remove_duplicates(fragile_list, distance): gene = name[1] elif name[4] == 'b': gene = name[2] - if by_chr.get(gene + '_' + site[1]): - by_chr[gene + '_' + site[1]].append(site) + chr_key = gene + '_' + site[1] + if by_chr.get(chr_key): + by_chr[chr_key].append(site) else: - by_chr[gene + '_' + site[1]] = [site] + by_chr[chr_key] = [site] # find duplicates, one chromosome at a time dups = [] for chr in by_chr: @@ -148,9 +149,12 @@ def remove_duplicates(fragile_list, distance): # add to positions list if midpoint of j is less than Xbp from i # if duplicates are found they will make identical lists for each # site in i - positions = [j for j in by_chr[chr] if - (int(i[2])+int(i[3]))/2 >= (int(j[2])+int(j[3]))/2 - X and - (int(i[2])+int(i[3]))/2 <= (int(j[2])+int(j[3]))/2 + X] + positions = [] + for j in by_chr[chr]: + 
i_mid = (int(i[2]) + int(i[3])) / 2 + j_mid = (int(j[2]) + int(j[3])) / 2 + if i_mid >= j_mid - X and i_mid <= j_mid + X: + positions.append(j) # form one entry from each list covering full range of site # k = each site in the duplicated positions list. # obtain minimum and maximum genomic coordinates across all From 20a0fa4429dffe1b51113a58bf0968c14467b608 Mon Sep 17 00:00:00 2001 From: bmcorser Date: Thu, 5 Feb 2015 15:55:15 +0000 Subject: [PATCH 3/3] Comments --- notes.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 notes.md diff --git a/notes.md b/notes.md new file mode 100644 index 0000000..cb24099 --- /dev/null +++ b/notes.md @@ -0,0 +1,16 @@ +# Good +- The code might work? + +# Bad +- No packaging integration +- Can't run tests without 10GB of data? +- Running tests as described in docs fails with `ImportError` +- Code doesn't run: `AttributeError: _Environ instance has no __call__ method` (fixed in cf7140f) +- Code doesn't pass PEP8 +- [Illegible syntax](https://github.com/DeskGen/brkpoints/pull/5#discussion_r24162577) +- Single-character variable names in nested loops aren't readable, even when + they're not in a convoluted list comprehension +- "Predictably" formatted strings used instead of + [data](https://docs.python.org/2/library/stdtypes.html#dict) + [structures](https://docs.python.org/2/library/stdtypes.html#typesseq)!? +- Reluctant to assign intermediate variables? (see 438f80a)