From 282b3ba2a8dc3a362a138741939e00a8fa85e788 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evanbiederstedt@users.noreply.github.com>
Date: Sat, 11 Aug 2018 21:37:54 -0400
Subject: [PATCH 01/60] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index a77b0c5..42c842a 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,8 @@ Latest tested version in parentheses.
     a. NumPy (1.11.0)
 
     b. SciPy (0.17.0)
+    
+    c. NetworkX (1.11) ??
 
 2. gcc (4.9.2)
 

From a4106f64de00440c606ec2b8641b9929c1aaffd4 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Sat, 11 Aug 2018 21:49:53 -0400
Subject: [PATCH 02/60] revised requirements.txt, README

---
 README.md        | 2 --
 requirements.txt | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 42c842a..a77b0c5 100644
--- a/README.md
+++ b/README.md
@@ -11,8 +11,6 @@ Latest tested version in parentheses.
     a. NumPy (1.11.0)
 
     b. SciPy (0.17.0)
-    
-    c. NetworkX (1.11) ??
 
 2. gcc (4.9.2)
 
diff --git a/requirements.txt b/requirements.txt
index 0e48cd0..12f43db 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-numpy
-scipy
-networkx
+numpy >=1.11.0
+scipy >=0.17.0
+networkx >= 1.11

From 8effca7fafa408a7a0bbb0885198d6bc9a5bd031 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Sat, 11 Aug 2018 21:50:55 -0400
Subject: [PATCH 03/60] added travis config

---
 .travis.yml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..c6d60c5
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,17 @@
+language: python
+python:
+    - 2.7
+    - 3.4
+    - 3.5
+    - 3.6
+install:
+    - sudo apt-get -y update
+    - sudo apt-get -y update
+    - sudo apt-get -y install r-base
+    - sudo apt-get -y install python-matplotlib
+    - pip install codecov
+    - pip install -r requirements.txt
+script:
+    - nosetests
+after_success:
+    - codecov

From 4ff185842255a2f67068fa4a9a004172fc1189ba Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Tue, 14 Aug 2018 16:18:36 -0400
Subject: [PATCH 04/60] updated to python3.x in "examples", "experiments",
 "viz"

---
 examples/generate_data.py                     |  2 +-
 experiments/eccb2016/scripts/helper.py        | 11 +++---
 experiments/eccb2016/scripts/pairs_summary.py | 14 +++----
 .../scripts/permutation_test_helper.py        |  2 +-
 .../eccb2016/scripts/permute_single_matrix.py | 10 ++---
 .../eccb2016/scripts/pval_correlations.py     | 28 +++++++-------
 .../reconcile_grid_permutation_test.py        | 12 +++---
 .../scripts/remove_genes_with_no_length.py    |  6 +--
 experiments/eccb2016/scripts/results_table.py | 38 ++++++++++---------
 .../scripts/sample_mutation_frequency_plot.py |  8 ++--
 .../eccb2016/scripts/triple_pval_scatter.py   | 26 ++++++-------
 .../eccb2016/scripts/unweighted_comparison.py | 16 ++++----
 .../eccb2016/scripts/weights_matrix.py        | 15 ++++----
 viz/generate_viz_data.py                      | 31 +++++++--------
 viz/server.py                                 |  5 ++-
 15 files changed, 115 insertions(+), 109 deletions(-)

diff --git a/examples/generate_data.py b/examples/generate_data.py
index e5d5606..50200dc 100644
--- a/examples/generate_data.py
+++ b/examples/generate_data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, numpy as np, random
diff --git a/experiments/eccb2016/scripts/helper.py b/experiments/eccb2016/scripts/helper.py
index 597270b..2017240 100644
--- a/experiments/eccb2016/scripts/helper.py
+++ b/experiments/eccb2016/scripts/helper.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+
 import numpy as np
 
 # Add a y=x line to the given matplotlib axis
@@ -29,14 +30,14 @@ def aligned_plaintext_table(table, sep='\t', spaces=2):
 
     # Find numbers of rows and columns.
     m = len(rows)
-    lengths = map(len, rows)
+    lengths = list(map(len, rows))
     n = max(lengths)
 
     # Pad rows with a deficient number of columns.
     entries = [[rows[i][j] if j<lengths[i] else '' for j in range(n)] for i in range(m)]
 
     # Find column widths.
-    sizes = [max(len(entries[i][j]) for i in range(m)) for j in range(n)]
+    sizes = [max(len(entries[i][j]) for i in list(range(m))) for j in range(n)]
 
     # Return results.
     return '\n'.join([''.join([entries[i][j].rjust(sizes[j]+spaces) for j in range(n)]).rstrip() for i in range(m)])
@@ -80,14 +81,14 @@ def rank(a, reverse=False, ties=2):
     elif ties==1 :
         z = np.zeros(n, dtype=y.dtype)
         j = 0
-        for i in xrange(1, n):
+        for i in range(1, n):
             if x[y[i]]!=x[y[i-1]]:
                 j += 1
             z[y[i]] = j
     elif ties==2:
         z = np.zeros(n, dtype=y.dtype)
         j = 0
-        for i in xrange(1, n):
+        for i in range(1, n):
             if x[y[i]]!=x[y[i-1]]:
                 j = i
             z[y[i]] = j
diff --git a/experiments/eccb2016/scripts/pairs_summary.py b/experiments/eccb2016/scripts/pairs_summary.py
index 8275746..3e9d2ce 100755
--- a/experiments/eccb2016/scripts/pairs_summary.py
+++ b/experiments/eccb2016/scripts/pairs_summary.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import matplotlib
@@ -86,7 +86,7 @@
                         "Cancer": cancer})
 df = pd.DataFrame(items)
 
-print 'Testing {} pairs...'.format(len(weighted_exact_pvals))
+print('Testing {} pairs...'.format(len(weighted_exact_pvals)))
 
 # Set up the figure
 fig, ((ax1, ax2, ax3, ax4)) = plt.subplots(1, 4)
@@ -138,15 +138,15 @@
 # Output the correlation between
 all_correlation = spearmanr(weighted_exact_pvals, weighted_saddlepoint_pvals)
 tail_correlation = spearmanr(weighted_exact_tail_pvals, weighted_saddlepoint_tail_pvals)
-print '-' * 14, 'Correlation: WRE (Saddlepoint) and WRE (Recursive)', '-' * 14
-print 'All: \\rho={:.5}, P={:.5}'.format(*all_correlation)
-print '\Phi_WR < 10^-4: \\rho={:.5}, P={:.5}'.format(*tail_correlation)
+print('-' * 14, 'Correlation: WRE (Saddlepoint) and WRE (Recursive)', '-' * 14)
+print('All: \\rho={:.5}, P={:.5}'.format(*all_correlation))
+print('\\Phi_WR < 10^-4: \\rho={:.5}, P={:.5}'.format(*tail_correlation))
     
 # Output a table summarizing the runtimes (Table 3)
-print '-' * 35, 'Runtimes', '-' * 35
+print('-' * 35, 'Runtimes', '-' * 35)
 tbl = ['#Method\tMinimum\tMedian\tMaximum\tTotal']
 for method in ["WRE (Exact)", "WRE (Saddlepoint)"]:
-    print method, sum(list(df.loc[df['Method'] == method]['Runtime (seconds)']))
+    print(method, sum(list(df.loc[df['Method'] == method]['Runtime (seconds)'])))
 
 # Output to file
 plt.tight_layout()
diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py
index fc9b90a..cb6770a 100644
--- a/experiments/eccb2016/scripts/permutation_test_helper.py
+++ b/experiments/eccb2016/scripts/permutation_test_helper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse
diff --git a/experiments/eccb2016/scripts/permute_single_matrix.py b/experiments/eccb2016/scripts/permute_single_matrix.py
index 1efa4f0..00000ba 100755
--- a/experiments/eccb2016/scripts/permute_single_matrix.py
+++ b/experiments/eccb2016/scripts/permute_single_matrix.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Import modules.
 import numpy as np, os, sys, argparse, json
@@ -33,7 +33,7 @@ def run( args ):
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in geneToCases.iteritems():
+    for gene, cases in list(geneToCases.items()):
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
@@ -57,16 +57,16 @@ def run( args ):
         permutedPatientToMutations[patient].add(gene)
         
     # Verify the number of mutations per gene/patient is preserved
-    for g, cases in geneToCases.iteritems():
+    for g, cases in list(geneToCases.items()):
         assert( len(cases) == len(permutedGeneToCases[g]) )
 
-    for p, muts in patientToMutations.iteritems():
+    for p, muts in list(patientToMutations.items()):
         assert( len(muts) == len(permutedPatientToMutations[p]) )
 
     # Save edge list.
     output_file = '{}-{}.json'.format(args.output_prefix, args.job_id)
     permutation = dict(params=params, permutation_number=args.job_id,
-                       geneToCases=dict( (g, list(cases)) for g, cases in permutedGeneToCases.iteritems()))
+                       geneToCases=dict( (g, list(cases)) for g, cases in iter(list(permutedGeneToCases.items()))))
     with open(output_file, 'w') as OUT: json.dump( permutation, OUT )
     
 if __name__ == '__main__':
diff --git a/experiments/eccb2016/scripts/pval_correlations.py b/experiments/eccb2016/scripts/pval_correlations.py
index a124476..55c280f 100755
--- a/experiments/eccb2016/scripts/pval_correlations.py
+++ b/experiments/eccb2016/scripts/pval_correlations.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, pandas as pd
@@ -46,33 +46,33 @@
                 row.append(rho)
         tbl.append(row)
 
-    print '-' * 80
-    print 'CORRELATIONS ({})'.format(val)
-    print aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ]) )
+    print('-' * 80)
+    print('CORRELATIONS ({})'.format(val))
+    print(aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ])))
 
 permutational_pvals_no_zeros = [ p for p in permutational_pvals_with_zeros if p > 0 ]
 for method in ["Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"]:
     pvals = list(df.loc[df['Method'] == method]['P-value'])
-    print 'Correlation:', method, 'with Permutational'
+    print('Correlation:', method, 'with Permutational')
     rho, pval = spearmanr(permutational_pvals, pvals)
-    print '\tIncluding P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals), rho, pval)
+    print('\tIncluding P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals), rho, pval))
     pvals_no_zeros = [ p for i, p in enumerate(pvals) if permutational_pvals_with_zeros[i] > 0 ]
     rho, pval = spearmanr(permutational_pvals_no_zeros, pvals_no_zeros)
-    print '\tWithout P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals_no_zeros), rho, pval)
-print
+    print('\tWithout P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals_no_zeros), rho, pval))
+print()
 # Compute the correlations of weighted saddlepoint and exact test
 weighted_exact_pvals = list(df.loc[df['Method'] == 'Weighted (exact test)']['P-value'])
 weighted_saddlepoint_pvals = list(df.loc[df['Method'] == 'Weighted (saddlepoint)']['P-value'])
 rho, pval = spearmanr(weighted_exact_pvals, weighted_saddlepoint_pvals)
 
-print 'Correlation of weighted exact test and saddlepoint (all P-values)'
-print '\tN={}, \\rho: {}, P={}'.format(len(weighted_exact_pvals), rho, pval)
+print('Correlation of weighted exact test and saddlepoint (all P-values)')
+print('\tN={}, \\rho: {}, P={}'.format(len(weighted_exact_pvals), rho, pval))
 
 tail_weighted_exact_pvals = [ p for p in weighted_exact_pvals if p < 1e-4 ]
 rho, pval = spearmanr(tail_weighted_exact_pvals, [ p for i, p in enumerate(weighted_saddlepoint_pvals) if weighted_exact_pvals[i] < 1e-4])
-print 'Correlation of weighted exact test and saddlepoint (P < 0.0001)'
-print '\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval)
+print('Correlation of weighted exact test and saddlepoint (P < 0.0001)')
+print('\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval))
 
 rho, pval = spearmanr(tail_weighted_exact_pvals, [ p for i, p in enumerate(permutational_pvals) if weighted_exact_pvals[i] < 1e-4])
-print 'Correlation of weighted exact test and permutational (P < 0.0001)'
-print '\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval)
+print('Correlation of weighted exact test and permutational (P < 0.0001)')
+print('\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval))
diff --git a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
index 3b123a2..2f3e250 100644
--- a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
+++ b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, json, multiprocessing as mp
@@ -42,7 +42,7 @@ def load_json_files(( json_files )):
     json_files = [ '{}/{}'.format(root, f) for f in files if f.lower().endswith('.json') ]
 
 # Set up the multiprocessing and run
-print '* Loading {} JSON files...'.format(len(json_files))
+print('* Loading {} JSON files...'.format(len(json_files)))
 num_cores = args.num_cores if args.num_cores != -1 else mp.cpu_count()
 if num_cores != 1:
     pool = mp.Pool(num_cores)
@@ -58,7 +58,7 @@ def load_json_files(( json_files )):
     pool.join()
 
 # Merge the results
-print '\t- Merging results...'
+print('\t- Merging results...')
 setToCount       = defaultdict( int )
 setToRuntime     = defaultdict( float )
 setToObs         = dict()
@@ -72,16 +72,16 @@ def load_json_files(( json_files )):
 
 setToPval = dict( (M, count/num_permutations) for M, count in setToCount.iteritems() )
 
-print '\t- Loaded {} sets with {} permutations'.format(len(setToPval), int(num_permutations))
+print('\t- Loaded {} sets with {} permutations'.format(len(setToPval), int(num_permutations)))
 
 # Compute FDR
-print '* Computing FDRs...'
+print('* Computing FDRs...')
 tested_sets = setToPval.keys()
 pvals       = [ setToPval[M] for M in tested_sets ]
 setToFDR    = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))
 
 # Output the merged file
-print '* Outputting to file...'
+print('* Outputting to file...')
 k                  = len(tested_sets[0])
 args.json_format   = True
 args.test          = 'RCE'
diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
index 5441499..3aa9e9f 100644
--- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py
+++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, json
@@ -28,8 +28,8 @@
 obj['num_genes'] = len(obj['genes'])
 obj['params']['lengths_file'] = os.path.abspath(args.lengths_file)
 obj['genes_with_no_length_removed'] = sorted(original_genes - set(obj['genes']))
-obj['patientToMutations'] = dict( (p, sorted(set(muts) & remaining_genes)) for p, muts in obj['patientToMutations'].iteritems() )
-print 'Removed {} genes with no length'.format(len(obj['genes_with_no_length_removed']))
+obj['patientToMutations'] = dict((p, sorted(set(muts) & remaining_genes)) for p, muts in iter(list(obj['patientToMutations'].items())))
+print('Removed {} genes with no length'.format(len(obj['genes_with_no_length_removed'])))
 
 # Output the new file
 with open(args.output_file, 'w') as OUT:
diff --git a/experiments/eccb2016/scripts/results_table.py b/experiments/eccb2016/scripts/results_table.py
index a8514db..ad39a94 100755
--- a/experiments/eccb2016/scripts/results_table.py
+++ b/experiments/eccb2016/scripts/results_table.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, json
@@ -22,47 +22,49 @@
 with open(args.lengths_file, 'r') as IN:
     arrs = [ l.rstrip().split('\t') for l in IN if not l.startswith('#') ]
     geneToLength = dict( (arr[0], float(arr[1])) for arr in arrs )
-    lengths = geneToLength.values()
+    lengths = list(geneToLength.values())
     length_ranks = rank(lengths, reverse=True)
     geneToLengthRank = defaultdict( lambda : args.length_threshold + 1 )
-    geneToLengthRank.update(zip(geneToLength.keys(), length_ranks))
-    threshold_gene = sorted(geneToLength.keys(), key=lambda g: geneToLengthRank[g])[args.length_threshold]
-    print 'Length of {} longest gene: {}'.format(args.length_threshold, geneToLength[threshold_gene])
+    geneToLengthRank.update(list(zip(list(geneToLength.keys()), length_ranks)))
+    threshold_gene = sorted(list(geneToLength.keys()), key=lambda g: geneToLengthRank[g])[args.length_threshold]
+    print('Length of {} longest gene: {}'.format(args.length_threshold, geneToLength[threshold_gene]))
 
 # Load the mutations
 with open(args.mutation_file, 'r') as IN:
     obj = json.load(IN)
     genes, patients = obj['genes'], obj['patients']
     hypermutators = set(obj['hypermutators'])
-    geneToCases = dict( (g, set(cases)) for g, cases in obj['geneToCases'].iteritems() )
+    geneToCases = dict((g, set(cases)) for g, cases in iter(list(obj['geneToCases'].items())))
 
 # Load the triples
 with open(args.unweighted_exact_file, 'r') as IN:
     obj            = json.load(IN)
-    unweightedPval = dict( (frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].iteritems()  )
-    assert( all( not(isnan(pval)) for pval in unweightedPval.values() ))
-    unweightedFDR  = dict( (frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].iteritems() )
+    unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in iter(list(obj['setToPval'].items())))
+    assert( all( not(isnan(pval)) for pval in list(unweightedPval.values()) ))
+    unweightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in iter(list(obj['setToFDR'].items())))
 
 with open(args.weighted_saddlepoint_file, 'r') as IN:
     obj          = json.load(IN)
-    weightedPval = dict( (frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].iteritems() )
-    assert( all( not(isnan(pval)) for pval in weightedPval.values() ))
-    weightedFDR  = dict( (frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].iteritems() )
+    weightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in iter(list(obj['setToPval'].items())))
+    assert( all( not(isnan(pval)) for pval in list(weightedPval.values()) ))
+    weightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in iter(list(obj['setToFDR'].items())))
 
-print 'Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.iteritems() if fdr < args.fdr_cutoff), len(weightedFDR))
-print 'Triples with unweighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in unweightedFDR.iteritems() if fdr < args.fdr_cutoff), len(unweightedFDR))
+print('Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.items() if fdr < args.fdr_cutoff), len(weightedFDR)))
+print('Triples with unweighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in unweightedFDR.items() if fdr < args.fdr_cutoff), len(unweightedFDR)))
 
 # Rank triples by P-value
 triples = sorted(set(weightedPval.keys()) & set(unweightedPval.keys()))
 top_weighted_triples = sorted(triples, key=lambda t: weightedPval[t])
-weightedRank = dict(zip(triples, rank([ weightedPval[t] for t in triples ])))
+weightedRank = dict(list(zip(triples, rank([ weightedPval[t] for t in triples ]))))
 top_unweighted_triples = sorted(triples, key=lambda t: unweightedPval[t])
-unweightedRank = dict(zip(triples, rank([ unweightedPval[t] for t in triples ])))
+unweightedRank = dict(list(zip(triples, rank([ unweightedPval[t] for t in triples ]))))
 
 # Create tables
 def length_indicate(g):
-    if geneToLengthRank[g] > args.length_threshold: return g
-    else: return '\\textbf{%s}' % g
+    if geneToLengthRank[g] > args.length_threshold: 
+        return g
+    else: 
+        return '\\textbf{%s}' % g
 
 header = ['CoMEt rank', 'Weighted rank', 'Triple', 'Phi(M)', 'Psi(M)', 'Hypermutator mutations']
 tbl = [ header ]
diff --git a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
index 3435794..77a25df 100755
--- a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
+++ b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import matplotlib
@@ -26,12 +26,12 @@
 
         # Make a map of patients to their mutated genes
         patientToMutations = dict( (p, set()) for p in patients )
-        for g, cases in obj['geneToCases'].iteritems():
+        for g, cases in list(obj['geneToCases'].items()):
             for p in cases:
                 patientToMutations[p].add( g )
 
         # Assemble the data into dictionaries for Pandas
-        for p, mutations in patientToMutations.iteritems():
+        for p, mutations in list(patientToMutations.items()):
             ty = "Hypermutator" if p in hypermutators else "Non-hypermutator"
             items.append({ "Sample": p, "Mutated genes per sample": len(mutations),
                            "Type": ty, "Cancer": cancer })
@@ -51,4 +51,4 @@
     non_hyper_rates = list(df.loc[(df['Cancer'] == c) & (df['Type'] == "Non-hypermutator")]['Mutated genes per sample'])
     tbl.append([ c, np.median(all_rates), np.median(hyper_rates) if len(hyper_rates) > 0 else '--', np.median(non_hyper_rates)])
 
-print aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ]))
+print(aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ])))
diff --git a/experiments/eccb2016/scripts/triple_pval_scatter.py b/experiments/eccb2016/scripts/triple_pval_scatter.py
index 59a6eee..da966b3 100755
--- a/experiments/eccb2016/scripts/triple_pval_scatter.py
+++ b/experiments/eccb2016/scripts/triple_pval_scatter.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import matplotlib
@@ -36,16 +36,16 @@
     with open(permuted_file, 'r') as IN:
         setToPermuted.update( json.load(IN)['setToPval'] )
 
-for M, pval in setToPermuted.iteritems():
+for M, pval in list(setToPermuted.items()):
     if pval == 0:
         setToPermuted[M] = 1./args.num_permutations
 
 sets = set(setToWeighted.keys()) & set(setToUnweighted.keys()) & set(setToPermuted.keys())
 
-print '* Loaded weighted/unweighted P-values in {} triples...'.format(len(setToWeighted))
-print '\t- Weighted range: [{}, {}]'.format(np.min(setToWeighted.values()), np.max(setToWeighted.values()))
-print '\t- Unweighted range: [{}, {}]'.format(np.min(setToUnweighted.values()), np.max(setToUnweighted.values()))
-print '* Loaded permuted P-values for {} sets ({} intersection)...'.format(len(setToPermuted), len(sets))
+print('* Loaded weighted/unweighted P-values in {} triples...'.format(len(setToWeighted)))
+print('\t- Weighted range: [{}, {}]'.format(np.min(list(setToWeighted.values())), np.max(list(setToWeighted.values()))))
+print('\t- Unweighted range: [{}, {}]'.format(np.min(list(setToUnweighted.values())), np.max(list(setToUnweighted.values()))))
+print('* Loaded permuted P-values for {} sets ({} intersection)...'.format(len(setToPermuted), len(sets)))
 
 # Create two scatter plots
 fig, (ax1, ax2) = plt.subplots(1, 2)
@@ -77,17 +77,17 @@
 ax2.plot(ax2.get_xlim(), ax2.get_xlim(), ls="--", c=".3")
 
 # Output maximum deviation and correlations
-print 'Max deviation permutational vs. weighted (1E-3 to 1E-5):',
+print('Max deviation permutational vs. weighted (1E-3 to 1E-5):', end=' ')
 deviations = [ (x, y, np.abs(y/x)) for x, y in zip(xs, ys) if 1e-3 > x > 1e-5 ]
 if deviations:
-    print max(deviations, key=lambda (x, y, z): z)
+    print(max(deviations, key=lambda d: d[2]))
 else:
-    print 'None in p-value interval'
+    print('None in p-value interval')
 
-print 'Unweighted correlation (all): \\rho={}'.format(unweighted_rho)
-print 'Unweighted correlation (P<0.001): \\rho={}'.format(unweighted_tail_rho)
-print 'Weighted correlation (all): \\rho={}'.format(weighted_rho)
-print 'Weighted correlation (P<0.001): \\rho={}'.format(weighted_tail_rho)
+print('Unweighted correlation (all): \\rho={}'.format(unweighted_rho))
+print('Unweighted correlation (P<0.001): \\rho={}'.format(unweighted_tail_rho))
+print('Weighted correlation (all): \\rho={}'.format(weighted_rho))
+print('Weighted correlation (P<0.001): \\rho={}'.format(weighted_tail_rho))
 
 # Output to file
 plt.tight_layout()
diff --git a/experiments/eccb2016/scripts/unweighted_comparison.py b/experiments/eccb2016/scripts/unweighted_comparison.py
index 6367890..b6ffce4 100755
--- a/experiments/eccb2016/scripts/unweighted_comparison.py
+++ b/experiments/eccb2016/scripts/unweighted_comparison.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
  #Load required modules
 import matplotlib
@@ -27,7 +27,7 @@
         exactPval[cancer] = obj['setToPval']
         exactRuntime[cancer] = obj['setToRuntime']
 
-num_exact = sum( 1 for c in args.cancers for M in exactPval[c].keys() )
+num_exact = sum(1 for c in args.cancers for M in list(exactPval[c].keys()))
 
 for cancer, saddlepoint_file in zip(args.cancers, args.saddlepoint_files):
     with open(saddlepoint_file, 'r') as IN:
@@ -35,8 +35,8 @@
         saddlepointPval[cancer] = obj['setToPval']
         saddlepointRuntime[cancer] = obj['setToRuntime']
 
-num_saddlepoint = sum( 1 for c in args.cancers for M in saddlepointPval[c].keys() )
-print '* Loaded {} exact sets and {} saddlepoint sets...'.format(num_exact, num_saddlepoint)
+num_saddlepoint = sum(1 for c in args.cancers for M in list(saddlepointPval[c].keys()))
+print('* Loaded {} exact sets and {} saddlepoint sets...'.format(num_exact, num_saddlepoint))
 
 # Construct the arrays of data
 saddlepoint_pvals, exact_pvals, items = [], [], []
@@ -52,13 +52,13 @@
 
 df = pd.DataFrame(items)
 
-print '* Testing {} triples in the intersection (ignoring sets with invalid P-values)...'.format(len(saddlepoint_pvals))
+print('* Testing {} triples in the intersection (ignoring sets with invalid P-values)...'.format(len(saddlepoint_pvals)))
 
 # Output spearman correlations between the saddlepoint and exact
 rho, pval = spearmanr(exact_pvals, saddlepoint_pvals)
-print '-' * 80
-print 'CORRELATION'
-print "Spearman's Rho: {}\nSpearman's P-value: {}\n".format(rho, pval)
+print('-' * 80)
+print('CORRELATION')
+print("Spearman's Rho: {}\nSpearman's P-value: {}\n".format(rho, pval))
 
 # Set up the figure
 fig, (ax1, ax2) = plt.subplots(1, 2)
diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py
index e7d2c54..d263bca 100755
--- a/experiments/eccb2016/scripts/weights_matrix.py
+++ b/experiments/eccb2016/scripts/weights_matrix.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import matplotlib
@@ -17,7 +17,7 @@
 assert( len(args.cancers) == len(args.weights_files) == len(args.mutation_files) )
 
 # Load the mutation file
-print '* Loading mutation files...'
+print('* Loading mutation files...')
 cancerToWeights, cancerToPatients, cancerToGenes, cancerToHypermutators, patientToMutations, geneToCases = dict(), dict(), dict(), dict(), dict(), dict()
 for cancer, mutation_file, weights_file in zip(args.cancers, args.mutation_files, args.weights_files):
     with open(mutation_file, 'r') as IN:
@@ -28,24 +28,25 @@
         cancerToHypermutators[cancer] = set(obj['hypermutators'])
         geneToCases[cancer] = obj['geneToCases']
         patientToMutations[cancer] = dict( (p, set()) for p in obj['patients'] )
-        for g, cases in geneToCases[cancer].iteritems():
+        for g, cases in list(geneToCases[cancer].items()):
             for p in cases:
                 patientToMutations[cancer][p].add( g )
     cancerToWeights[cancer] = np.load(weights_file)
-    print '\t{}\n\t\t- Genes: {}\n\t\t- Patients: {}'.format(cancer, num_genes, num_patients)
+    print('\t{}\n\t\t- Genes: {}\n\t\t- Patients: {}'.format(cancer, num_genes, num_patients))
 
 # Set up the figure
 fig, axes = plt.subplots( 1, len(args.cancers))
 fig.set_size_inches( len(args.cancers) * 5, 5)
-min_weight = min([ np.min(W) for W in cancerToWeights.values() ])
-print 'Min weight:', min_weight
+min_weight = min([ np.min(W) for W in list(cancerToWeights.values()) ])
+print('Min weight:', min_weight)
+
 for ax, cancer in zip(axes, args.cancers):
     # Sort the weights so that hypermutators are all on one side
     patients = cancerToPatients[cancer]
     genes = cancerToGenes[cancer]
     hypermutators = cancerToHypermutators[cancer]
     num_non_hypermutators = len(set(patients) - hypermutators)
-    patient_indices = sorted(range(len(patients)), key=lambda p: (patients[p] in hypermutators, len(patientToMutations[cancer][patients[p]])))
+    patient_indices = sorted(list(range(len(patients))), key=lambda p: (patients[p] in hypermutators, len(patientToMutations[cancer][patients[p]])))
     gene_indices = sorted([ i for i, g in enumerate(genes) if g in geneToCases[cancer]], key=lambda g: len(geneToCases[cancer].get(genes[g], [])), reverse=True)
     weights = [ row[patient_indices] for row in cancerToWeights[cancer][gene_indices] ]
 
diff --git a/viz/generate_viz_data.py b/viz/generate_viz_data.py
index a675096..cf80f37 100755
--- a/viz/generate_viz_data.py
+++ b/viz/generate_viz_data.py
@@ -35,42 +35,42 @@ def run( args ):
             method_paren = '' if is_rce else ' ({})'.format(params['method'])
             run_name = '{}{}'.format(params['test'], method_paren)
             methods.add( run_name )
-            setToPval[run_name].update( obj['setToPval'].items() )
-            setToRuntime[run_name].update( obj['setToRuntime'].items() )
-            setToFDR[run_name].update( obj['setToFDR'].items() )
-            setToObs[run_name].update(obj['setToObs'].items() )
+            setToPval[run_name].update( list(obj['setToPval'].items()) )
+            setToRuntime[run_name].update( list(obj['setToRuntime'].items()) )
+            setToFDR[run_name].update( list(obj['setToFDR'].items()) )
+            setToObs[run_name].update(list(obj['setToObs'].items()) )
             sets |= set(obj['setToPval'].keys())
 
     # Load the mutation data
     mutation_data = load_mutation_data( args.mutation_file, min_frequency )
     genes, _, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data
     num_genes, num_patients = len(genes), len(patients)
-    geneToIndex = dict(zip(genes, range(num_genes)))
+    geneToIndex = dict(list(zip(genes, list(range(num_genes)))))
     patientToType = dict( (p, "Hypermutator" if p in hypermutators else "Non-hypermutator")
                           for p in patients )
 
     # Load the weights
     P = np.load(args.weights_file)
-    P = dict( (g, dict(zip(patients, P[geneToIndex[g]]))) for g in genes )
+    P = dict( (g, dict(list(zip(patients, P[geneToIndex[g]])))) for g in genes )
 
     # Restrict the sets (if necessary)
     if args.num_sets:
         new_sets = set()
         for run_name in methods:
-            new_sets |= set(sorted( setToPval[run_name].keys(), key=lambda M: setToPval[run_name][M] )[:args.num_sets])
+            new_sets |= set(sorted( list(setToPval[run_name].keys()), key=lambda M: setToPval[run_name][M] )[:args.num_sets])
 
         sets = new_sets
-        setToPval       = dict( (run_name, dict( (M, pval) for M, pval in setToPval[run_name].iteritems() if M in new_sets)) for run_name in methods )
-        setToRuntime    = dict( (run_name, dict( (M, pval) for M, pval in setToRuntime[run_name].iteritems() if M in new_sets)) for run_name in methods )
-        setToObs        = dict( (run_name, dict( (M, pval) for M, pval in setToObs[run_name].iteritems() if M in new_sets)) for run_name in methods )
-        setToFDR        = dict( (run_name, dict( (M, pval) for M, pval in setToFDR[run_name].iteritems() if M in new_sets)) for run_name in methods )
+        setToPval = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToPval[run_name].items())) if M in new_sets)) for run_name in methods )
+        setToRuntime = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToRuntime[run_name].items())) if M in new_sets)) for run_name in methods )
+        setToObs = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToObs[run_name].items())) if M in new_sets)) for run_name in methods )
+        setToFDR = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToFDR[run_name].items())) if M in new_sets)) for run_name in methods )
 
     # Restrict the weights
     genes_in_sets = set( g for M in sets for g in M.split('\t') )
     P = dict( (g, P[g]) for g in genes_in_sets )
-    geneToCases = dict( (g, cases) for g, cases in geneToCases.iteritems() if g in genes_in_sets )
+    geneToCases = dict( (g, cases) for g, cases in iter(list(geneToCases.items())) if g in genes_in_sets )
 
-    print '* Considering {} sets...'.format(len(new_sets))
+    print('* Considering {} sets...'.format(len(new_sets)))
 
     # Output the JSON file
     with open(args.output_file, 'w') as OUT:
@@ -81,11 +81,12 @@ def run( args ):
         params['weights_file'] = os.path.abspath(args.weights_file)
 
         # Output
-        output = dict(params=params, geneToCases=dict( (g, list(cases)) for g, cases in geneToCases.iteritems() ),
+        output = dict(params=params, geneToCases=dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) ),
                       setToPval=setToPval, methods=sorted(methods),
                       patientToType=patientToType, setToFDR=setToFDR,
                       setToRuntime=setToRuntime, setToObs=setToObs, sets=list(sets),
                       genes=list(genes), patients=patients, P=P)
         json.dump( output, OUT )
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
+if __name__ == '__main__': 
+    run( get_parser().parse_args(sys.argv[1:]) )
diff --git a/viz/server.py b/viz/server.py
index 568c36d..38a21ab 100644
--- a/viz/server.py
+++ b/viz/server.py
@@ -47,7 +47,8 @@ def run( args ):
 	# Start server
 	app = tornado.web.Application(routes)
 	app.listen(args.port)
-	print 'Listening on port {}'.format(args.port)
+	print('Listening on port {}'.format(args.port))
 	tornado.ioloop.IOLoop.current().start()
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
+if __name__ == '__main__': 
+	run( get_parser().parse_args(sys.argv[1:]) )

From f4c339cf652a305d8c92c94bb379b1c23698c07e Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Tue, 14 Aug 2018 16:37:07 -0400
Subject: [PATCH 05/60] updated certain scripts to python3.x

---
 compute_mutation_probabilities.py | 42 +++++++++++++++--------------
 find_exclusive_sets.py            | 29 +++++++++++---------
 find_sets.py                      | 30 ++++++++++++---------
 process_mutations.py              | 44 ++++++++++++++++++-------------
 4 files changed, 81 insertions(+), 64 deletions(-)

diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
index da2a41d..a924427 100755
--- a/compute_mutation_probabilities.py
+++ b/compute_mutation_probabilities.py
@@ -20,12 +20,13 @@ def get_parser():
     parser.add_argument('-q', '--swap_multiplier', type=int, required=False, default=100)
     parser.add_argument('-nc', '--num_cores', type=int, required=False, default=1)
     parser.add_argument('-s', '--seed', type=int, required=False, default=None)
-    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=range(5))
+    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=list(range(5)))
     return parser
 
-def permute_matrices_wrapper(args): return permute_matrices(*args)
-def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose,
-                     m, n, num_edges, indexToGene, indexToPatient):
+def permute_matrices_wrapper(args): 
+    return permute_matrices(*args)
+
+def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, m, n, num_edges, indexToGene, indexToPatient):
     # Initialize our output
     observed     = np.zeros((m, n))
     permutations = []
@@ -43,8 +44,8 @@ def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose,
             indices.append( (edge[0]-1, edge[1]-1) )
 
         # Record the permutation
-        observed[zip(*indices)] += 1.
-        geneToCases = dict( (g, list(cases)) for g, cases in geneToCases.iteritems() )
+        observed[tuple(zip(*indices))] += 1.
+        geneToCases = dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) )
         permutations.append( dict(geneToCases=geneToCases, permutation_number=seed) )
 
     return observed/float(len(seeds)), permutations
@@ -60,7 +61,7 @@ def postprocess_weight_matrix(P, r, s):
 
     # Average weights over entries of weight matrix with same marginals
     P_mean = np.zeros(np.shape(P))
-    for marginals, indices in marginals_to_indices.items():
+    for marginals, indices in list(marginals_to_indices.items()):
         mean_value = float(sum(P[i, j] for i, j in indices))/float(len(indices))
         for i, j in indices:
             P_mean[i, j] = mean_value
@@ -76,20 +77,20 @@ def run( args ):
 
     # Load mutation data
     if args.verbose > 0:
-        print '* Loading mutation data...'
+        print('* Loading mutation data...')
 
     mutation_data = load_mutation_data( args.mutation_file )
     genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data
 
-    geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.iteritems() )
-    patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.iteritems() )
+    geneToObserved = dict( (g, len(cases)) for g, cases in iter(list(geneToCases.items())) )
+    patientToObserved = dict( (p, len(muts)) for p, muts in iter(list(patientToMutations.items())) )
     geneToIndex = dict( (g, i+1) for i, g in enumerate(all_genes) )
     indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) )
     patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) )
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in geneToCases.iteritems():
+    for gene, cases in list(geneToCases.items()):
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
@@ -97,7 +98,7 @@ def run( args ):
 
     # Run the bipartite edge swaps
     if args.verbose > 0:
-        print '* Permuting matrices...'
+        print('* Permuting matrices...')
 
     m = len(all_genes)
     n = len(patients)
@@ -106,7 +107,7 @@ def run( args ):
     max_tries = 10**9
     if args.seed is not None:
         random.seed(args.seed)
-    seeds = random.sample(xrange(1, 2*10**9), args.num_permutations)
+    seeds = random.sample(range(1, 2*10**9), args.num_permutations)
 
     # Run the bipartite edge swaps in parallel if more than one core indicated
     num_cores = min(args.num_cores if args.num_cores != -1 else mp.cpu_count(), args.num_permutations)
@@ -127,7 +128,7 @@ def run( args ):
     # Create the weights file
     if args.weights_file:
         if args.verbose > 0:
-            print '* Saving weights file...'
+            print('* Saving weights file...')
 
         # Allow for small accumulated numerical errors
         tol = 1e3*max(m, n)*args.num_permutations*np.finfo(np.float64).eps
@@ -137,10 +138,10 @@ def run( args ):
         P = np.add.reduce(observeds) / float(len(observeds))
 
         # Verify the weights
-        for g, obs in geneToObserved.iteritems():
+        for g, obs in list(geneToObserved.items()):
             assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol)
 
-        for p, obs in patientToObserved.iteritems():
+        for p, obs in list(patientToObserved.items()):
             assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol)
 
         # Construct mutation matrix to compute marginals
@@ -154,10 +155,10 @@ def run( args ):
         P = postprocess_weight_matrix(P, r, s)
 
         # Verify the weights again
-        for g, obs in geneToObserved.iteritems():
+        for g, obs in list(geneToObserved.items()):
             assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol)
 
-        for p, obs in patientToObserved.iteritems():
+        for p, obs in list(patientToObserved.items()):
             assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol)
 
         # Add pseudocounts to entries with no mutations observed; unlikely or impossible after post-processing step
@@ -171,7 +172,7 @@ def run( args ):
     if args.permutation_directory:
         output_prefix = args.permutation_directory + '/permuted-mutations-{}.json'
         if args.verbose > 0:
-            print '* Saving permuted mutation data...'
+            print('* Saving permuted mutation data...')
 
         for _, permutation_list in results:
             for permutation in permutation_list:
@@ -180,4 +181,5 @@ def run( args ):
                     permutation['params'] = params
                     json.dump( permutation, OUT )
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
+if __name__ == '__main__': 
+    run( get_parser().parse_args(sys.argv[1:]) )
diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py
index d15ae05..88a6401 100755
--- a/find_exclusive_sets.py
+++ b/find_exclusive_sets.py
@@ -123,12 +123,12 @@ def run( args ):
 
     # Load the mutation data
     if args.verbose > 0:
-        print ('-' * 30), 'Input Mutation Data', ('-' * 29)
+        print(('-' * 30), 'Input Mutation Data', ('-' * 29))
     genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex = load_mutation_files( args.mutation_files )
     num_all_genes, num_patients = len(genes), len(patients)
 
     # Restrict to genes mutated in a minimum number of samples
-    geneToCases = dict( (g, cases) for g, cases in geneToCases.iteritems() if g in genes and len(cases) >= args.min_frequency )
+    geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency )
     genes     = set(geneToCases.keys())
     num_genes = len(genes)
 
@@ -141,7 +141,7 @@ def run( args ):
         # Since we are looking for co-occurrence between exclusive sets with
         # an annotation A, we add events for each patient NOT annotated by
         # the given annotation
-        for annotation, cases in annotationToPatients.iteritems():
+        for annotation, cases in list(annotationToPatients.items()):
             not_cases = patients - cases
             if len(not_cases) > 0:
                 geneToCases[annotation] = not_cases
@@ -149,18 +149,18 @@ def run( args ):
         annotations = set()
 
     if args.verbose > 0:
-        print '- Genes:', num_all_genes
-        print '- Patients:', num_patients
-        print '- Genes mutated in >={} patients: {}'.format(args.min_frequency, num_genes)
+        print('- Genes:', num_all_genes)
+        print('- Patients:', num_patients)
+        print('- Genes mutated in >={} patients: {}'.format(args.min_frequency, num_genes))
         if args.patient_annotation_file:
-            print '- Patient annotations:', len(annotations)
+            print('- Patient annotations:', len(annotations))
 
     # Load the weights (if necessary)
     test = nameToTest[args.test]
     if test == WRE:
         # Create master versions of the indices
-        masterGeneToIndex    = dict(zip(sorted(genes), range(num_genes)))
-        masterPatientToIndex = dict( zip(sorted(patients), range(num_patients)) )
+        masterGeneToIndex    = dict(list(zip(sorted(genes), list(range(num_genes)))))
+        masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients)))))
         geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex)
     else:
         geneToP = None
@@ -169,17 +169,19 @@ def run( args ):
     if test == RCE:
         permuted_files = get_permuted_files(args.permuted_matrix_directories, args.num_permutations)
         if args.verbose > 0:
-            print '* Using {} permuted matrix files'.format(len(permuted_files))
+            print('* Using {} permuted matrix files'.format(len(permuted_files)))
 
     #Enumeration
     if args.search_strategy == 'Enumerate':
-        if args.verbose > 0: print ('-' * 31), 'Enumerating Sets', ('-' * 31)
+        if args.verbose > 0: 
+            print(('-' * 31), 'Enumerating Sets', ('-' * 31))
         for k in set( args.gene_set_sizes ): # we don't need to enumerate the same size more than once
             # Create a list of sets to test
             sets = list( frozenset(t) for t in combinations(genes, k) )
             num_sets = len(sets)
 
-            if args.verbose  > 0: print 'k={}: {} sets...'.format(k, num_sets)
+            if args.verbose  > 0: 
+                print('k={}: {} sets...'.format(k, num_sets))
             if test == RCE:
                 # Run the permutational
                 setToPval, setToRuntime, setToFDR, setToObs = rce_permutation_test( sets, geneToCases, num_patients, permuted_files, args.num_cores, args.verbose )
@@ -199,4 +201,5 @@ def run( args ):
     else:
         raise NotImplementedError("Strategy '{}' not implemented.".format(args.strategy))
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
+if __name__ == '__main__': 
+    run( get_parser().parse_args(sys.argv[1:]) )
diff --git a/find_sets.py b/find_sets.py
index 903f3c3..1154da7 100755
--- a/find_sets.py
+++ b/find_sets.py
@@ -87,7 +87,8 @@ def load_mutation_files(mutation_files):
         genes    |= set(type_genes)
 
         # Record the mutations in each gene
-        for g, cases in typeGeneToCases.iteritems(): geneToCases[g] |= cases
+        for g, cases in list(typeGeneToCases.items()): 
+            geneToCases[g] |= cases
 
         # Record the genes, patients, and their indices for later
         typeToGeneIndex.append(dict(zip(type_genes, range(len(type_genes)))))
@@ -101,12 +102,12 @@ def run( args ):
 
     # Load the mutation data
     if args.verbose > 0:
-        print ('-' * 30), 'Input Mutation Data', ('-' * 29)
+        print(('-' * 30), 'Input Mutation Data', ('-' * 29))
     genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex = load_mutation_files( args.mutation_files )
     num_all_genes, num_patients = len(genes), len(patients)
 
     # Restrict to genes mutated in a minimum number of samples
-    geneToCases = dict( (g, cases) for g, cases in geneToCases.iteritems() if g in genes and len(cases) >= args.min_frequency )
+    geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency )
     genes     = set(geneToCases.keys())
     num_genes = len(genes)
 
@@ -119,7 +120,7 @@ def run( args ):
         # Since we are looking for co-occurrence between exclusive sets with
         # an annotation A, we add events for each patient NOT annotated by
         # the given annotation
-        for annotation, cases in annotationToPatients.iteritems():
+        for annotation, cases in list(annotationToPatients.items()):
             not_cases = patients - cases
             if len(not_cases) > 0:
                 geneToCases[annotation] = not_cases
@@ -127,26 +128,28 @@ def run( args ):
         annotations = set()
 
     if args.verbose > 0:
-        print '- Genes:', num_all_genes
-        print '- Patients:', num_patients
-        print '- Genes mutated in >={} patients: {}'.format(args.min_frequency, num_genes)
+        print('- Genes:', num_all_genes)
+        print('- Patients:', num_patients)
+        print('- Genes mutated in >={} patients: {}'.format(args.min_frequency, num_genes))
         if args.patient_annotation_file:
-            print '- Patient annotations:', len(annotations)
+            print('- Patient annotations:', len(annotations))
 
     # Load the weights (if necessary)
 
     # Create master versions of the indices
-    masterGeneToIndex    = dict(zip(sorted(genes), range(num_genes)))
-    masterPatientToIndex = dict( zip(sorted(patients), range(num_patients)) )
+    masterGeneToIndex    = dict(list(zip(sorted(genes), list(range(num_genes)))))
+    masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients)))))
     geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex)
 
-    if args.verbose > 0: print ('-' * 31), 'Enumerating Sets', ('-' * 31)
+    if args.verbose > 0: 
+        print(('-' * 31), 'Enumerating Sets', ('-' * 31))
     k = args.gene_set_size
     # Create a list of sets to test
     sets = list( frozenset(t) for t in combinations(genes, k) )
     num_sets = len(sets)
 
-    if args.verbose  > 0: print 'k={}: {} sets...'.format(k, num_sets)
+    if args.verbose  > 0: 
+        print('k={}: {} sets...'.format(k, num_sets))
     # Run the test
     method = nameToMethod['Saddlepoint']
     test = nameToTest['WRE']
@@ -155,4 +158,5 @@ def run( args ):
                                                             verbose=args.verbose, report_invalids=args.report_invalids)
     output_enumeration_table( args, k, setToPval, setToRuntime, setToFDR, setToObs, args.fdr_threshold )
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
+if __name__ == '__main__': 
+    run( get_parser().parse_args(sys.argv[1:]) )
diff --git a/process_mutations.py b/process_mutations.py
index 4b39585..c886271 100755
--- a/process_mutations.py
+++ b/process_mutations.py
@@ -19,11 +19,12 @@ def get_parser():
     parser.add_argument('-ivs', '--ignored_validation_statuses', type=str, required=False, nargs='*',
                         default=['Wildtype', 'Invalid'])
     parser.add_argument('-o', '--output_file', type=str, required=True)
-    parser.add_argument('-v', '--verbose', type=int, default=1, required=False, choices=range(5))
+    parser.add_argument('-v', '--verbose', type=int, default=1, required=False, choices=list(range(5)))
     return parser
 
 def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc, vt, vs, ivc, ivt, ivs, verbose ):
-    if verbose > 1: print '\tLoading MAF:', maf_file
+    if verbose > 1: 
+        print('\tLoading MAF:', maf_file)
     genes, patients = set(), set()
     with open(maf_file, 'r') as IN:
         seenHeader = False
@@ -31,7 +32,7 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc
             arr = l.rstrip('\n').split('\t')
             # Parse the header if we haven't seen it yet
             if not seenHeader and arr[0].lower() == 'hugo_symbol':
-                arr              = map(str.lower, arr)
+                arr              = list(map(str.lower, arr))
                 seenHeader       = True
                 gene_index       = 0
                 patient_index    = arr.index('tumor_sample_barcode')
@@ -44,7 +45,8 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc
                 # Record the patients and genes, even if we ignore their mutations
                 patient, gene = '-'.join(arr[patient_index].split('-')[:3]), arr[gene_index]
 
-                if not patientWhitelist[patient]: continue
+                if not patientWhitelist[patient]: 
+                    continue
 
                 patients.add(patient)
                 genes.add(gene)
@@ -83,7 +85,8 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc
     return genes, patients
 
 def process_events_file( events_file, patientWhitelist, geneToCases, patientToMutations, verbose ):
-    if verbose > 1: print '\tProcessing events file:', events_file
+    if verbose > 1: 
+        print('\tProcessing events file:', events_file)
 
     # Parse the events file
     events, patients = set(), set()
@@ -92,7 +95,8 @@ def process_events_file( events_file, patientWhitelist, geneToCases, patientToMu
         for arr in arrs:
             # Skip patients that aren't whitelisted
             patient, mutations = arr[0], set(arr[1:])
-            if not patientWhitelist[patient]: continue
+            if not patientWhitelist[patient]: 
+                continue
 
             # Record the events and mutations
             patients.add(patient)
@@ -112,16 +116,19 @@ def run( args ):
 
     # Load the patient whitelist (if supplied)
     if args.patient_whitelist:
-        if args.verbose > 0: print '* Loading patient whitelist...'
+        if args.verbose > 0: 
+            print('* Loading patient whitelist...')
         patientWhitelist = defaultdict( lambda : False )
         with open(args.patient_whitelist, 'r') as IN:
             patientWhitelist.update( (l.rstrip('\n').split()[0], True) for l in IN if not l.startswith('#') )
     else:
-        if args.verbose > 0: print '* No patient whitelist provided, including all patients...'
+        if args.verbose > 0: 
+            print('* No patient whitelist provided, including all patients...')
         patientWhitelist = defaultdict( lambda : True )
 
     # Load the mutations from each MAF
-    if args.verbose > 0: print '* Loading and combining {} datasets...'.format(len(args.cancer_types))
+    if args.verbose > 0: 
+        print('* Loading and combining {} datasets...'.format(len(args.cancer_types)))
     geneToCases, patientToMutations = defaultdict( set ), defaultdict( set )
     genes, patients = set(), set()
     vc, vt, vs = set(), set(), set() # variant classes/types and validation statuses
@@ -154,12 +161,12 @@ def run( args ):
 
     # Summarize the data
     if args.verbose > 0:
-        print '* Summary of mutation data...'
-        print '\tGenes: {}'.format(num_genes)
-        print '\tPatients: {} ({} hypermutators)'.format(num_patients, len(hypermutators))
-        print '\tUsed variant classes:', ', '.join(sorted(vc))
-        print '\tUsed variant types:', ', '.join(sorted(vt))
-        print '\tUsed validation statuses:', ', '.join(sorted(vs))
+        print('* Summary of mutation data...')
+        print('\tGenes: {}'.format(num_genes))
+        print('\tPatients: {} ({} hypermutators)'.format(num_patients, len(hypermutators)))
+        print('\tUsed variant classes:', ', '.join(sorted(vc)))
+        print('\tUsed variant types:', ', '.join(sorted(vt)))
+        print('\tUsed validation statuses:', ', '.join(sorted(vs)))
 
     # Output to file
     with open(args.output_file, 'w') as OUT:
@@ -171,10 +178,11 @@ def run( args ):
                       patient_whitelist_file=os.path.abspath(args.patient_whitelist) if args.patient_whitelist else None,
                       hypermutators_file=os.path.abspath(args.hypermutators_file) if args.hypermutators_file else None)
         output = dict(params=params, patients=patients, genes=genes, hypermutators=list(hypermutators),
-                      geneToCases=dict( (g, list(cases)) for g, cases in geneToCases.items()),
+                      geneToCases=dict( (g, list(cases)) for g, cases in list(geneToCases.items())),
                       patientToType=patientToType,
-                      patientToMutations=dict( (p, list(muts)) for p, muts in patientToMutations.items()),
+                      patientToMutations=dict( (p, list(muts)) for p, muts in list(patientToMutations.items())),
                       num_genes=num_genes, num_patients=num_patients)
         json.dump( output, OUT )
 
-if __name__ == '__main__': run( get_parser().parse_args( sys.argv[1:]) )
+if __name__ == '__main__': 
+    run( get_parser().parse_args( sys.argv[1:]) )

From 98086fc423372fcba36d9445eba6db9e87172bc9 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Tue, 14 Aug 2018 16:37:37 -0400
Subject: [PATCH 06/60] updated certain scripts to python3.x

---
 compute_mutation_probabilities.py | 2 +-
 find_exclusive_sets.py            | 2 +-
 find_sets.py                      | 2 +-
 process_mutations.py              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
index a924427..acac439 100755
--- a/compute_mutation_probabilities.py
+++ b/compute_mutation_probabilities.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, json, numpy as np, multiprocessing as mp, random
diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py
index 88a6401..40eb062 100755
--- a/find_exclusive_sets.py
+++ b/find_exclusive_sets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, numpy as np, json
diff --git a/find_sets.py b/find_sets.py
index 1154da7..69e8388 100755
--- a/find_sets.py
+++ b/find_sets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, numpy as np, json
diff --git a/process_mutations.py b/process_mutations.py
index c886271..4d4a9a3 100755
--- a/process_mutations.py
+++ b/process_mutations.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, argparse, json, numpy as np

From e3a0a710eb4ba977eb3a0aad98140dc983e86ef5 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Wed, 15 Aug 2018 03:04:34 -0400
Subject: [PATCH 07/60] ported scripts from "wext" to python3.x

---
 wext/__init__.py          |  2 +-
 wext/constants.py         |  2 +-
 wext/enumerate_sets.py    | 72 ++++++++++++++++++---------------------
 wext/exact.py             |  8 ++---
 wext/exclusivity_tests.py |  4 +--
 wext/i_o.py               | 20 +++++------
 wext/mcmc.py              | 31 +++++++++--------
 wext/saddlepoint.py       |  4 ++-
 wext/setup.py             |  2 +-
 wext/statistics.py        |  6 ++--
 10 files changed, 76 insertions(+), 75 deletions(-)

diff --git a/wext/__init__.py b/wext/__init__.py
index 08c6b5e..c746bc2 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Import modules.
 from constants import *
diff --git a/wext/constants.py b/wext/constants.py
index 7cffc13..9f6425b 100755
--- a/wext/constants.py
+++ b/wext/constants.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # P-values are called invalid if P > 1+PTOL or P < -PTOL
 PTOL = 10**-3
diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py
index 780deb6..25d7c0f 100755
--- a/wext/enumerate_sets.py
+++ b/wext/enumerate_sets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, multiprocessing as mp, json
@@ -17,7 +17,7 @@
 # Compute the mutual exclusivity T for the given gene set
 def T(M, geneToCases):
     sampleToCount = Counter( s for g in M for s in geneToCases.get(g, []) )
-    return sum( 1 for sample, count in sampleToCount.iteritems() if count == 1 )
+    return sum( 1 for sample, count in list(sampleToCount.items()) if count == 1 )
 
 # Compute the permutational
 def permutational_dist_wrapper( args ): return permutational_dist( *args )
@@ -29,7 +29,7 @@ def permutational_dist( sets, permuted_files ):
         permutedGeneToCases = defaultdict(set)
         for pf in pf_group:
             with open(pf, 'r') as IN:
-                for g, cases in json.load(IN)['geneToCases'].iteritems():
+                for g, cases in list(json.load(IN)['geneToCases'].items()):
                     permutedGeneToCases[g] |= set(cases)
 
         reading_time = time() - reading_start
@@ -55,7 +55,7 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, num_co
     # Filter the sets based on the observed values
     k = len(next(iter(sets)))
     setToObs = dict( (M, observed_values(M, num_patients, geneToCases)) for M in sets )
-    sets = set( M for M, (X, T, Z, tbl) in setToObs.iteritems() if testable_set(k, T, Z, tbl) )
+    sets = set( M for M, (X, T, Z, tbl) in list(setToObs.items()) if testable_set(k, T, Z, tbl) )
 
     # Compute the distribution of exclusivity for each pair across the permuted files
     np    = float(len(permuted_files))
@@ -69,22 +69,22 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, num_co
     # Merge the different distributions
     setToDist, setToTime = defaultdict(list), dict()
     for dist, times in empirical_distributions:
-        setToTime.update(times.items())
-        for k, v in dist.iteritems():
+        setToTime.update(list(times.items()))
+        for k, v in list(dist.items()):
             setToDist[k].extend(v)
 
     # Compute the observed values and then the P-values
     setToObs = dict( (M, setToObs[M]) for M in sets )
     setToPval = dict()
-    for M, (X, T, Z, tbl) in setToObs.iteritems():
+    for M, (X, T, Z, tbl) in list(setToObs.items()):
         # Compute the P-value.
         count = sum( 1. for d in setToDist[M] if d >= T )
         setToPval[M] = count / np
 
     # Compute FDRs
-    tested_sets = setToPval.keys()
+    tested_sets = list(setToPval.keys())
     pvals = [ setToPval[M] for M in tested_sets ]
-    setToFDR = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))
+    setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))))
 
     return setToPval, setToTime, setToFDR, setToObs
 
@@ -151,8 +151,6 @@ def test_set_group( sets, geneToCases, num_patients, method, test, P=None, verbo
 
         setToTime[M] = time() - start
 
-    if verbose > 1: print
-
     return setToPval, setToTime, setToObs
 
 def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=1, verbose=0,
@@ -177,13 +175,13 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=
     # Combine the dictionaries
     setToPval, setToTime, setToObs = dict(), dict(), dict()
     for pval, time, obs in results:
-        setToPval.update(pval.items())
-        setToTime.update(time.items())
-        setToObs.update(obs.items())
+        setToPval.update(list(pval.items()))
+        setToTime.update(list(time.items()))
+        setToObs.update(list(obs.items()))
 
     # Make sure all P-values are numbers
     tested_sets = len(setToPval)
-    invalid_sets = set( M for M, pval in setToPval.iteritems() if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
+    invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
 
     # Report invalid sets
     if verbose > 0 and report_invalids:
@@ -194,19 +192,19 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=
             invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ])
         sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' )
 
-    setToPval = dict( (M, pval) for M, pval in setToPval.iteritems() if not M in invalid_sets )
-    setToTime = dict( (M, runtime) for M, runtime in setToTime.iteritems() if not M in invalid_sets )
-    setToObs = dict( (M, obs) for M, obs in setToObs.iteritems() if not M in invalid_sets )
+    setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets )
+    setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets )
+    setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets )
 
     if verbose > 0:
-        print '- Output {} sets'.format(len(setToPval))
-        print '\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets))
-        print '\tIgnored {} sets with Z >= T or a gene with no exclusive mutations'.format(len(sets)-tested_sets)
+        print('- Output {} sets'.format(len(setToPval)))
+        print('\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets)))
+        print('\tIgnored {} sets with Z >= T or a gene with no exclusive mutations'.format(len(sets)-tested_sets))
 
     # Compute the FDRs
-    tested_sets = setToPval.keys()
+    tested_sets = list(setToPval.keys())
     pvals = [ setToPval[M] for M in tested_sets ]
-    setToFDR = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))
+    setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))))
 
     return setToPval, setToTime, setToFDR, setToObs
 
@@ -232,8 +230,6 @@ def general_test_set_group( sets, geneToCases, num_patients, method, test, stati
         setToPval[M] = general_wre_test( sorted_M, geneToCases, [ P[g] for g in sorted_M ], statistic )
         setToTime[M] = time() - start
 
-    if verbose > 1: print
-
     return setToPval, setToTime, setToObs
 
 def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, P=None, num_cores=1, verbose=0,
@@ -258,13 +254,13 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
     # Combine the dictionaries
     setToPval, setToTime, setToObs = dict(), dict(), dict()
     for pval, time, obs in results:
-        setToPval.update(pval.items())
-        setToTime.update(time.items())
-        setToObs.update(obs.items())
+        setToPval.update(list(pval.items()))
+        setToTime.update(list(time.items()))
+        setToObs.update(list(obs.items()))
 
     # Make sure all P-values are numbers
     tested_sets = len(setToPval)
-    invalid_sets = set( M for M, pval in setToPval.iteritems() if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
+    invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
 
     # Report invalid sets
     if verbose > 0 and report_invalids:
@@ -275,19 +271,19 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
             invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ])
         sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' )
 
-    setToPval = dict( (M, pval) for M, pval in setToPval.iteritems() if not M in invalid_sets )
-    setToTime = dict( (M, runtime) for M, runtime in setToTime.iteritems() if not M in invalid_sets )
-    setToObs = dict( (M, obs) for M, obs in setToObs.iteritems() if not M in invalid_sets )
+    setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets )
+    setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets )
+    setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets )
 
     if verbose > 0:
-        print '- Output {} sets'.format(len(setToPval))
-        print '\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets))
-        print '\tIgnored {} sets with Z >= T or a gene with no exclusive mutations'.format(len(sets)-tested_sets)
+        print('- Output {} sets'.format(len(setToPval)))
+        print('\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets)))
+        print('\tIgnored {} sets with Z >= T or a gene with no exclusive mutations'.format(len(sets)-tested_sets))
 
     # Compute the FDRs
-    tested_sets = setToPval.keys()
+    tested_sets = list(setToPval.keys())
     pvals = [ min(max(0.0, setToPval[M]), 1.0) for M in tested_sets ]
-    setToFDR = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))
+    setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))))
 
     return setToPval, setToTime, setToFDR, setToObs
 
@@ -296,4 +292,4 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
 ################################################################################
 # Testable set
 def testable_set( k, T, Z, tbl ):
-    return T > Z and all( tbl[2**i] > 0 for i in range(k) )
+    return T > Z and all( tbl[2**i] > 0 for i in list(range(k)) )
diff --git a/wext/exact.py b/wext/exact.py
index 052a578..32636d9 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,10 +1,10 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import numpy as np
 import wext_exact_test
 from constants import *
 
-def exact_test( t, x, p, verbose=False ):
+def exact_test(t, x, p, verbose=False):
     k = len(x)
     if k == 2:
         return exact_test_k2( t, x, p, verbose )
@@ -19,11 +19,11 @@ def exact_test_k3(t, x, p, verbose):
     return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p )
 
 # Wrapper for k=2 exact test C function
-def exact_test_k2(t, (x, y), (p_x, p_y), verbose):
+def exact_test_k2(t, x, y, p_x, p_y, verbose):
 	# Two-sided test
     N = len(p_x)
     z = (x + y - t)/2 # count number of co-occurrences
-    tail_masses = wext_exact_test.conditional(N, range(z+1), x, y, p_x, p_y)
+    tail_masses = wext_exact_test.conditional(N, list(range(z+1)), x, y, p_x, p_y)
     obs_mass  = tail_masses[-1]
     pval = sum(tail_masses)
     return pval
diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py
index 985ce2e..dfc4322 100755
--- a/wext/exclusivity_tests.py
+++ b/wext/exclusivity_tests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import numpy as np
@@ -22,7 +22,7 @@ def wre_test(t, x, p, method=EXACT, verbose=0):
     # Check that the probabilities are in (0, 1].
     assert(all(0<b<= 1 for a in p for b in a))
     # Check that the number of mutations in each gene is not greater than the number of samples.
-    assert(all(a<=len(b) for a, b in zip(x, p)))
+    assert(all(a<=len(b) for a, b in list(zip(x, p))))
     # Check that the number of mutually exclusive mutations is not greater than the total number of mutations.
     assert(t<=sum(x))
     #Check that we've implemented the given set size with the exact test
diff --git a/wext/i_o.py b/wext/i_o.py
index 679ca30..fd7c706 100755
--- a/wext/i_o.py
+++ b/wext/i_o.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Load required modules
 import sys, os, json, numpy as np
@@ -11,13 +11,13 @@ def load_mutation_data( mutation_file, min_freq=1 ):
         obj         = json.load(IN)
         all_genes   = obj['genes']
         patients    = obj['patients']
-        geneToCases = dict( (g, set(cases)) for g, cases in obj['geneToCases'].iteritems() )
-        patientToMutations = dict( (p, set(muts)) for p, muts in obj['patientToMutations'].iteritems() )
+        geneToCases = dict( (g, set(cases)) for g, cases in list(obj['geneToCases'].items()) )
+        patientToMutations = dict( (p, set(muts)) for p, muts in list(obj['patientToMutations'].items()) )
         hypermutators = set(obj['hypermutators'])
         params      = obj['params']
 
     # Restrict the genes based on the minimum frequency
-    genes = set( g for g, cases in geneToCases.iteritems() if len(cases) >= min_freq )
+    genes = set( g for g, cases in list(geneToCases.items()) if len(cases) >= min_freq )
 
     return genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators
 
@@ -34,11 +34,11 @@ def load_patient_annotation_file(patient_annotation_file):
 # Converts keys from an iterable to tab-separated, so the dictionary can be
 # output as JSON
 def convert_dict_for_json( setToVal, sep='\t' ):
-    return dict( (sep.join(sorted(M)), val) for M, val in setToVal.iteritems() )
+    return dict( (sep.join(sorted(M)), val) for M, val in list(setToVal.items()) )
 
 # Converts tab-separated keys back to frozensets
 def convert_dict_from_json( setToVal, sep='\t', iterable=frozenset ):
-    return dict( (iterable(M.split(sep)), val) for M, val in setToVal.iteritems() )
+    return dict( (iterable(M.split(sep)), val) for M, val in list(setToVal.items()) )
 
 # Create the header strings for a contingency table
 def create_tbl_header( k ):
@@ -54,7 +54,7 @@ def output_enumeration_table(args, k, setToPval, setToRuntime, setToFDR, setToOb
         if not args.json_format:
             # Construct the rows
             rows = []
-            for M, pval in setToPval.iteritems():
+            for M, pval in list(setToPval.items()):
                 if setToFDR[M]<=fdr_threshold:
                     X, T, Z, tbl = setToObs[M]
                     row = [ ', '.join(sorted(M)), pval, setToFDR[M], setToRuntime[M], T, Z ] + tbl
@@ -90,14 +90,14 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs):
         params = vars(args)
         output = dict(params=params, setToPval=convert_dict_for_json(setToPval),
                       setToObs=convert_dict_for_json(setToObs),
-                      setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for sets, freq in setsToFreq.iteritems() ))
+                      setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for sets, freq in list(setsToFreq.items()) ))
         with open(args.output_prefix + '.json', 'w') as OUT:
             json.dump( output, OUT )
     else:
         # Output a gene set file
         with open(args.output_prefix + '-sampled-collections.tsv', 'w') as OUT:
             rows = []
-            for sets, freq in setsToFreq.iteritems():
+            for sets, freq in list(setsToFreq.items()):
                 row = [ ' '.join([ ','.join(M) for M in sets ]), freq ]
                 row.append( sum( -np.log10(setToPval[M] ** args.alpha) for M in sets ))
                 rows.append(row)
@@ -109,7 +109,7 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs):
         # Output each of the sample gene sets
         with open(args.output_prefix + '-sampled-sets.tsv', 'w') as OUT:
             rows = []
-            for M, pval in setToPval.iteritems():
+            for M, pval in list(setToPval.items()):
                 X, T, Z, tbl = setToObs[M]
                 rows.append([ ','.join(sorted(M)), pval, T, Z] + tbl )
             rows.sort(key=lambda r: r[1])
diff --git a/wext/mcmc.py b/wext/mcmc.py
index 06ca876..550b541 100755
--- a/wext/mcmc.py
+++ b/wext/mcmc.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
 
 import sys, os, numpy as np
 from collections import defaultdict
@@ -10,7 +10,8 @@
 from exclusivity_tests import re_test, wre_test
 
 def mcmc(ks, geneToCases, num_patients, method, test, geneToP, seed, annotations=set(), verbose=0, step_len=100, nchains=1, niters=1000, alpha=1):
-    if verbose > 0: print '-' * 33, 'Running MCMC', '-' * 33
+    if verbose > 0: 
+        print('-' * 33, 'Running MCMC', '-' * 33)
 
     # Set up a local version of the weight function
     if test == WRE:
@@ -45,7 +46,7 @@ def _collection_weight(collection):
         return sum( _weight(M) for M in collection )
 
     def _to_collection(solution):
-        return frozenset( frozenset(M) for M in solution.values() )
+        return frozenset( frozenset(M) for M in list(solution.values()) )
 
     # Compute the acceptance ratio
     def _log_accept_ratio( W_current, W_next ): return W_next - W_current
@@ -53,11 +54,12 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current
     # Set up PRNG, sample space, and output
     random_seed(seed)
     t          = len(ks)
-    genespace  = geneToCases.keys()
-    setsToFreq = [ defaultdict(int) for _ in xrange(nchains) ]
+    genespace  = list(geneToCases.keys())
+    setsToFreq = [ defaultdict(int) for _ in range(nchains) ]
     setToPval, setToObs =  dict(), dict()
-    for c in xrange(nchains):
-        if verbose > 0: print '- Experiment', c+1
+    for c in range(nchains):
+        if verbose > 0: 
+            print('- Experiment', c+1)
 
         # Seed Markov chain
         soln, assigned = choose_random_set(ks, genespace)
@@ -75,8 +77,8 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current
                 sys.stdout.flush()
 
             # Sample the next gene to swap in/around the set
-            next_soln = dict( (index, set(M)) for index, M in soln.iteritems() )
-            next_assigned = dict(assigned.items())
+            next_soln = dict( (index, set(M)) for index, M in list(soln.items()) )
+            next_assigned = dict(list(assigned.items()))
             next_gene = choice(genespace)
 
             # There are two possibilities for the next gene
@@ -86,7 +88,7 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current
                 # if we only have one set, we can't swap between sets
                 if t == 1: continue
                 i = next_assigned[next_gene]
-                swap_gene = choice([ g for g in next_assigned.keys() if g not in next_soln[i] ])
+                swap_gene = choice([ g for g in list(next_assigned.keys()) if g not in next_soln[i] ])
                 j = next_assigned[swap_gene]
                 next_assigned[swap_gene] = i
                 next_soln[i].add(swap_gene)
@@ -101,14 +103,15 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current
             # 2) The gene is not in the current solution. In this case, we choose
             #    a random gene in the solution to remove, and add the next gene.
             else:
-                swap_gene = choice(next_assigned.keys())
+                swap_gene = choice(list(next_assigned.keys()))
                 j = next_assigned[swap_gene]
                 del next_assigned[swap_gene]
                 next_assigned[next_gene] = j
                 next_soln[j].remove(swap_gene)
                 next_soln[j].add(next_gene)
 
-                if not _valid_set(next_soln[j]): continue
+                if not _valid_set(next_soln[j]): 
+                    continue
 
             # Compare the current soln to the next soln
             next_weight = _collection_weight(_to_collection(next_soln))
@@ -121,12 +124,12 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current
                 setsToFreq[c][_to_collection(soln)] += 1
 
         if verbose > 0:
-            print '\r[' + ('='*71) + '>] 100%'
+            print('\r[' + ('='*71) + '>] 100%')
 
     # Merge the various chains
     setsToTotalFreq = defaultdict(int)
     for counter in setsToFreq:
-        for sets, freq in counter.iteritems():
+        for sets, freq in list(counter.items()):
             setsToTotalFreq[sets] += freq
 
     return setsToTotalFreq, setToPval, setToObs
diff --git a/wext/saddlepoint.py b/wext/saddlepoint.py
index 02334c2..dac4e44 100644
--- a/wext/saddlepoint.py
+++ b/wext/saddlepoint.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import numpy as np
 from numpy.linalg import det
 from scipy.optimize import fsolve
@@ -82,7 +84,7 @@ def saddlepoint(observed_t, observed_y, probabilities, condition='exclusivity'):
 
     w = np.zeros((2**k, n))
     for i, state in enumerate(states):
-        w[i, :] = np.product(p[state, range(k), :], axis=0)
+        w[i, :] = np.product(p[state, list(range(k)), :], axis=0)
 
     # Define the moment generating functions and cumulant generating functions.  These functions
     # use the above constants.
diff --git a/wext/setup.py b/wext/setup.py
index 072ae0f..873678a 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """Compiles the C modules used by the weighted exclusivity test."""
 
diff --git a/wext/statistics.py b/wext/statistics.py
index 4ceef54..1593b16 100755
--- a/wext/statistics.py
+++ b/wext/statistics.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import numpy as np
 
@@ -36,7 +36,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'):
 
         sorted_q_values = np.zeros(n)
         sorted_q_values[n-1] = min(sorted_p_values[n-1], 1.0)
-        for i in reversed(range(n-1)):
+        for i in reversed(list(range(n-1))):
             sorted_q_values[i] = min(float(n)/float(i+1)*sorted_p_values[i], sorted_q_values[i+1])
 
         q_values = np.zeros(n)
@@ -49,7 +49,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'):
         c = np.sum(1.0/np.arange(1, n+1, dtype=np.float64))
         sorted_q_values = np.zeros(n)
         sorted_q_values[n-1] = min(c*sorted_p_values[n-1], 1.0)
-        for i in reversed(range(n-1)):
+        for i in reversed(list(range(n-1))):
             sorted_q_values[i] = min(c*(float(n)/float(i+1))*sorted_p_values[i], sorted_q_values[i+1])
 
         q_values = np.zeros(n)

From f613b306dd28f96e925a6f4e42d4d3ab5def88dc Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evanbiederstedt@users.noreply.github.com>
Date: Wed, 15 Aug 2018 03:35:36 -0400
Subject: [PATCH 08/60] update travis config, compile 'wext' C/Fortran code

---
 .travis.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index c6d60c5..16933a0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,6 +11,9 @@ install:
     - sudo apt-get -y install python-matplotlib
     - pip install codecov
     - pip install -r requirements.txt
+    - cd wext
+    - python setup.py build
+    - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
 script:
     - nosetests
 after_success:

From e45829c6317974262c989f6b8e94ba7ec497f41e Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Wed, 15 Aug 2018 03:43:24 -0400
Subject: [PATCH 09/60] revise travis config

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 16933a0..fc7f9bc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,6 +14,7 @@ install:
     - cd wext
     - python setup.py build
     - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
+    - cd ..
 script:
     - nosetests
 after_success:

From 4612a70b88cdc9110099d277b002b07355e98eac Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Wed, 15 Aug 2018 03:55:35 -0400
Subject: [PATCH 10/60] revise using python3.x syntax for explicit relative
 imports

---
 wext/__init__.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/wext/__init__.py b/wext/__init__.py
index c746bc2..1c5a62b 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -1,14 +1,14 @@
 #!/usr/bin/env python3
 
 # Import modules.
-from constants import *
-from statistics import *
-from i_o import *
-from enumerate_sets import *
-from mcmc import mcmc
-from exact import exact_test
+from .constants import *
+from .statistics import *
+from .i_o import *
+from .enumerate_sets import *
+from .mcmc import mcmc
+from .exact import exact_test
 import cpoibin
-from saddlepoint import saddlepoint
+from .saddlepoint import saddlepoint
 from comet_exact_test import comet_exact_test
-from exclusivity_tests import re_test, wre_test
+from .exclusivity_tests import re_test, wre_test
 from bipartite_edge_swap_module import bipartite_edge_swap

From 844ec7075fd064c5e8a0f87e364b7aedf391a8e4 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Wed, 15 Aug 2018 04:03:15 -0400
Subject: [PATCH 11/60] fixed relative import syntax for all scripts in "wext"

---
 wext/enumerate_sets.py    | 6 +++---
 wext/exact.py             | 2 +-
 wext/exclusivity_tests.py | 6 +++---
 wext/i_o.py               | 2 +-
 wext/mcmc.py              | 6 +++---
 wext/saddlepoint.py       | 2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py
index 25d7c0f..8b80295 100755
--- a/wext/enumerate_sets.py
+++ b/wext/enumerate_sets.py
@@ -7,9 +7,9 @@
 from math import ceil, isnan
 
 # Load local modules
-from exclusivity_tests import wre_test, re_test, general_wre_test
-from constants import *
-from statistics import multiple_hypothesis_correction
+from .exclusivity_tests import wre_test, re_test, general_wre_test
+from .constants import *
+from .statistics import multiple_hypothesis_correction
 
 ################################################################################
 # Permutational test
diff --git a/wext/exact.py b/wext/exact.py
index 32636d9..5b79167 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import wext_exact_test
-from constants import *
+from .constants import *
 
 def exact_test(t, x, p, verbose=False):
     k = len(x)
diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py
index dfc4322..697cfd9 100755
--- a/wext/exclusivity_tests.py
+++ b/wext/exclusivity_tests.py
@@ -2,10 +2,10 @@
 
 # Load required modules
 import numpy as np
-from constants import *
-from exact import exact_test
+from .constants import *
+from .exact import exact_test
 import cpoibin
-from saddlepoint import saddlepoint, check_condition
+from .saddlepoint import saddlepoint, check_condition
 from comet_exact_test import comet_exact_test
 import warnings
 
diff --git a/wext/i_o.py b/wext/i_o.py
index fd7c706..241f340 100755
--- a/wext/i_o.py
+++ b/wext/i_o.py
@@ -3,7 +3,7 @@
 # Load required modules
 import sys, os, json, numpy as np
 from collections import defaultdict
-from constants import *
+from .constants import *
 
 # Load mutation data from one of our processed JSON file
 def load_mutation_data( mutation_file, min_freq=1 ):
diff --git a/wext/mcmc.py b/wext/mcmc.py
index 550b541..675a410 100755
--- a/wext/mcmc.py
+++ b/wext/mcmc.py
@@ -5,9 +5,9 @@
 from time import time
 from random import random, sample, choice, seed as random_seed
 
-from constants import *
-from enumerate_sets import observed_values
-from exclusivity_tests import re_test, wre_test
+from .constants import *
+from .enumerate_sets import observed_values
+from .exclusivity_tests import re_test, wre_test
 
 def mcmc(ks, geneToCases, num_patients, method, test, geneToP, seed, annotations=set(), verbose=0, step_len=100, nchains=1, niters=1000, alpha=1):
     if verbose > 0: 
diff --git a/wext/saddlepoint.py b/wext/saddlepoint.py
index dac4e44..8869c79 100644
--- a/wext/saddlepoint.py
+++ b/wext/saddlepoint.py
@@ -5,7 +5,7 @@
 from scipy.optimize import fsolve
 from scipy.stats import norm
 import itertools
-from constants import *
+from .constants import *
 
 def check_condition(state, condition):
 

From 300f1d82cc7cffd47e280926c1d091802ae3e787 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Wed, 15 Aug 2018 04:10:05 -0400
Subject: [PATCH 12/60] attempt to fix issue with wext_exact_test

---
 wext/exact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/exact.py b/wext/exact.py
index 5b79167..39232b9 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 import numpy as np
-import wext_exact_test
+from .wext_exact_test import *
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From 4a87caa6aeab785ecde6eb98cab8e05cd06cabe3 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Wed, 15 Aug 2018 04:14:58 -0400
Subject: [PATCH 13/60] 2nd attempt to fix issue with wext_exact_test

---
 wext/exact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/exact.py b/wext/exact.py
index 39232b9..fc3c64d 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 import numpy as np
-from .wext_exact_test import *
+import .wext_exact_test 
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From 36a22bc327ae4088675a604567d5087dc0788f02 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Wed, 15 Aug 2018 04:19:12 -0400
Subject: [PATCH 14/60] 3rd attempt to fix issue with wext_exact_test

---
 wext/exact.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wext/exact.py b/wext/exact.py
index fc3c64d..c7494ac 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 import numpy as np
-import .wext_exact_test 
+from .wext_exact_test import triple_exact_test
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):
@@ -16,7 +16,7 @@ def exact_test(t, x, p, verbose=False):
 # Wrapper for k=3 exact test C function
 def exact_test_k3(t, x, p, verbose):
     N = len(p[0])
-    return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p )
+    return triple_exact_test( N, t, x[0], x[1], x[2], p )
 
 # Wrapper for k=2 exact test C function
 def exact_test_k2(t, x, y, p_x, p_y, verbose):

From 7c6427ede313c692f2f0b90c8e1c189eefb28087 Mon Sep 17 00:00:00 2001
From: Evan Biederstedt <evan.biederstedt@gmail.com>
Date: Mon, 20 Aug 2018 23:09:01 -0400
Subject: [PATCH 15/60] removed python3 shebangs

---
 compute_mutation_probabilities.py                            | 2 +-
 examples/generate_data.py                                    | 5 +++--
 experiments/eccb2016/scripts/helper.py                       | 4 +++-
 experiments/eccb2016/scripts/pairs_summary.py                | 2 +-
 experiments/eccb2016/scripts/permutation_test_helper.py      | 2 +-
 experiments/eccb2016/scripts/permute_single_matrix.py        | 3 ++-
 experiments/eccb2016/scripts/pval_correlations.py            | 4 ++--
 .../eccb2016/scripts/reconcile_grid_permutation_test.py      | 2 +-
 experiments/eccb2016/scripts/remove_genes_with_no_length.py  | 2 +-
 experiments/eccb2016/scripts/results_table.py                | 2 +-
 .../eccb2016/scripts/sample_mutation_frequency_plot.py       | 2 +-
 experiments/eccb2016/scripts/triple_pval_scatter.py          | 2 +-
 experiments/eccb2016/scripts/unweighted_comparison.py        | 2 +-
 experiments/eccb2016/scripts/weights_matrix.py               | 2 +-
 find_exclusive_sets.py                                       | 2 +-
 find_sets.py                                                 | 2 +-
 process_mutations.py                                         | 2 +-
 wext/__init__.py                                             | 2 +-
 wext/constants.py                                            | 2 +-
 wext/enumerate_sets.py                                       | 2 +-
 wext/exact.py                                                | 2 +-
 wext/exclusivity_tests.py                                    | 2 +-
 wext/i_o.py                                                  | 2 +-
 wext/mcmc.py                                                 | 2 +-
 wext/saddlepoint.py                                          | 2 +-
 wext/setup.py                                                | 2 +-
 wext/statistics.py                                           | 2 +-
 27 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
index acac439..a924427 100755
--- a/compute_mutation_probabilities.py
+++ b/compute_mutation_probabilities.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, json, numpy as np, multiprocessing as mp, random
diff --git a/examples/generate_data.py b/examples/generate_data.py
index 50200dc..5a6c4c9 100644
--- a/examples/generate_data.py
+++ b/examples/generate_data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, numpy as np, random
@@ -81,4 +81,5 @@ def run(args):
         raise NotImplementedError('Data generation mode "%s" is not implemented.' % args.mode)
     return
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
\ No newline at end of file
+if __name__ == '__main__': 
+    run( get_parser().parse_args(sys.argv[1:]) )
\ No newline at end of file
diff --git a/experiments/eccb2016/scripts/helper.py b/experiments/eccb2016/scripts/helper.py
index 2017240..35fc5b5 100644
--- a/experiments/eccb2016/scripts/helper.py
+++ b/experiments/eccb2016/scripts/helper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 import numpy as np
 
@@ -16,6 +16,7 @@ def add_y_equals_x(ax, c='k', line_style='--', alpha=0.75):
     ax.set_xlim(lims)
     ax.set_ylim(lims)
 
+
 def aligned_plaintext_table(table, sep='\t', spaces=2):
     """
     Create and return an aligned plaintext table.
@@ -42,6 +43,7 @@ def aligned_plaintext_table(table, sep='\t', spaces=2):
     # Return results.
     return '\n'.join([''.join([entries[i][j].rjust(sizes[j]+spaces) for j in range(n)]).rstrip() for i in range(m)])
 
+
 def rank(a, reverse=False, ties=2):
     """
     Find the ranks of the elements of a.
diff --git a/experiments/eccb2016/scripts/pairs_summary.py b/experiments/eccb2016/scripts/pairs_summary.py
index 3e9d2ce..2920d18 100755
--- a/experiments/eccb2016/scripts/pairs_summary.py
+++ b/experiments/eccb2016/scripts/pairs_summary.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import matplotlib
diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py
index cb6770a..fc9b90a 100644
--- a/experiments/eccb2016/scripts/permutation_test_helper.py
+++ b/experiments/eccb2016/scripts/permutation_test_helper.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse
diff --git a/experiments/eccb2016/scripts/permute_single_matrix.py b/experiments/eccb2016/scripts/permute_single_matrix.py
index 00000ba..12c6d4e 100755
--- a/experiments/eccb2016/scripts/permute_single_matrix.py
+++ b/experiments/eccb2016/scripts/permute_single_matrix.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Import modules.
 import numpy as np, os, sys, argparse, json
@@ -17,6 +17,7 @@ def get_parser():
                         default=os.environ.get('SGE_TASK_ID', 0))
     return parser
 
+
 def run( args ):
     # Load WExT
     sys.path.append(args.wext_dir)
diff --git a/experiments/eccb2016/scripts/pval_correlations.py b/experiments/eccb2016/scripts/pval_correlations.py
index 55c280f..75e3c86 100755
--- a/experiments/eccb2016/scripts/pval_correlations.py
+++ b/experiments/eccb2016/scripts/pval_correlations.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, pandas as pd
@@ -31,7 +31,7 @@
 # Compute the correlations with permutational
 # permutational_pvals_with_zeros = list(df.loc[df['Method'] == 'Permutational']['Raw P-value'])
 # all_indices =
-tests       = ["Permutational", "Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"]
+tests = ["Permutational", "Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"]
 for val, indices in [("All", []), (0, 1./args.num_permutations), (1./args.num_permutations, 2)]:
     tbl = [list(tests)]
     for t1 in tests:
diff --git a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
index 2f3e250..d7c0354 100644
--- a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
+++ b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, json, multiprocessing as mp
diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
index 3aa9e9f..7017b08 100644
--- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py
+++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, json
diff --git a/experiments/eccb2016/scripts/results_table.py b/experiments/eccb2016/scripts/results_table.py
index ad39a94..3989369 100755
--- a/experiments/eccb2016/scripts/results_table.py
+++ b/experiments/eccb2016/scripts/results_table.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, json
diff --git a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
index 77a25df..7b66ee6 100755
--- a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
+++ b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import matplotlib
diff --git a/experiments/eccb2016/scripts/triple_pval_scatter.py b/experiments/eccb2016/scripts/triple_pval_scatter.py
index da966b3..6e2f168 100755
--- a/experiments/eccb2016/scripts/triple_pval_scatter.py
+++ b/experiments/eccb2016/scripts/triple_pval_scatter.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import matplotlib
diff --git a/experiments/eccb2016/scripts/unweighted_comparison.py b/experiments/eccb2016/scripts/unweighted_comparison.py
index b6ffce4..369174b 100755
--- a/experiments/eccb2016/scripts/unweighted_comparison.py
+++ b/experiments/eccb2016/scripts/unweighted_comparison.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
  #Load required modules
 import matplotlib
diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py
index d263bca..9d13cbb 100755
--- a/experiments/eccb2016/scripts/weights_matrix.py
+++ b/experiments/eccb2016/scripts/weights_matrix.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import matplotlib
diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py
index 40eb062..88a6401 100755
--- a/find_exclusive_sets.py
+++ b/find_exclusive_sets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, numpy as np, json
diff --git a/find_sets.py b/find_sets.py
index 69e8388..1154da7 100755
--- a/find_sets.py
+++ b/find_sets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, numpy as np, json
diff --git a/process_mutations.py b/process_mutations.py
index 4d4a9a3..c886271 100755
--- a/process_mutations.py
+++ b/process_mutations.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, argparse, json, numpy as np
diff --git a/wext/__init__.py b/wext/__init__.py
index 1c5a62b..7df3a5f 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Import modules.
 from .constants import *
diff --git a/wext/constants.py b/wext/constants.py
index 9f6425b..7cffc13 100755
--- a/wext/constants.py
+++ b/wext/constants.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # P-values are called invalid if P > 1+PTOL or P < -PTOL
 PTOL = 10**-3
diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py
index 8b80295..7908b42 100755
--- a/wext/enumerate_sets.py
+++ b/wext/enumerate_sets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, multiprocessing as mp, json
diff --git a/wext/exact.py b/wext/exact.py
index c7494ac..6fc0a0f 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 import numpy as np
 from .wext_exact_test import triple_exact_test
diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py
index 697cfd9..c29ff56 100755
--- a/wext/exclusivity_tests.py
+++ b/wext/exclusivity_tests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import numpy as np
diff --git a/wext/i_o.py b/wext/i_o.py
index 241f340..a77a0bc 100755
--- a/wext/i_o.py
+++ b/wext/i_o.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 # Load required modules
 import sys, os, json, numpy as np
diff --git a/wext/mcmc.py b/wext/mcmc.py
index 675a410..29f43c9 100755
--- a/wext/mcmc.py
+++ b/wext/mcmc.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 import sys, os, numpy as np
 from collections import defaultdict
diff --git a/wext/saddlepoint.py b/wext/saddlepoint.py
index 8869c79..49df27b 100644
--- a/wext/saddlepoint.py
+++ b/wext/saddlepoint.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 import numpy as np
 from numpy.linalg import det
diff --git a/wext/setup.py b/wext/setup.py
index 873678a..072ae0f 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 """Compiles the C modules used by the weighted exclusivity test."""
 
diff --git a/wext/statistics.py b/wext/statistics.py
index 1593b16..b1a3834 100755
--- a/wext/statistics.py
+++ b/wext/statistics.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 
 import numpy as np
 

From 02e55866829724087b407cae8d84d8d58b0476aa Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 19:00:28 -0400
Subject: [PATCH 16/60] corrected structures for PyMethodDef

---
 wext/src/c/comet_exact_test.c | 12 +++++++++---
 wext/src/c/poibinmodule.c     |  3 ++-
 wext/src/c/wext_exact_test.c  |  5 ++++-
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c
index 903d991..e30f3f8 100755
--- a/wext/src/c/comet_exact_test.c
+++ b/wext/src/c/comet_exact_test.c
@@ -281,8 +281,11 @@ struct Pvalues comet_exact_test(int k, int N, int *ctbl, double pvalthresh){
 
 }
 
+
+
+
 ////////////////////////////////////////////////////////////////////////////////
-// Python registration
+// Python wrapper functions
 ////////////////////////////////////////////////////////////////////////////////
 
 // The CoMEt exact test, callable from Python
@@ -307,7 +310,7 @@ PyObject *py_comet_exact_test(PyObject *self, PyObject *args){
     tbl[i] = (int) PyLong_AsLong (PyList_GetItem(py_tbl, i));
 
   // Compute the P-values
-  pval   = comet_exact_test(k, N, tbl, pvalthresh);
+  pval = comet_exact_test(k, N, tbl, pvalthresh);
 
   // Free memory 
   free(tbl);
@@ -316,10 +319,13 @@ PyObject *py_comet_exact_test(PyObject *self, PyObject *args){
 
 }
 
+// methods definition: cometExactTest
+
 
 // Register the functions we want to be accessible from Python
 PyMethodDef cometExactTest[] = {
-    {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}
+    {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, 
+    {NULL, NULL, 0, NULL}
 };
 
 // Note that the suffix of init has to match the name of the module,
diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c
index 8d5b3a3..4a55a00 100755
--- a/wext/src/c/poibinmodule.c
+++ b/wext/src/c/poibinmodule.c
@@ -73,7 +73,8 @@ PyObject *py_pmf(PyObject *self, PyObject *args){
 
 // Register the functions we want to be accessible from Python
 PyMethodDef poibinMethods[] = {
-    {"pmf", py_pmf, METH_VARARGS, "Poisson-Binomial PMF"}
+    {"pmf", py_pmf, METH_VARARGS, "Poisson-Binomial PMF"}, 
+    {NULL, NULL, 0, NULL}
 };
 
 // Note that the suffix of init has to match the name of the module,
diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c
index ce49264..fe5e170 100755
--- a/wext/src/c/wext_exact_test.c
+++ b/wext/src/c/wext_exact_test.c
@@ -39,6 +39,8 @@ double joint_mass(int n, int z, int x, int y, double *p_x, double *p_y, double *
     return cache[n][z][x][y];
 }
 
+// python wrapper
+
 PyObject *py_conditional(PyObject *self, PyObject *args){
     // Parameters
     int i, j, i2, j2, N, x, y, *zs, num_zs;
@@ -206,7 +208,8 @@ PyObject *triple_exact_test(PyObject *self, PyObject *args){
 // Register the functions we want to be accessible from Python
 PyMethodDef weightedEnrichmentMethods[] = {
     {"conditional", py_conditional, METH_VARARGS, "Weighted enrichment test conditional PMF for pairs"},
-    {"triple_exact_test", triple_exact_test, METH_VARARGS, "Weighted enrichment test for triples"}
+    {"triple_exact_test", triple_exact_test, METH_VARARGS, "Weighted enrichment test for triples"}, 
+    {NULL, NULL, 0, NULL}
 };
 
 // Note that the suffix of init has to match the name of the module,

From 934904a25fdea281bacc7577a8dfe3828a15b6bc Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 19:25:01 -0400
Subject: [PATCH 17/60] check compiles under Python3.x

---
 wext/src/c/comet_exact_test.c | 37 +++++++++++++++++++++++++-------
 wext/src/c/poibinmodule.c     | 38 +++++++++++++++++++++++++++------
 wext/src/c/wext_exact_test.c  | 40 ++++++++++++++++++++++++++++-------
 3 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c
index e30f3f8..5e9ab1f 100755
--- a/wext/src/c/comet_exact_test.c
+++ b/wext/src/c/comet_exact_test.c
@@ -289,7 +289,7 @@ struct Pvalues comet_exact_test(int k, int N, int *ctbl, double pvalthresh){
 ////////////////////////////////////////////////////////////////////////////////
 
 // The CoMEt exact test, callable from Python
-PyObject *py_comet_exact_test(PyObject *self, PyObject *args){
+static PyObject *py_comet_exact_test(PyObject *self, PyObject *args){
   // Parameters
   int k, N; // k: gene set size; N: number of samples
   PyObject *py_tbl; // FLAT Python contingency table
@@ -320,19 +320,40 @@ PyObject *py_comet_exact_test(PyObject *self, PyObject *args){
 }
 
 // methods definition: cometExactTest
-
+// name of module: comet_exact_test ... which is also the name of the function in Python
 
 // Register the functions we want to be accessible from Python
-PyMethodDef cometExactTest[] = {
+static PyMethodDef cometExactTest[] = {
     {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, 
     {NULL, NULL, 0, NULL}
 };
 
+// PYTHON 2
 // Note that the suffix of init has to match the name of the module,
 // both here and in the setup.py file
-PyMODINIT_FUNC initcomet_exact_test(void) {
-    PyObject *m = Py_InitModule("comet_exact_test", cometExactTest);
-    if (m == NULL) {
-        return;
-    }
+// PyMODINIT_FUNC initcomet_exact_test(void) {
+//     PyObject *m = Py_InitModule("comet_exact_test", cometExactTest);
+//     if (m == NULL) {
+//         return;
+//     }
+// }
+
+// define structure for module
+static struct PyModuleDef comet_exact_test = {
+  PyModuleDef_HEAD_INIT,   // required
+  "comet_exact_test",           // name of module
+  "documentation detailed here",   // documentation
+  -1,
+  cometExactTest            // method definitions
+};
+
+
+// finally, write the initalizer function
+
+PyMODINIT_FUNC PyInit_comet_exact_test(void)  
+{
+    return PyModule_Create(&comet_exact_test);
 }
+
+
+
diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c
index 4a55a00..f2bf67b 100755
--- a/wext/src/c/poibinmodule.c
+++ b/wext/src/c/poibinmodule.c
@@ -45,7 +45,7 @@ double pmf(int k, int N, double *ps){
     return mass;
 }
 
-PyObject *py_pmf(PyObject *self, PyObject *args){
+static PyObject *py_pmf(PyObject *self, PyObject *args){
   // Parameters
   int i, k, N;
   double result, *ps;
@@ -71,17 +71,41 @@ PyObject *py_pmf(PyObject *self, PyObject *args){
   return Py_BuildValue("d", result);
 }
 
+
+// methods definition: poibinMethods
+// name of module: cpoibin
+
 // Register the functions we want to be accessible from Python
-PyMethodDef poibinMethods[] = {
+static PyMethodDef poibinMethods[] = {
     {"pmf", py_pmf, METH_VARARGS, "Poisson-Binomial PMF"}, 
     {NULL, NULL, 0, NULL}
 };
 
+// PYTHON 2
 // Note that the suffix of init has to match the name of the module,
 // both here and in the setup.py file
-PyMODINIT_FUNC initcpoibin(void) {
-    PyObject *m = Py_InitModule("cpoibin", poibinMethods);
-    if (m == NULL) {
-        return;
-    }
+// PyMODINIT_FUNC initcpoibin(void) {
+//    PyObject *m = Py_InitModule("cpoibin", poibinMethods);
+//    if (m == NULL) {
+//        return;
+//    }
+// }
+
+// define the module structure
+
+static struct PyModuleDef cpoibin = {
+  PyModuleDef_HEAD_INIT,   // required
+  "cpoibin",           // name of module
+  "ocumentation detailed here",   // documentation
+  -1,
+  poibinMethods             // method definitions
+};
+
+// finally, write the initalizer function
+
+PyMODINIT_FUNC PyInit_cpoibin(void)  
+{
+    return PyModule_Create(&cpoibin);
 }
+
+
diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c
index fe5e170..7064296 100755
--- a/wext/src/c/wext_exact_test.c
+++ b/wext/src/c/wext_exact_test.c
@@ -41,7 +41,7 @@ double joint_mass(int n, int z, int x, int y, double *p_x, double *p_y, double *
 
 // python wrapper
 
-PyObject *py_conditional(PyObject *self, PyObject *args){
+static PyObject *py_conditional(PyObject *self, PyObject *args){
     // Parameters
     int i, j, i2, j2, N, x, y, *zs, num_zs;
     double *p_x, *p_y, joint_marginal, mass, ****cache;
@@ -138,7 +138,7 @@ double P(int n, int t, int w, int x, int y, double **p, double *****cache){
 	return cache[n][t][w][x][y];
 }
 
-PyObject *triple_exact_test(PyObject *self, PyObject *args){
+static PyObject *triple_exact_test(PyObject *self, PyObject *args){
     // Parameters
     int i, j, i2, j2, i3, N, w, x, y, t, T;
     double **p, marginals, joint, result, *****cache;
@@ -204,19 +204,43 @@ PyObject *triple_exact_test(PyObject *self, PyObject *args){
     return Py_BuildValue("f", result);
 }
 
+
+// methods definition: weightedEnrichmentMethods
+// name of module: wext_exact_test
+
 ////////////////////////////////////////////////////////////////////////////////
 // Register the functions we want to be accessible from Python
-PyMethodDef weightedEnrichmentMethods[] = {
+static PyMethodDef weightedEnrichmentMethods[] = {
     {"conditional", py_conditional, METH_VARARGS, "Weighted enrichment test conditional PMF for pairs"},
     {"triple_exact_test", triple_exact_test, METH_VARARGS, "Weighted enrichment test for triples"}, 
     {NULL, NULL, 0, NULL}
 };
 
+// PYTHON 2
 // Note that the suffix of init has to match the name of the module,
 // both here and in the setup.py file
-PyMODINIT_FUNC initwext_exact_test(void) {
-    PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods);
-    if (m == NULL) {
-        return;
-    }
+// PyMODINIT_FUNC initwext_exact_test(void) {
+//     PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods);
+//     if (m == NULL) {
+//         return;
+//     }
+// }
+
+// define module structure
+
+static struct PyModuleDef wext_exact_test = {
+    PyModuleDef_HEAD_INIT,   // required
+    "wext_exact_test",           // name of module
+    "documentation detailed here",   // documentation
+    -1,
+    weightedEnrichmentMethods             // method definitions
+};
+
+
+// finally, write the initalizer function
+
+PyMODINIT_FUNC PyInit_wext_exact_test(void)  
+{
+    return PyModule_Create(&wext_exact_test);
 }
+

From 602e301eff11991d3d090f5d1b2b48b70daf5843 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 19:29:30 -0400
Subject: [PATCH 18/60] revise poibinmodule header file, define py_pmf as static

---
 wext/src/c/poibinmodule.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/src/c/poibinmodule.h b/wext/src/c/poibinmodule.h
index 2d851e4..59b3511 100755
--- a/wext/src/c/poibinmodule.h
+++ b/wext/src/c/poibinmodule.h
@@ -8,4 +8,4 @@
 // Function declarations
 double pmf_recursion(int k, int j, double *ps, double **cache);
 double pmf(int k, int N, double *ps);
-PyObject *py_pmf(PyObject *self, PyObject *args);
+static PyObject *py_pmf(PyObject *self, PyObject *args);

From 49750b768d8eabc83a12cb88e741c22887b5fbb5 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 19:35:24 -0400
Subject: [PATCH 19/60] try renaming to comet_exact_tests

---
 wext/setup.py                 |  4 ++--
 wext/src/c/comet_exact_test.c | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/wext/setup.py b/wext/setup.py
index 072ae0f..22cf1d2 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -26,8 +26,8 @@
 
 # Compile the CoMEt exact test module
 srcs = ['/src/c/comet_exact_test.c']
-module = Extension('comet_exact_test', include_dirs=[numpy.get_include()],
+module = Extension('comet_exact_tests', include_dirs=[numpy.get_include()],
     sources = [ thisDir + s for s in srcs ],
     extra_compile_args = ['-g', '-O0'])
-setup(name='comet_exact_test', version='0.0.1',  ext_modules=[module],
+setup(name='comet_exact_tests', version='0.0.1',  ext_modules=[module],
       description='CoMEt exact test implementation.')
diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c
index 5e9ab1f..dc36a59 100755
--- a/wext/src/c/comet_exact_test.c
+++ b/wext/src/c/comet_exact_test.c
@@ -322,6 +322,8 @@ static PyObject *py_comet_exact_test(PyObject *self, PyObject *args){
 // methods definition: cometExactTest
 // name of module: comet_exact_test ... which is also the name of the function in Python
 
+// try renaming this to 'comet_exact_tests'
+
 // Register the functions we want to be accessible from Python
 static PyMethodDef cometExactTest[] = {
     {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, 
@@ -339,9 +341,9 @@ static PyMethodDef cometExactTest[] = {
 // }
 
 // define structure for module
-static struct PyModuleDef comet_exact_test = {
+static struct PyModuleDef comet_exact_tests = {
   PyModuleDef_HEAD_INIT,   // required
-  "comet_exact_test",           // name of module
+  "comet_exact_tests",           // name of module
   "documentation detailed here",   // documentation
   -1,
   cometExactTest            // method definitions
@@ -350,9 +352,9 @@ static struct PyModuleDef comet_exact_test = {
 
 // finally, write the initalizer function
 
-PyMODINIT_FUNC PyInit_comet_exact_test(void)  
+PyMODINIT_FUNC PyInit_comet_exact_tests(void)  
 {
-    return PyModule_Create(&comet_exact_test);
+    return PyModule_Create(&comet_exact_tests);
 }
 
 
From 6b2cdda4794e567b2fa169be16c190ecafd02d22 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 19:40:29 -0400
Subject: [PATCH 20/60] revised setup.py, module should be cpoibin

---
 wext/setup.cfg | 2 --
 wext/setup.py  | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)
 delete mode 100755 wext/setup.cfg

diff --git a/wext/setup.cfg b/wext/setup.cfg
deleted file mode 100755
index 8f69613..0000000
--- a/wext/setup.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[build_ext]
-inplace=1
diff --git a/wext/setup.py b/wext/setup.py
index 22cf1d2..1c76366 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -13,7 +13,7 @@
 module = Extension('cpoibin', include_dirs=[numpy.get_include()],
     sources = [ thisDir + s for s in srcs ],
     extra_compile_args = ['-g', '-O0'])
-setup(name='poibin', version='0.0.1',  ext_modules=[module],
+setup(name='cpoibin', version='0.0.1',  ext_modules=[module],
       description='Module for analyzing the Poisson-Binomial distribution.')
 
 # Compile the weighted enrichment module

From 053e680e88a8f3480ed1da8a23679a5f1e49b022 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 19:50:12 -0400
Subject: [PATCH 21/60] corrected typo with "from wext_exact_test import
 triple_exact_test"

---
 wext/exact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/exact.py b/wext/exact.py
index 6fc0a0f..dcd4800 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-from .wext_exact_test import triple_exact_test
+from wext_exact_test import triple_exact_test
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From 29de50b6067f7448f6f5512649382a9d9d8f6e2a Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 20:00:31 -0400
Subject: [PATCH 22/60] cannot find module, but it does install...

---
 .travis.yml   | 2 ++
 wext/exact.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index fc7f9bc..d947b5f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,8 @@ install:
     - python setup.py build
     - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
     - cd ..
+    - pwd
+    - ls
 script:
     - nosetests
 after_success:
diff --git a/wext/exact.py b/wext/exact.py
index dcd4800..9bcfb5c 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-from wext_exact_test import triple_exact_test
+import wext_exact_test 
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):
@@ -16,7 +16,7 @@ def exact_test(t, x, p, verbose=False):
 # Wrapper for k=3 exact test C function
 def exact_test_k3(t, x, p, verbose):
     N = len(p[0])
-    return triple_exact_test( N, t, x[0], x[1], x[2], p )
+    return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p )
 
 # Wrapper for k=2 exact test C function
 def exact_test_k2(t, x, y, p_x, p_y, verbose):

From 61fc045ea33fff1bd6c925611823ebb83f49d429 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 20:03:11 -0400
Subject: [PATCH 23/60] revised __init__.py to correctly import modules

---
 wext/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/wext/__init__.py b/wext/__init__.py
index 7df3a5f..d144487 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -8,7 +8,9 @@
 from .mcmc import mcmc
 from .exact import exact_test
 import cpoibin
+import wext_exact_test 
+import comet_exact_tests
 from .saddlepoint import saddlepoint
-from comet_exact_test import comet_exact_test
+from comet_exact_tests import comet_exact_test
 from .exclusivity_tests import re_test, wre_test
 from bipartite_edge_swap_module import bipartite_edge_swap

From da5a5a980c2909658fe18fade40f7cde8d725a13 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 20:07:57 -0400
Subject: [PATCH 24/60] run nosetests in different subdirectory

---
 .travis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d947b5f..e41baa6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,9 +14,9 @@ install:
     - cd wext
     - python setup.py build
     - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
-    - cd ..
-    - pwd
-    - ls
+    ##- cd ..
+    ##- pwd
+    ##- ls
 script:
     - nosetests
 after_success:

From 2beb71ffdbd4c9c3f841013330410066043fd194 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 20:08:58 -0400
Subject: [PATCH 25/60] try nosetests in upper subdirectory

---
 .travis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index e41baa6..41186f8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,9 +14,9 @@ install:
     - cd wext
     - python setup.py build
     - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
-    ##- cd ..
-    ##- pwd
-    ##- ls
+    - cd ../../
+    - pwd
+    - ls
 script:
     - nosetests
 after_success:

From 8f646d4af6d970bfc58d71c06c2e9b6b650c4b12 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 20:20:52 -0400
Subject: [PATCH 26/60] revised __init__.py

---
 .travis.yml      |  4 ++--
 wext/__init__.py | 11 +++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 41186f8..5554f3a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,6 @@
 language: python
 python:
-    - 2.7
+    ##- 2.7
     - 3.4
     - 3.5
     - 3.6
@@ -14,7 +14,7 @@ install:
     - cd wext
     - python setup.py build
     - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
-    - cd ../../
+    - cd ../
     - pwd
     - ls
 script:
diff --git a/wext/__init__.py b/wext/__init__.py
index d144487..0b273e8 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -1,16 +1,15 @@
 #!/usr/bin/env python
 
-# Import modules.
+# Import modules
 from .constants import *
 from .statistics import *
 from .i_o import *
 from .enumerate_sets import *
 from .mcmc import mcmc
 from .exact import exact_test
-import cpoibin
-import wext_exact_test 
-import comet_exact_tests
+from .. import cpoibin
+from .. import wext_exact_test 
 from .saddlepoint import saddlepoint
-from comet_exact_tests import comet_exact_test
+from .. import comet_exact_tests
 from .exclusivity_tests import re_test, wre_test
-from bipartite_edge_swap_module import bipartite_edge_swap
+from .. import bipartite_edge_swap_module
\ No newline at end of file

From c04fdc874cc12cfd7b2db825795e50941134150b Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 20:23:26 -0400
Subject: [PATCH 27/60] changed __init__.py again, try explicit imports

---
 wext/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/wext/__init__.py b/wext/__init__.py
index 0b273e8..b9a1ddb 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -7,9 +7,9 @@
 from .enumerate_sets import *
 from .mcmc import mcmc
 from .exact import exact_test
-from .. import cpoibin
-from .. import wext_exact_test 
+from ..c import cpoibin
+from ..c import wext_exact_test 
 from .saddlepoint import saddlepoint
-from .. import comet_exact_tests
+from ..c import comet_exact_tests
 from .exclusivity_tests import re_test, wre_test
-from .. import bipartite_edge_swap_module
\ No newline at end of file
+from ..fortran import bipartite_edge_swap_module
\ No newline at end of file

From 82547561af7be8e7a9c3347e7c5c30366552cb0e Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 20:27:23 -0400
Subject: [PATCH 28/60] now change exact.py, from ..c import wext_exact_test

---
 wext/exact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/exact.py b/wext/exact.py
index 9bcfb5c..46f4a1e 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-import wext_exact_test 
+from ..c import wext_exact_test 
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From 7fa5cbb8a94bc596cd0f966c5adf9e634c88fd86 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 21:26:51 -0400
Subject: [PATCH 29/60] revised relative imports

---
 wext/__init__.py | 8 ++++----
 wext/exact.py    | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/wext/__init__.py b/wext/__init__.py
index b9a1ddb..de218c5 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -7,9 +7,9 @@
 from .enumerate_sets import *
 from .mcmc import mcmc
 from .exact import exact_test
-from ..c import cpoibin
-from ..c import wext_exact_test 
+from .c import cpoibin
+from .c import wext_exact_test 
 from .saddlepoint import saddlepoint
-from ..c import comet_exact_tests
+from .c import comet_exact_tests
 from .exclusivity_tests import re_test, wre_test
-from ..fortran import bipartite_edge_swap_module
\ No newline at end of file
+from .fortran import bipartite_edge_swap_module
\ No newline at end of file
diff --git a/wext/exact.py b/wext/exact.py
index 46f4a1e..2ce07e9 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-from ..c import wext_exact_test 
+from .c import wext_exact_test 
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From c43aa8872b30dd08b1c8b86e8064e9c5ac032df6 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 21:45:47 -0400
Subject: [PATCH 30/60] try "from .wext_exact_test import * "

---
 wext/exact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/exact.py b/wext/exact.py
index 2ce07e9..c81e269 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-from .c import wext_exact_test 
+from .wext_exact_test import * 
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From fcea393f7997665677bf1c330a389dd9f5deb363 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 22:52:30 -0400
Subject: [PATCH 31/60] revise import

---
 wext/__init__.py | 8 ++++----
 wext/exact.py    | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/wext/__init__.py b/wext/__init__.py
index de218c5..cb1908f 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -7,9 +7,9 @@
 from .enumerate_sets import *
 from .mcmc import mcmc
 from .exact import exact_test
-from .c import cpoibin
-from .c import wext_exact_test 
+from .src.c import cpoibin
+from .src.c import wext_exact_test 
 from .saddlepoint import saddlepoint
-from .c import comet_exact_tests
+from .src.c import comet_exact_tests
 from .exclusivity_tests import re_test, wre_test
-from .fortran import bipartite_edge_swap_module
\ No newline at end of file
+from .src.fortran import bipartite_edge_swap_module
\ No newline at end of file
diff --git a/wext/exact.py b/wext/exact.py
index c81e269..566dd33 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-from .wext_exact_test import * 
+from .src.c import wext_exact_test 
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From 3a3e8d18d81f24248e271af94d7361d55c2a59ac Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 22:55:47 -0400
Subject: [PATCH 32/60] try from .src.c.wext_exact_test import *

---
 wext/exact.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/exact.py b/wext/exact.py
index 566dd33..03447b0 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-from .src.c import wext_exact_test 
+from .src.c.wext_exact_test import *
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):

From a08fa528891e5f60652220f973562720431e5331 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 9 Sep 2018 23:28:42 -0400
Subject: [PATCH 33/60] added __init__.py files

---
 wext/exact.py          | 2 +-
 wext/src/__init__.py   | 0
 wext/src/c/__init__.py | 0
 3 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 wext/src/__init__.py
 create mode 100644 wext/src/c/__init__.py

diff --git a/wext/exact.py b/wext/exact.py
index 03447b0..9bcfb5c 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
-from .src.c.wext_exact_test import *
+import wext_exact_test 
 from .constants import *
 
 def exact_test(t, x, p, verbose=False):
diff --git a/wext/src/__init__.py b/wext/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/wext/src/c/__init__.py b/wext/src/c/__init__.py
new file mode 100644
index 0000000..e69de29

From 38acd11da31d97db9bbe38bc9b4bc17979be0968 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 10:19:52 -0400
Subject: [PATCH 34/60] these should be global modules

---
 wext/__init__.py       | 8 ++++----
 wext/src/__init__.py   | 0
 wext/src/c/__init__.py | 0
 3 files changed, 4 insertions(+), 4 deletions(-)
 delete mode 100644 wext/src/__init__.py
 delete mode 100644 wext/src/c/__init__.py

diff --git a/wext/__init__.py b/wext/__init__.py
index cb1908f..de8e09d 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -7,9 +7,9 @@
 from .enumerate_sets import *
 from .mcmc import mcmc
 from .exact import exact_test
-from .src.c import cpoibin
-from .src.c import wext_exact_test 
+import cpoibin
+import wext_exact_test 
 from .saddlepoint import saddlepoint
-from .src.c import comet_exact_tests
+import comet_exact_tests
 from .exclusivity_tests import re_test, wre_test
-from .src.fortran import bipartite_edge_swap_module
\ No newline at end of file
+from bipartite_edge_swap_module import bipartite_edge_swap
\ No newline at end of file
diff --git a/wext/src/__init__.py b/wext/src/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/wext/src/c/__init__.py b/wext/src/c/__init__.py
deleted file mode 100644
index e69de29..0000000

From 885059be8be03afb16584433df0ef43c57c9441a Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 13:40:56 -0400
Subject: [PATCH 35/60] install instead

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 5554f3a..d45cf56 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ install:
     - pip install codecov
     - pip install -r requirements.txt
     - cd wext
-    - python setup.py build
+    - python setup.py install
     - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
     - cd ../
     - pwd

From 2717e4a6d7e62f661038139151f1ce6be476119d Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 13:48:03 -0400
Subject: [PATCH 36/60] revised module name

---
 wext/exclusivity_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py
index c29ff56..a7c4229 100755
--- a/wext/exclusivity_tests.py
+++ b/wext/exclusivity_tests.py
@@ -6,7 +6,7 @@
 from .exact import exact_test
 import cpoibin
 from .saddlepoint import saddlepoint, check_condition
-from comet_exact_test import comet_exact_test
+from comet_exact_tests import comet_exact_test
 import warnings
 
 # Perform the weighted-row exclusivity test (WR-test) using the given method.

From f0498a122cdfcdf74f77b823c89906b097c02805 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 14:34:26 -0400
Subject: [PATCH 37/60] check if fortran extension module can be imported

---
 wext/__init__.py                              |  2 +-
 .../Contents/Info.plist                       | 20 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist

diff --git a/wext/__init__.py b/wext/__init__.py
index de8e09d..8a0c19e 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -12,4 +12,4 @@
 from .saddlepoint import saddlepoint
 import comet_exact_tests
 from .exclusivity_tests import re_test, wre_test
-from bipartite_edge_swap_module import bipartite_edge_swap
\ No newline at end of file
+import bipartite_edge_swap_module
\ No newline at end of file
diff --git a/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist b/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist
new file mode 100644
index 0000000..b5e4350
--- /dev/null
+++ b/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+	<dict>
+		<key>CFBundleDevelopmentRegion</key>
+		<string>English</string>
+		<key>CFBundleIdentifier</key>
+		<string>com.apple.xcode.dsym.bipartite_edge_swap_module.cpython-37m-darwin.so</string>
+		<key>CFBundleInfoDictionaryVersion</key>
+		<string>6.0</string>
+		<key>CFBundlePackageType</key>
+		<string>dSYM</string>
+		<key>CFBundleSignature</key>
+		<string>????</string>
+		<key>CFBundleShortVersionString</key>
+		<string>1.0</string>
+		<key>CFBundleVersion</key>
+		<string>1</string>
+	</dict>
+</plist>

From 096a4126dab464e6cbe2e4aa5f3e73755508d5be Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 14:44:11 -0400
Subject: [PATCH 38/60] revised travis config

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index d45cf56..f62bf87 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,8 +13,8 @@ install:
     - pip install -r requirements.txt
     - cd wext
     - python setup.py install
-    - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
     - cd ../
+    - f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
     - pwd
     - ls
 script:

From 3d4972cd467defee96e360f25e65d75932865c5a Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 14:50:18 -0400
Subject: [PATCH 39/60] check the FORTRAN code installs via setup.py

---
 .travis.yml                                   |  2 +-
 .../Contents/Info.plist                       | 20 -------------------
 wext/setup.py                                 |  7 +++++++
 3 files changed, 8 insertions(+), 21 deletions(-)
 delete mode 100644 wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist

diff --git a/.travis.yml b/.travis.yml
index f62bf87..7c684c4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,7 @@ install:
     - cd wext
     - python setup.py install
     - cd ../
-    - f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
+    ##- f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
     - pwd
     - ls
 script:
diff --git a/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist b/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist
deleted file mode 100644
index b5e4350..0000000
--- a/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist
+++ /dev/null
@@ -1,20 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-	<dict>
-		<key>CFBundleDevelopmentRegion</key>
-		<string>English</string>
-		<key>CFBundleIdentifier</key>
-		<string>com.apple.xcode.dsym.bipartite_edge_swap_module.cpython-37m-darwin.so</string>
-		<key>CFBundleInfoDictionaryVersion</key>
-		<string>6.0</string>
-		<key>CFBundlePackageType</key>
-		<string>dSYM</string>
-		<key>CFBundleSignature</key>
-		<string>????</string>
-		<key>CFBundleShortVersionString</key>
-		<string>1.0</string>
-		<key>CFBundleVersion</key>
-		<string>1</string>
-	</dict>
-</plist>
diff --git a/wext/setup.py b/wext/setup.py
index 1c76366..8ec5bda 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -31,3 +31,10 @@
     extra_compile_args = ['-g', '-O0'])
 setup(name='comet_exact_tests', version='0.0.1',  ext_modules=[module],
       description='CoMEt exact test implementation.')
+
+## Compile the FORTRAN extension, bipartite_edge_swap_module
+srcs = ['/src/fortran/bipartite_edge_swap_module.f95']
+module = Extension('bipartite_edge_swap_module', include_dirs=[numpy.get_include()],
+    sources = [ thisDir + s for s in srcs ]
+setup(name='bipartite_edge_swap_module', version='0.0.1',  ext_modules=[module],
+      description='FORTRAN code description')

From 81ebf4303b65410b0798cfac0c66e5b8a693354b Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 15:05:52 -0400
Subject: [PATCH 40/60] missing )

---
 wext/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wext/setup.py b/wext/setup.py
index 8ec5bda..f28e3ec 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -35,6 +35,6 @@
 ## Compile the FORTRAN extension, bipartite_edge_swap_module
 srcs = ['/src/fortran/bipartite_edge_swap_module.f95']
 module = Extension('bipartite_edge_swap_module', include_dirs=[numpy.get_include()],
-    sources = [ thisDir + s for s in srcs ]
+    sources = [ thisDir + s for s in srcs ])
 setup(name='bipartite_edge_swap_module', version='0.0.1',  ext_modules=[module],
       description='FORTRAN code description')

From a8ff8d207efa768aa5c7bcad0ee17e3874e04d54 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 15:24:08 -0400
Subject: [PATCH 41/60] revise setup.py

---
 wext/setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wext/setup.py b/wext/setup.py
index f28e3ec..c4ba272 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -3,7 +3,8 @@
 """Compiles the C modules used by the weighted exclusivity test."""
 
 # Load required modules
-from distutils.core import setup, Extension
+from numpy.distutils.core import setup
+from numpy.distutils.extension import Extension
 import numpy, os
 
 thisDir = os.path.dirname(os.path.realpath(__file__))

From 9e5d982089857445d87c9cc26fc20d42ccf5c99d Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 23:45:54 -0400
Subject: [PATCH 42/60] revise c extensions

---
 wext/src/c/comet_exact_test.c | 18 +++++++++++++++++-
 wext/src/c/poibinmodule.c     | 11 +++++++++++
 wext/src/c/wext_exact_test.c  | 14 ++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c
index dc36a59..7736a5f 100755
--- a/wext/src/c/comet_exact_test.c
+++ b/wext/src/c/comet_exact_test.c
@@ -324,12 +324,16 @@ static PyObject *py_comet_exact_test(PyObject *self, PyObject *args){
 
 // try renaming this to 'comet_exact_tests'
 
+
+
 // Register the functions we want to be accessible from Python
 static PyMethodDef cometExactTest[] = {
     {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, 
     {NULL, NULL, 0, NULL}
 };
 
+
+
 // PYTHON 2
 // Note that the suffix of init has to match the name of the module,
 // both here and in the setup.py file
@@ -340,6 +344,9 @@ static PyMethodDef cometExactTest[] = {
 //     }
 // }
 
+
+#if PY_MAJOR_VERSION >= 3
+
 // define structure for module
 static struct PyModuleDef comet_exact_tests = {
   PyModuleDef_HEAD_INIT,   // required
@@ -349,7 +356,6 @@ static struct PyModuleDef comet_exact_tests = {
   cometExactTest            // method definitions
 };
 
-
 // finally, write the initalizer function
 
 PyMODINIT_FUNC PyInit_comet_exact_tests(void)  
@@ -357,5 +363,15 @@ PyMODINIT_FUNC PyInit_comet_exact_tests(void)
     return PyModule_Create(&comet_exact_tests);
 }
 
+#else
+
+PyMODINIT_FUNC initcomet_exact_test(void) {
+    PyObject *m = Py_InitModule("comet_exact_test", cometExactTest);
+        if (m == NULL) {
+            return;
+    }
+}
+
+#endif
 
 
diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c
index f2bf67b..a04d558 100755
--- a/wext/src/c/poibinmodule.c
+++ b/wext/src/c/poibinmodule.c
@@ -93,6 +93,8 @@ static PyMethodDef poibinMethods[] = {
 
 // define the module structure
 
+#if PY_MAJOR_VERSION >= 3
+
 static struct PyModuleDef cpoibin = {
   PyModuleDef_HEAD_INIT,   // required
   "cpoibin",           // name of module
@@ -108,4 +110,13 @@ PyMODINIT_FUNC PyInit_cpoibin(void)
     return PyModule_Create(&cpoibin);
 }
 
+#else
+
+PyMODINIT_FUNC initcpoibin(void) {
+    PyObject *m = Py_InitModule("cpoibin", poibinMethods);
+    if (m == NULL) {
+        return;
+    }
+}
 
+#endif
diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c
index 7064296..86f9533 100755
--- a/wext/src/c/wext_exact_test.c
+++ b/wext/src/c/wext_exact_test.c
@@ -226,6 +226,9 @@ static PyMethodDef weightedEnrichmentMethods[] = {
 //     }
 // }
 
+
+#if PY_MAJOR_VERSION >= 3
+
 // define module structure
 
 static struct PyModuleDef wext_exact_test = {
@@ -244,3 +247,14 @@ PyMODINIT_FUNC PyInit_wext_exact_test(void)
     return PyModule_Create(&wext_exact_test);
 }
 
+#else
+
+PyMODINIT_FUNC initwext_exact_test(void) {
+    PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods);
+    if (m == NULL) {
+        return;
+    }
+}
+
+
+#endif
\ No newline at end of file

From d9a03b70abe81bda4072b65615f2de0e10bc918e Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Mon, 10 Sep 2018 23:56:55 -0400
Subject: [PATCH 43/60] allow 2.7 builds with python

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 7c684c4..4652bb5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,6 @@
 language: python
 python:
-    ##- 2.7
+    - 2.7
     - 3.4
     - 3.5
     - 3.6

From 48873ff779199d111cdac664da2117f5b608ca03 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 00:00:42 -0400
Subject: [PATCH 44/60] revise how module named

---
 wext/src/c/comet_exact_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c
index 7736a5f..8b59a53 100755
--- a/wext/src/c/comet_exact_test.c
+++ b/wext/src/c/comet_exact_test.c
@@ -365,8 +365,8 @@ PyMODINIT_FUNC PyInit_comet_exact_tests(void)
 
 #else
 
-PyMODINIT_FUNC initcomet_exact_test(void) {
-    PyObject *m = Py_InitModule("comet_exact_test", cometExactTest);
+PyMODINIT_FUNC initcomet_exact_tests(void) {
+    PyObject *m = Py_InitModule("comet_exact_tests", cometExactTest);
         if (m == NULL) {
             return;
     }

From 3d74ad2141e27974209be2f86003c8ebf28d3a97 Mon Sep 17 00:00:00 2001
From: Biederstedt <biederse@lski2603.mskcc.org>
Date: Tue, 11 Sep 2018 10:21:33 -0400
Subject: [PATCH 45/60] removed comments

---
 wext/src/c/comet_exact_test.c | 11 -----------
 wext/src/c/poibinmodule.c     |  9 ---------
 wext/src/c/wext_exact_test.c  |  9 ---------
 3 files changed, 29 deletions(-)

diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c
index 8b59a53..b6097d6 100755
--- a/wext/src/c/comet_exact_test.c
+++ b/wext/src/c/comet_exact_test.c
@@ -334,17 +334,6 @@ static PyMethodDef cometExactTest[] = {
 
 
-// PYTHON 2
-// Note that the suffix of init has to match the name of the module,
-// both here and in the setup.py file
-// PyMODINIT_FUNC initcomet_exact_test(void) {
-//     PyObject *m = Py_InitModule("comet_exact_test", cometExactTest);
-//     if (m == NULL) {
-//         return;
-//     }
-// }
-
-
 #if PY_MAJOR_VERSION >= 3
 
 // define structure for module
diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c
index a04d558..6c3e71a 100755
--- a/wext/src/c/poibinmodule.c
+++ b/wext/src/c/poibinmodule.c
@@ -81,15 +81,6 @@ static PyMethodDef poibinMethods[] = {
     {NULL, NULL, 0, NULL}
 };
 
-// PYTHON 2
-// Note that the suffix of init has to match the name of the module,
-// both here and in the setup.py file
-// PyMODINIT_FUNC initcpoibin(void) {
-//    PyObject *m = Py_InitModule("cpoibin", poibinMethods);
-//    if (m == NULL) {
-//        return;
-//    }
-// }
 
 // define the module structure
 
diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c
index 86f9533..3ab4209 100755
--- a/wext/src/c/wext_exact_test.c
+++ b/wext/src/c/wext_exact_test.c
@@ -216,15 +216,6 @@ static PyMethodDef weightedEnrichmentMethods[] = {
     {NULL, NULL, 0, NULL}
 };
 
-// PYTHON 2
-// Note that the suffix of init has to match the name of the module,
-// both here and in the setup.py file
-// PyMODINIT_FUNC initwext_exact_test(void) {
-//     PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods);
-//     if (m == NULL) {
-//         return;
-//     }
-// }
 
 
 #if PY_MAJOR_VERSION >= 3

From 79f697b8ab0b906b50318e848eff3411968f54af Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 14:53:08 -0400
Subject: [PATCH 46/60] revised string handling, outside scripts

---
 compute_mutation_probabilities.py             |   3 +-
 .../simple/all-co-occurrence_results-k2.tsv   |   2 +
 .../simple/any-co-occurrence_results-k2.tsv   |   2 +
 examples/simple/commands_python2.sh           |  52 ++++++++++++++++++
 examples/simple/commands_python3.sh           |  52 ++++++++++++++++++
 examples/simple/data.json                     |   1 +
 examples/simple/exclusivity_results-k2.tsv    |   2 +
 examples/simple/weights.npy                   | Bin 0 -> 1024 bytes
 process_mutations.py                          |  12 ++--
 wext/__init__.py                              |   2 +-
 10 files changed, 120 insertions(+), 8 deletions(-)
 create mode 100644 examples/simple/all-co-occurrence_results-k2.tsv
 create mode 100644 examples/simple/any-co-occurrence_results-k2.tsv
 create mode 100644 examples/simple/commands_python2.sh
 create mode 100644 examples/simple/commands_python3.sh
 create mode 100644 examples/simple/data.json
 create mode 100644 examples/simple/exclusivity_results-k2.tsv
 create mode 100644 examples/simple/weights.npy

diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
index a924427..ab1989c 100755
--- a/compute_mutation_probabilities.py
+++ b/compute_mutation_probabilities.py
@@ -4,6 +4,7 @@
 import sys, os, argparse, json, numpy as np, multiprocessing as mp, random
 from collections import defaultdict
 
+
 # Load the weighted exclusivity test
 this_dir = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(this_dir)
@@ -44,7 +45,7 @@ def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, m, n, num_
             indices.append( (edge[0]-1, edge[1]-1) )
 
         # Record the permutation
-        observed[list(zip(*indices))] += 1.
+        observed[tuple(zip(*indices))] += 1.
         geneToCases = dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) )
         permutations.append( dict(geneToCases=geneToCases, permutation_number=seed) )
 
diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv
new file mode 100644
index 0000000..9a5b388
--- /dev/null
+++ b/examples/simple/all-co-occurrence_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+c, d	0.003668945489200462	0.18261601945262562	0.00248384475708	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv
new file mode 100644
index 0000000..7107006
--- /dev/null
+++ b/examples/simple/any-co-occurrence_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+c, d	0.003668945489200462	0.18261601945262562	0.00242900848389	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/commands_python2.sh b/examples/simple/commands_python2.sh
new file mode 100644
index 0000000..7a43965
--- /dev/null
+++ b/examples/simple/commands_python2.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+num_permutations=1000
+num_cores=4
+
+# Preprocess mutations.
+python2 ../../process_mutations.py \
+    -m  adjacency_list.tsv \
+    -ct NA \
+    -o  data.json
+
+# Compute mutation probabilities.
+python2 ../../compute_mutation_probabilities.py \
+    -mf data.json \
+    -np $num_permutations \
+    -nc $num_cores \
+    -wf weights.npy \
+    -s  12345 \
+    -v  1
+
+# Find sets using mutual exclusivity test statistic.
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  exclusivity \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  exclusivity_results \
+    -v  0
+
+# Find sets using a co-occurrence test statistic (any co-occurrence).
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  any-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  any-co-occurrence_results \
+    -v  0
+
+# Find sets using another co-occurrence test statistic (all co-occurrence).
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  all-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  all-co-occurrence_results \
+    -v  0
diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh
new file mode 100644
index 0000000..96904cf
--- /dev/null
+++ b/examples/simple/commands_python3.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+num_permutations=1000
+num_cores=4
+
+# Preprocess mutations.
+python3 ../../process_mutations.py \
+    -m  adjacency_list.tsv \
+    -ct NA \
+    -o  data.json
+
+# Compute mutation probabilities.
+python3 ../../compute_mutation_probabilities.py \
+    -mf data.json \
+    -np $num_permutations \
+    -nc $num_cores \
+    -wf weights.npy \
+    -s  12345 \
+    -v  1
+
+# Find sets using mutual exclusivity test statistic.
+python3 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  exclusivity \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  exclusivity_results \
+    -v  0
+
+# Find sets using a co-occurrence test statistic (any co-occurrence).
+python3 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  any-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  any-co-occurrence_results \
+    -v  0
+
+# Find sets using another co-occurrence test statistic (all co-occurrence).
+python3 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  all-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  all-co-occurrence_results \
+    -v  0
diff --git a/examples/simple/data.json b/examples/simple/data.json
new file mode 100644
index 0000000..731a28a
--- /dev/null
+++ b/examples/simple/data.json
@@ -0,0 +1 @@
+{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}}
\ No newline at end of file
diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv
new file mode 100644
index 0000000..bd8a6b0
--- /dev/null
+++ b/examples/simple/exclusivity_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+a, b	0.00025630815683653265	0.010596224876535663	0.00440192222595	12	0	2	6	6	0
\ No newline at end of file
diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy
new file mode 100644
index 0000000000000000000000000000000000000000..0cdfdcf8096fc128e3dd5fb94fbe128002d925e6
GIT binary patch
literal 1024
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I#iItqp+nmP)#3giMV#SKLn6W_kFSM^<K@%zI;`?WGrDc1Kd<D_BgUwY{M
zSN!|Ho)8T)7e>SM!}zqe@8-Gh!H<`|wSTdt(NFrt6?<E*>x&EaKEg@E)X&;IZ32JQ
z3wuH|%v=}^(+}g*+CHV*KkKsg+_S&1LEGV->2!Oc<^!h|x^BWr!_+h8H?kb<KWI;g
zhM5baU#S^#tdY53FRg!9g{`sGen|nyhYN7hF!i7RUa5Ggw$Yvt4Ko);!`utAf4KP%
E0I)*lX#fBK

literal 0
HcmV?d00001

diff --git a/process_mutations.py b/process_mutations.py
index c886271..d040abc 100755
--- a/process_mutations.py
+++ b/process_mutations.py
@@ -161,12 +161,12 @@ def run( args ):
 
     # Summarize the data
     if args.verbose > 0:
-        print('* Summary of mutation data...')
-        print('\tGenes: {}'.format(num_genes))
-        print('\tPatients: {} ({} hypermutators)'.format(num_patients, len(hypermutators)))
-        print('\tUsed variant classes:', ', '.join(sorted(vc)))
-        print('\tUsed variant types:', ', '.join(sorted(vt)))
-        print('\tUsed validation statuses:', ', '.join(sorted(vs)))
+        print("* Summary of mutation data...")
+        print("\tGenes: {}".format(num_genes))
+        print("\tPatients: {} ({} hypermutators)".format(num_patients, len(hypermutators)))
+        print("\tUsed variant classes: " + ", ".join(sorted(vc)))
+        print("\tUsed variant types: " + ", ".join(sorted(vt)))
+        print("\tUsed validation statuses: " + ", ".join(sorted(vs)))
 
     # Output to file
     with open(args.output_file, 'w') as OUT:
diff --git a/wext/__init__.py b/wext/__init__.py
index 8a0c19e..de8e09d 100755
--- a/wext/__init__.py
+++ b/wext/__init__.py
@@ -12,4 +12,4 @@
 from .saddlepoint import saddlepoint
 import comet_exact_tests
 from .exclusivity_tests import re_test, wre_test
-import bipartite_edge_swap_module
\ No newline at end of file
+from bipartite_edge_swap_module import bipartite_edge_swap
\ No newline at end of file

From e799a4fad4a81c77e48efc2e568b794402c95b8b Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 14:53:56 -0400
Subject: [PATCH 47/60] revised string handling, outside scripts

---
 .../simple/all-co-occurrence_results-k2.tsv   |  2 -
 .../simple/any-co-occurrence_results-k2.tsv   |  2 -
 examples/simple/commands_python2.sh           | 52 -------------------
 examples/simple/commands_python3.sh           | 52 -------------------
 examples/simple/data.json                     |  1 -
 examples/simple/exclusivity_results-k2.tsv    |  2 -
 6 files changed, 111 deletions(-)
 delete mode 100644 examples/simple/all-co-occurrence_results-k2.tsv
 delete mode 100644 examples/simple/any-co-occurrence_results-k2.tsv
 delete mode 100644 examples/simple/commands_python2.sh
 delete mode 100644 examples/simple/commands_python3.sh
 delete mode 100644 examples/simple/data.json
 delete mode 100644 examples/simple/exclusivity_results-k2.tsv

diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv
deleted file mode 100644
index 9a5b388..0000000
--- a/examples/simple/all-co-occurrence_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-c, d	0.003668945489200462	0.18261601945262562	0.00248384475708	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv
deleted file mode 100644
index 7107006..0000000
--- a/examples/simple/any-co-occurrence_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-c, d	0.003668945489200462	0.18261601945262562	0.00242900848389	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/commands_python2.sh b/examples/simple/commands_python2.sh
deleted file mode 100644
index 7a43965..0000000
--- a/examples/simple/commands_python2.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-num_permutations=1000
-num_cores=4
-
-# Preprocess mutations.
-python2 ../../process_mutations.py \
-    -m  adjacency_list.tsv \
-    -ct NA \
-    -o  data.json
-
-# Compute mutation probabilities.
-python2 ../../compute_mutation_probabilities.py \
-    -mf data.json \
-    -np $num_permutations \
-    -nc $num_cores \
-    -wf weights.npy \
-    -s  12345 \
-    -v  1
-
-# Find sets using mutual exclusivity test statistic.
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  exclusivity \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  exclusivity_results \
-    -v  0
-
-# Find sets using a co-occurrence test statistic (any co-occurrence).
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  any-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  any-co-occurrence_results \
-    -v  0
-
-# Find sets using another co-occurrence test statistic (all co-occurrence).
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  all-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  all-co-occurrence_results \
-    -v  0
diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh
deleted file mode 100644
index 96904cf..0000000
--- a/examples/simple/commands_python3.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-num_permutations=1000
-num_cores=4
-
-# Preprocess mutations.
-python3 ../../process_mutations.py \
-    -m  adjacency_list.tsv \
-    -ct NA \
-    -o  data.json
-
-# Compute mutation probabilities.
-python3 ../../compute_mutation_probabilities.py \
-    -mf data.json \
-    -np $num_permutations \
-    -nc $num_cores \
-    -wf weights.npy \
-    -s  12345 \
-    -v  1
-
-# Find sets using mutual exclusivity test statistic.
-python3 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  exclusivity \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  exclusivity_results \
-    -v  0
-
-# Find sets using a co-occurrence test statistic (any co-occurrence).
-python3 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  any-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  any-co-occurrence_results \
-    -v  0
-
-# Find sets using another co-occurrence test statistic (all co-occurrence).
-python3 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  all-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  all-co-occurrence_results \
-    -v  0
diff --git a/examples/simple/data.json b/examples/simple/data.json
deleted file mode 100644
index 731a28a..0000000
--- a/examples/simple/data.json
+++ /dev/null
@@ -1 +0,0 @@
-{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}}
\ No newline at end of file
diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv
deleted file mode 100644
index bd8a6b0..0000000
--- a/examples/simple/exclusivity_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-a, b	0.00025630815683653265	0.010596224876535663	0.00440192222595	12	0	2	6	6	0
\ No newline at end of file

From ec674fc1f348847c7e9df2e9f17faa2486e4cec5 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 17:05:36 -0400
Subject: [PATCH 48/60] first commit

---
 process_mutations.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/process_mutations.py b/process_mutations.py
index d040abc..66e78d4 100755
--- a/process_mutations.py
+++ b/process_mutations.py
@@ -178,9 +178,9 @@ def run( args ):
                       patient_whitelist_file=os.path.abspath(args.patient_whitelist) if args.patient_whitelist else None,
                       hypermutators_file=os.path.abspath(args.hypermutators_file) if args.hypermutators_file else None)
         output = dict(params=params, patients=patients, genes=genes, hypermutators=list(hypermutators),
-                      geneToCases=dict( (g, list(cases)) for g, cases in list(geneToCases.items())),
+                      geneToCases=dict( (g, list(cases)) for g, cases in geneToCases.items()),
                       patientToType=patientToType,
-                      patientToMutations=dict( (p, list(muts)) for p, muts in list(patientToMutations.items())),
+                      patientToMutations=dict( (p, list(muts)) for p, muts in patientToMutations.items()),
                       num_genes=num_genes, num_patients=num_patients)
         json.dump( output, OUT )
 

From bb0d1514aa00e779f1316d2f751b2f9ce3bb9391 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 17:26:17 -0400
Subject: [PATCH 49/60] revised external scripts

---
 find_exclusive_sets.py | 17 +++++++++--------
 find_sets.py           | 16 ++++++++--------
 process_mutations.py   |  6 +++---
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py
index 88a6401..f6046c4 100755
--- a/find_exclusive_sets.py
+++ b/find_exclusive_sets.py
@@ -18,7 +18,7 @@ def get_parser():
     parser.add_argument('-o', '--output_prefix', type=str, required=True)
     parser.add_argument('-f', '--min_frequency', type=int, default=1, required=False)
     parser.add_argument('-c', '--num_cores', type=int, required=False, default=1)
-    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=range(5))
+    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=list(range(5)))
     parser.add_argument('-r', '--report_invalids', action='store_true', default=False, required=False)
     parser.add_argument('--json_format', action='store_true', default=False, required=False)
 
@@ -105,11 +105,12 @@ def load_mutation_files(mutation_files):
         genes    |= set(type_genes)
 
         # Record the mutations in each gene
-        for g, cases in typeGeneToCases.iteritems(): geneToCases[g] |= cases
+        for g, cases in typeGeneToCases.items(): 
+            geneToCases[g] |= cases
 
         # Record the genes, patients, and their indices for later
-        typeToGeneIndex.append(dict(zip(type_genes, range(len(type_genes)))))
-        typeToPatientIndex.append(dict(zip(type_patients, range(len(type_patients)))))
+        typeToGeneIndex.append(dict(zip(type_genes, list(range(len(type_genes))))))
+        typeToPatientIndex.append(dict(zip(type_patients, list(range(len(type_patients))))))
 
     return genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex
 
@@ -128,7 +129,7 @@ def run( args ):
     num_all_genes, num_patients = len(genes), len(patients)
 
     # Restrict to genes mutated in a minimum number of samples
-    geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency )
+    geneToCases = dict( (g, cases) for g, cases in geneToCases.items() if g in genes and len(cases) >= args.min_frequency )
     genes     = set(geneToCases.keys())
     num_genes = len(genes)
 
@@ -141,7 +142,7 @@ def run( args ):
         # Since we are looking for co-occurrence between exclusive sets with
         # an annotation A, we add events for each patient NOT annotated by
         # the given annotation
-        for annotation, cases in list(annotationToPatients.items()):
+        for annotation, cases in annotationToPatients.items():
             not_cases = patients - cases
             if len(not_cases) > 0:
                 geneToCases[annotation] = not_cases
@@ -159,8 +160,8 @@ def run( args ):
     test = nameToTest[args.test]
     if test == WRE:
         # Create master versions of the indices
-        masterGeneToIndex    = dict(list(zip(sorted(genes), list(range(num_genes)))))
-        masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients)))))
+        masterGeneToIndex    = dict(zip(sorted(genes), list(range(num_genes))))
+        masterPatientToIndex = dict(zip(sorted(patients), list(range(num_patients))))
         geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex)
     else:
         geneToP = None
diff --git a/find_sets.py b/find_sets.py
index 1154da7..aa1f984 100755
--- a/find_sets.py
+++ b/find_sets.py
@@ -24,7 +24,7 @@ def get_parser():
     parser.add_argument('-t', '--test', type=str, required=False, default='WRE', choices=['WRE'])
     parser.add_argument('-m', '--method', type=str, required=False, default='Saddlepoint', choices=['Saddlepoint'])
     parser.add_argument('-s', '--statistic', type=str, required=True, choices=['exclusivity', 'any-co-occurrence', 'all-co-occurrence'])
-    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=range(5))
+    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=list(range(5)) )
     parser.add_argument('-r', '--report_invalids', action='store_true', default=False, required=False)
     parser.add_argument('--json_format', action='store_true', default=False, required=False)
     return parser
@@ -87,12 +87,12 @@ def load_mutation_files(mutation_files):
         genes    |= set(type_genes)
 
         # Record the mutations in each gene
-        for g, cases in list(typeGeneToCases.items()): 
+        for g, cases in typeGeneToCases.items(): 
             geneToCases[g] |= cases
 
         # Record the genes, patients, and their indices for later
-        typeToGeneIndex.append(dict(zip(type_genes, range(len(type_genes)))))
-        typeToPatientIndex.append(dict(zip(type_patients, range(len(type_patients)))))
+        typeToGeneIndex.append(dict(zip(type_genes, list(range(len(type_genes))))))
+        typeToPatientIndex.append(dict(zip(type_patients, list(range(len(type_patients))))))
 
     return genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex
 
@@ -107,7 +107,7 @@ def run( args ):
     num_all_genes, num_patients = len(genes), len(patients)
 
     # Restrict to genes mutated in a minimum number of samples
-    geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency )
+    geneToCases = dict( (g, cases) for g, cases in geneToCases.items() if g in genes and len(cases) >= args.min_frequency )
     genes     = set(geneToCases.keys())
     num_genes = len(genes)
 
@@ -120,7 +120,7 @@ def run( args ):
         # Since we are looking for co-occurrence between exclusive sets with
         # an annotation A, we add events for each patient NOT annotated by
         # the given annotation
-        for annotation, cases in list(annotationToPatients.items()):
+        for annotation, cases in annotationToPatients.items():
             not_cases = patients - cases
             if len(not_cases) > 0:
                 geneToCases[annotation] = not_cases
@@ -137,8 +137,8 @@ def run( args ):
     # Load the weights (if necessary)
 
     # Create master versions of the indices
-    masterGeneToIndex    = dict(list(zip(sorted(genes), list(range(num_genes)))))
-    masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients)))))
+    masterGeneToIndex    = dict(zip(sorted(genes), list(range(num_genes))))
+    masterPatientToIndex = dict(zip(sorted(patients), list(range(num_patients))))
     geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex)
 
     if args.verbose > 0: 
diff --git a/process_mutations.py b/process_mutations.py
index 66e78d4..99e7b09 100755
--- a/process_mutations.py
+++ b/process_mutations.py
@@ -24,7 +24,7 @@ def get_parser():
 
 def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc, vt, vs, ivc, ivt, ivs, verbose ):
     if verbose > 1: 
-        print('\tLoading MAF:', maf_file)
+        print('\tLoading MAF: ', maf_file)
     genes, patients = set(), set()
     with open(maf_file, 'r') as IN:
         seenHeader = False
@@ -32,7 +32,7 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc
             arr = l.rstrip('\n').split('\t')
             # Parse the header if we haven't seen it yet
             if not seenHeader and arr[0].lower() == 'hugo_symbol':
-                arr              = list(map(str.lower, arr))
+                arr              = map(str.lower, arr)
                 seenHeader       = True
                 gene_index       = 0
                 patient_index    = arr.index('tumor_sample_barcode')
@@ -86,7 +86,7 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc
 
 def process_events_file( events_file, patientWhitelist, geneToCases, patientToMutations, verbose ):
     if verbose > 1: 
-        print('\tProcessing events file:', events_file)
+        print('\tProcessing events file: ', events_file)
 
     # Parse the events file
     events, patients = set(), set()

From 4158cbd6c858a2c1a9941545ce00ee8ece8fcb36 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 18:51:50 -0400
Subject: [PATCH 50/60] fixed performance issue, added future dependency

---
 compute_mutation_probabilities.py             |  9 ++--
 .../simple/all-co-occurrence_results-k2.tsv   |  2 +
 .../simple/any-co-occurrence_results-k2.tsv   |  2 +
 examples/simple/commands2.sh                  | 52 +++++++++++++++++++
 examples/simple/commands3.sh                  | 52 +++++++++++++++++++
 examples/simple/data.json                     |  1 +
 examples/simple/exclusivity_results-k2.tsv    |  2 +
 requirements.txt                              |  1 +
 wext/exact.py                                 |  4 +-
 wext/exclusivity_tests.py                     |  2 +-
 wext/i_o.py                                   | 18 +++----
 wext/mcmc.py                                  |  9 ++--
 wext/statistics.py                            |  4 +-
 13 files changed, 137 insertions(+), 21 deletions(-)
 create mode 100644 examples/simple/all-co-occurrence_results-k2.tsv
 create mode 100644 examples/simple/any-co-occurrence_results-k2.tsv
 create mode 100644 examples/simple/commands2.sh
 create mode 100644 examples/simple/commands3.sh
 create mode 100644 examples/simple/data.json
 create mode 100644 examples/simple/exclusivity_results-k2.tsv

diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
index ab1989c..8606d5a 100755
--- a/compute_mutation_probabilities.py
+++ b/compute_mutation_probabilities.py
@@ -9,6 +9,7 @@
 this_dir = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(this_dir)
 from wext import *
+from past.builtins import xrange
 
 # Argument parser
 def get_parser():
@@ -108,7 +109,7 @@ def run( args ):
     max_tries = 10**9
     if args.seed is not None:
         random.seed(args.seed)
-    seeds = random.sample(list(range(1, 2*10**9)), args.num_permutations)
+    seeds = random.sample(xrange(1, 2*10**9), args.num_permutations)
 
     # Run the bipartite edge swaps in parallel if more than one core indicated
     num_cores = min(args.num_cores if args.num_cores != -1 else mp.cpu_count(), args.num_permutations)
@@ -156,12 +157,12 @@ def run( args ):
         P = postprocess_weight_matrix(P, r, s)
 
         # Verify the weights again
-        for g, obs in list(geneToObserved.items()):
+        for g, obs in geneToObserved.items():
             assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol)
 
-        for p, obs in list(patientToObserved.items()):
+        for p, obs in patientToObserved.items():
             assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol)
-
+ 
         # Add pseudocounts to entries with no mutations observed; unlikely or impossible after post-processing step
         P[P == 0] = 1./(2. * args.num_permutations)
 
diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv
new file mode 100644
index 0000000..44fc4b9
--- /dev/null
+++ b/examples/simple/all-co-occurrence_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+c, d	0.003668945489200462	0.18261601945262562	0.00278782844543	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv
new file mode 100644
index 0000000..1eb94b2
--- /dev/null
+++ b/examples/simple/any-co-occurrence_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+c, d	0.003668945489200462	0.18261601945262562	0.00311303138733	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/commands2.sh b/examples/simple/commands2.sh
new file mode 100644
index 0000000..7a43965
--- /dev/null
+++ b/examples/simple/commands2.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+num_permutations=1000
+num_cores=4
+
+# Preprocess mutations.
+python2 ../../process_mutations.py \
+    -m  adjacency_list.tsv \
+    -ct NA \
+    -o  data.json
+
+# Compute mutation probabilities.
+python2 ../../compute_mutation_probabilities.py \
+    -mf data.json \
+    -np $num_permutations \
+    -nc $num_cores \
+    -wf weights.npy \
+    -s  12345 \
+    -v  1
+
+# Find sets using mutual exclusivity test statistic.
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  exclusivity \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  exclusivity_results \
+    -v  0
+
+# Find sets using a co-occurrence test statistic (any co-occurrence).
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  any-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  any-co-occurrence_results \
+    -v  0
+
+# Find sets using another co-occurrence test statistic (all co-occurrence).
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  all-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  all-co-occurrence_results \
+    -v  0
diff --git a/examples/simple/commands3.sh b/examples/simple/commands3.sh
new file mode 100644
index 0000000..96904cf
--- /dev/null
+++ b/examples/simple/commands3.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+num_permutations=1000
+num_cores=4
+
+# Preprocess mutations.
+python3 ../../process_mutations.py \
+    -m  adjacency_list.tsv \
+    -ct NA \
+    -o  data.json
+
+# Compute mutation probabilities.
+python3 ../../compute_mutation_probabilities.py \
+    -mf data.json \
+    -np $num_permutations \
+    -nc $num_cores \
+    -wf weights.npy \
+    -s  12345 \
+    -v  1
+
+# Find sets using mutual exclusivity test statistic.
+python3 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  exclusivity \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  exclusivity_results \
+    -v  0
+
+# Find sets using a co-occurrence test statistic (any co-occurrence).
+python3 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  any-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  any-co-occurrence_results \
+    -v  0
+
+# Find sets using another co-occurrence test statistic (all co-occurrence).
+python3 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  all-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  all-co-occurrence_results \
+    -v  0
diff --git a/examples/simple/data.json b/examples/simple/data.json
new file mode 100644
index 0000000..731a28a
--- /dev/null
+++ b/examples/simple/data.json
@@ -0,0 +1 @@
+{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}}
\ No newline at end of file
diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv
new file mode 100644
index 0000000..844ce1f
--- /dev/null
+++ b/examples/simple/exclusivity_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+a, b	0.00025630815683653265	0.010596224876535663	0.0043740272522	12	0	2	6	6	0
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 12f43db..eca8f8a 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 numpy >=1.11.0
 scipy >=0.17.0
 networkx >= 1.11
+future >= 0.16.0
diff --git a/wext/exact.py b/wext/exact.py
index 9bcfb5c..345450f 100644
--- a/wext/exact.py
+++ b/wext/exact.py
@@ -19,8 +19,10 @@ def exact_test_k3(t, x, p, verbose):
     return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p )
 
 # Wrapper for k=2 exact test C function
-def exact_test_k2(t, x, y, p_x, p_y, verbose):
+def exact_test_k2(t, xy, pxpy, verbose):
 	# Two-sided test
+    (x, y) = xy
+    (p_x, p_y) = pxpy
     N = len(p_x)
     z = (x + y - t)/2 # count number of co-occurrences
     tail_masses = wext_exact_test.conditional(N, list(range(z+1)), x, y, p_x, p_y)
diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py
index a7c4229..936e77a 100755
--- a/wext/exclusivity_tests.py
+++ b/wext/exclusivity_tests.py
@@ -22,7 +22,7 @@ def wre_test(t, x, p, method=EXACT, verbose=0):
     # Check that the probabilities are in (0, 1].
     assert(all(0<b<= 1 for a in p for b in a))
     # Check that the number of mutations in each gene is not greater than the number of samples.
-    assert(all(a<=len(b) for a, b in list(zip(x, p))))
+    assert(all(a<=len(b) for a, b in zip(x, p)))
     # Check that the number of mutually exclusive mutations is not greater than the total number of mutations.
     assert(t<=sum(x))
     #Check that we've implemented the given set size with the exact test
diff --git a/wext/i_o.py b/wext/i_o.py
index a77a0bc..e533718 100755
--- a/wext/i_o.py
+++ b/wext/i_o.py
@@ -11,13 +11,13 @@ def load_mutation_data( mutation_file, min_freq=1 ):
         obj         = json.load(IN)
         all_genes   = obj['genes']
         patients    = obj['patients']
-        geneToCases = dict( (g, set(cases)) for g, cases in list(obj['geneToCases'].items()) )
-        patientToMutations = dict( (p, set(muts)) for p, muts in list(obj['patientToMutations'].items()) )
+        geneToCases = dict( (g, set(cases)) for g, cases in obj['geneToCases'].items() )
+        patientToMutations = dict( (p, set(muts)) for p, muts in obj['patientToMutations'].items() )
         hypermutators = set(obj['hypermutators'])
         params      = obj['params']
 
     # Restrict the genes based on the minimum frequency
-    genes = set( g for g, cases in list(geneToCases.items()) if len(cases) >= min_freq )
+    genes = set( g for g, cases in geneToCases.items() if len(cases) >= min_freq )
 
     return genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators
 
@@ -34,11 +34,11 @@ def load_patient_annotation_file(patient_annotation_file):
 # Converts keys from an iterable to tab-separated, so the dictionary can be
 # output as JSON
 def convert_dict_for_json( setToVal, sep='\t' ):
-    return dict( (sep.join(sorted(M)), val) for M, val in list(setToVal.items()) )
+    return dict( (sep.join(sorted(M)), val) for M, val in setToVal.items() ) 
 
 # Converts tab-separated keys back to frozensets
 def convert_dict_from_json( setToVal, sep='\t', iterable=frozenset ):
-    return dict( (iterable(M.split(sep)), val) for M, val in list(setToVal.items()) )
+    return dict( (iterable(M.split(sep)), val) for M, val in setToVal.items() )
 
 # Create the header strings for a contingency table
 def create_tbl_header( k ):
@@ -54,7 +54,7 @@ def output_enumeration_table(args, k, setToPval, setToRuntime, setToFDR, setToOb
         if not args.json_format:
             # Construct the rows
             rows = []
-            for M, pval in list(setToPval.items()):
+            for M, pval in setToPval.items():
                 if setToFDR[M]<=fdr_threshold:
                     X, T, Z, tbl = setToObs[M]
                     row = [ ', '.join(sorted(M)), pval, setToFDR[M], setToRuntime[M], T, Z ] + tbl
@@ -90,14 +90,14 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs):
         params = vars(args)
         output = dict(params=params, setToPval=convert_dict_for_json(setToPval),
                       setToObs=convert_dict_for_json(setToObs),
-                      setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for sets, freq in list(setsToFreq.items()) ))
+                      setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for sets, freq in setsToFreq.items()) )
         with open(args.output_prefix + '.json', 'w') as OUT:
             json.dump( output, OUT )
     else:
         # Output a gene set file
         with open(args.output_prefix + '-sampled-collections.tsv', 'w') as OUT:
             rows = []
-            for sets, freq in list(setsToFreq.items()):
+            for sets, freq in setsToFreq.items():
                 row = [ ' '.join([ ','.join(M) for M in sets ]), freq ]
                 row.append( sum( -np.log10(setToPval[M] ** args.alpha) for M in sets ))
                 rows.append(row)
@@ -109,7 +109,7 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs):
         # Output each of the sample gene sets
         with open(args.output_prefix + '-sampled-sets.tsv', 'w') as OUT:
             rows = []
-            for M, pval in list(setToPval.items()):
+            for M, pval in setToPval.items():
                 X, T, Z, tbl = setToObs[M]
                 rows.append([ ','.join(sorted(M)), pval, T, Z] + tbl )
             rows.sort(key=lambda r: r[1])
diff --git a/wext/mcmc.py b/wext/mcmc.py
index 29f43c9..4c15fc2 100755
--- a/wext/mcmc.py
+++ b/wext/mcmc.py
@@ -46,10 +46,11 @@ def _collection_weight(collection):
         return sum( _weight(M) for M in collection )
 
     def _to_collection(solution):
-        return frozenset( frozenset(M) for M in list(solution.values()) )
+        return frozenset( frozenset(M) for M in solution.values() ) 
 
     # Compute the acceptance ratio
-    def _log_accept_ratio( W_current, W_next ): return W_next - W_current
+    def _log_accept_ratio( W_current, W_next ): 
+        return W_next - W_current
 
     # Set up PRNG, sample space, and output
     random_seed(seed)
@@ -88,7 +89,7 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current
                 # if we only have one set, we can't swap between sets
                 if t == 1: continue
                 i = next_assigned[next_gene]
-                swap_gene = choice([ g for g in list(next_assigned.keys()) if g not in next_soln[i] ])
+                swap_gene = choice([ g for g in next_assigned.keys() if g not in next_soln[i] ])
                 j = next_assigned[swap_gene]
                 next_assigned[swap_gene] = i
                 next_soln[i].add(swap_gene)
@@ -129,7 +130,7 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current
     # Merge the various chains
     setsToTotalFreq = defaultdict(int)
     for counter in setsToFreq:
-        for sets, freq in list(counter.items()):
+        for sets, freq in counter.items():
             setsToTotalFreq[sets] += freq
 
     return setsToTotalFreq, setToPval, setToObs
diff --git a/wext/statistics.py b/wext/statistics.py
index b1a3834..4ceef54 100755
--- a/wext/statistics.py
+++ b/wext/statistics.py
@@ -36,7 +36,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'):
 
         sorted_q_values = np.zeros(n)
         sorted_q_values[n-1] = min(sorted_p_values[n-1], 1.0)
-        for i in reversed(list(range(n-1))):
+        for i in reversed(range(n-1)):
             sorted_q_values[i] = min(float(n)/float(i+1)*sorted_p_values[i], sorted_q_values[i+1])
 
         q_values = np.zeros(n)
@@ -49,7 +49,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'):
         c = np.sum(1.0/np.arange(1, n+1, dtype=np.float64))
         sorted_q_values = np.zeros(n)
         sorted_q_values[n-1] = min(c*sorted_p_values[n-1], 1.0)
-        for i in reversed(list(range(n-1))):
+        for i in reversed(range(n-1)):
             sorted_q_values[i] = min(c*(float(n)/float(i+1))*sorted_p_values[i], sorted_q_values[i+1])
 
         q_values = np.zeros(n)

From a60809a121878eef74d0030b9b5aadbfeb3a8fb1 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 18:52:43 -0400
Subject: [PATCH 51/60] fixed performance issue, added future dependency

---
 .../simple/all-co-occurrence_results-k2.tsv   |  2 -
 .../simple/any-co-occurrence_results-k2.tsv   |  2 -
 examples/simple/commands2.sh                  | 52 -------------------
 examples/simple/commands3.sh                  | 52 -------------------
 examples/simple/data.json                     |  1 -
 examples/simple/exclusivity_results-k2.tsv    |  2 -
 6 files changed, 111 deletions(-)
 delete mode 100644 examples/simple/all-co-occurrence_results-k2.tsv
 delete mode 100644 examples/simple/any-co-occurrence_results-k2.tsv
 delete mode 100644 examples/simple/commands2.sh
 delete mode 100644 examples/simple/commands3.sh
 delete mode 100644 examples/simple/data.json
 delete mode 100644 examples/simple/exclusivity_results-k2.tsv

diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv
deleted file mode 100644
index 44fc4b9..0000000
--- a/examples/simple/all-co-occurrence_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-c, d	0.003668945489200462	0.18261601945262562	0.00278782844543	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv
deleted file mode 100644
index 1eb94b2..0000000
--- a/examples/simple/any-co-occurrence_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-c, d	0.003668945489200462	0.18261601945262562	0.00311303138733	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/commands2.sh b/examples/simple/commands2.sh
deleted file mode 100644
index 7a43965..0000000
--- a/examples/simple/commands2.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-num_permutations=1000
-num_cores=4
-
-# Preprocess mutations.
-python2 ../../process_mutations.py \
-    -m  adjacency_list.tsv \
-    -ct NA \
-    -o  data.json
-
-# Compute mutation probabilities.
-python2 ../../compute_mutation_probabilities.py \
-    -mf data.json \
-    -np $num_permutations \
-    -nc $num_cores \
-    -wf weights.npy \
-    -s  12345 \
-    -v  1
-
-# Find sets using mutual exclusivity test statistic.
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  exclusivity \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  exclusivity_results \
-    -v  0
-
-# Find sets using a co-occurrence test statistic (any co-occurrence).
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  any-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  any-co-occurrence_results \
-    -v  0
-
-# Find sets using another co-occurrence test statistic (all co-occurrence).
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  all-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  all-co-occurrence_results \
-    -v  0
diff --git a/examples/simple/commands3.sh b/examples/simple/commands3.sh
deleted file mode 100644
index 96904cf..0000000
--- a/examples/simple/commands3.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-num_permutations=1000
-num_cores=4
-
-# Preprocess mutations.
-python3 ../../process_mutations.py \
-    -m  adjacency_list.tsv \
-    -ct NA \
-    -o  data.json
-
-# Compute mutation probabilities.
-python3 ../../compute_mutation_probabilities.py \
-    -mf data.json \
-    -np $num_permutations \
-    -nc $num_cores \
-    -wf weights.npy \
-    -s  12345 \
-    -v  1
-
-# Find sets using mutual exclusivity test statistic.
-python3 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  exclusivity \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  exclusivity_results \
-    -v  0
-
-# Find sets using a co-occurrence test statistic (any co-occurrence).
-python3 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  any-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  any-co-occurrence_results \
-    -v  0
-
-# Find sets using another co-occurrence test statistic (all co-occurrence).
-python3 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  all-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  all-co-occurrence_results \
-    -v  0
diff --git a/examples/simple/data.json b/examples/simple/data.json
deleted file mode 100644
index 731a28a..0000000
--- a/examples/simple/data.json
+++ /dev/null
@@ -1 +0,0 @@
-{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}}
\ No newline at end of file
diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv
deleted file mode 100644
index 844ce1f..0000000
--- a/examples/simple/exclusivity_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-a, b	0.00025630815683653265	0.010596224876535663	0.0043740272522	12	0	2	6	6	0
\ No newline at end of file

From e3a43558c4efdcc1bb34f6b9d53b21bfe1910065 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 22:09:05 -0400
Subject: [PATCH 52/60] revised experiments/eccb2016/scripts

---
 experiments/eccb2016/scripts/permutation_test_helper.py        | 2 +-
 .../eccb2016/scripts/reconcile_grid_permutation_test.py        | 3 ++-
 experiments/eccb2016/scripts/remove_genes_with_no_length.py    | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py
index fc9b90a..e2691ab 100644
--- a/experiments/eccb2016/scripts/permutation_test_helper.py
+++ b/experiments/eccb2016/scripts/permutation_test_helper.py
@@ -16,7 +16,7 @@
 parser.add_argument('-o', '--output_prefix', type=str, required=True)
 parser.add_argument('-w', '--wext_directory', type=str, required=True)
 parser.add_argument('-j', '--job_id', type=int, required=job_id is None, default=job_id)
-parser.add_argument('-v', '--verbose', type=int, required=False, default=0, choices=range(5))
+parser.add_argument('-v', '--verbose', type=int, required=False, default=0, choices=list(range(5)))
 args = parser.parse_args( sys.argv[1:] )
 
 # Load weighted exclusivity test
diff --git a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
index d7c0354..c60a68c 100644
--- a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
+++ b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py
@@ -19,6 +19,7 @@
 
 # Load and merge the JSON files
 def load_json_files(( json_files )):
+    
     setToCount       = defaultdict( int )
     setToRuntime     = defaultdict( float )
     setToObs         = dict()
@@ -27,7 +28,7 @@ def load_json_files(( json_files )):
         # Parse the JSON file
         with open(json_file, 'r') as IN:
             obj = json.load(IN)
-            for M, pval in obj['setToPval'].iteritems():
+            for M, pval in obj['setToPval'].items():
                 frozen_M = frozenset(M.split('\t'))
                 setToCount[frozen_M]   += int(round(pval * args.batch_size))
                 setToRuntime[frozen_M] += obj['setToRuntime'][M]
diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
index 7017b08..3b5d316 100644
--- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py
+++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
@@ -23,7 +23,7 @@
 # Remove genes without a length
 original_genes  = set(obj['genes'])
 remaining_genes = original_genes & set(geneToLength.keys())
-obj['geneToCases'] = dict( (g, cases) for g, cases in obj['geneToCases'].iteritems() if g in geneToLength )
+obj['geneToCases'] = dict( (g, cases) for g, cases in obj['geneToCases'].items() if g in geneToLength )
 obj['genes'] = sorted(obj['geneToCases'].keys())
 obj['num_genes'] = len(obj['genes'])
 obj['params']['lengths_file'] = os.path.abspath(args.lengths_file)

From a508daccd354be0d680901e398a39875ce8f9a1f Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Tue, 11 Sep 2018 22:09:35 -0400
Subject: [PATCH 53/60] revised experiments/eccb2016/scripts,
 permutation_helper

---
 .../eccb2016/scripts/permutation_test_helper.py      | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py
index e2691ab..56d8d57 100644
--- a/experiments/eccb2016/scripts/permutation_test_helper.py
+++ b/experiments/eccb2016/scripts/permutation_test_helper.py
@@ -25,19 +25,23 @@
 from wext import rce_permutation_test, load_mutation_data, output_enumeration_table
 
 # Load the mutation data
-if args.verbose > 0: print '* Loading mutation data..'
+if args.verbose > 0: 
+	print('* Loading mutation data..')
 mutation_data = load_mutation_data( args.mutation_file, args.min_freq )
 genes, all_genes, patients, geneToCases, _, params, _ = mutation_data
 num_patients = len(patients)
 sets = list( frozenset(t) for t in combinations(genes, args.gene_set_size) )
 
-if args.verbose > 0: print '\t- Testing {} sets of size k={}'.format(len(sets), args.gene_set_size)
+if args.verbose > 0: 
+	print('\t- Testing {} sets of size k={}'.format(len(sets), args.gene_set_size))
 
 # Run the permutational test
-if args.verbose > 0: print '* Running permutation test...'
+if args.verbose > 0: 
+	print('* Running permutation test...')
 start_index = (args.job_id-1) * args.batch_size
 permuted_files = get_permuted_files([args.input_directory], args.num_permutations)[start_index:start_index + args.batch_size]
-if args.verbose > 0: print '\t- Testing {} files'.format(len(permuted_files))
+if args.verbose > 0: 
+	print('\t- Testing {} files'.format(len(permuted_files)))
     
 setToPval, setToRuntime, setToFDR, setToObs = rce_permutation_test( sets, geneToCases, num_patients, permuted_files, 1, 0 )
 

From 82ff47c537e4628a92ab4cd615c051024f310813 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Wed, 12 Sep 2018 11:02:04 -0400
Subject: [PATCH 54/60] revised travis config

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4652bb5..22c0c61 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,6 @@ python:
     - 3.5
     - 3.6
 install:
-    - sudo apt-get -y update
     - sudo apt-get -y update
     - sudo apt-get -y install r-base
     - sudo apt-get -y install python-matplotlib

From 3ddc71fe17678388a78fab0ff9c9c04053bb7921 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 30 Sep 2018 06:20:12 -0400
Subject: [PATCH 55/60] revised README

---
 README.md | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index a77b0c5..952fbe8 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,11 @@
 # Weighted Exclusivity Test (WExT) #
 
-The Weighted Exclusivity Test (WExT) was developed by the [Raphael research group](http://compbio.cs.brown.edu/) at Brown University.
-
-### Requirements ###
-
-Latest tested version in parentheses.
+[![Build Status](https://api.travis-ci.org/raphael-group/wext.svg?branch=master)](https://travis-ci.org/raphael-group/wext?branch=master)
 
-1. Python (2.7.9)
 
-    a. NumPy (1.11.0)
-
-    b. SciPy (0.17.0)
+The Weighted Exclusivity Test (WExT) was developed by the [Raphael research group](http://compbio.cs.brown.edu/) at Brown University.
 
-2. gcc (4.9.2)
+### Requirements ###
 
 We recommend using [`virtualenv`](https://virtualenv.pypa.io/en/latest/) to install the Python requirements. After installing `virtualenv`, you can install the Python requirements for the weighted exclusivity test as follows:
 
@@ -27,8 +20,7 @@ See the wiki for additional instructions on [Setup and installation](https://git
 The C and Fortran extensions must be compiled before running the weighted exclusivity test:
 
     cd wext
-    python setup.py build
-    f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
+    python setup.py install
 
 ### Usage ###
 

From 31a8a282313d02f9844f7cdeb49e9e8855440fed Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 30 Sep 2018 08:03:01 -0400
Subject: [PATCH 56/60] revised source

---
 compute_mutation_probabilities.py             |  12 ++--
 .../simple/all-co-occurrence_results-k2.tsv   |   2 +
 .../simple/any-co-occurrence_results-k2.tsv   |   2 +
 examples/simple/commands_python2.sh           |  52 ++++++++++++++++++
 examples/simple/commands_python3.sh           |  52 ++++++++++++++++++
 examples/simple/data.json                     |   1 +
 examples/simple/exclusivity_results-k2.tsv    |   2 +
 examples/simple/weights.npy                   | Bin 1024 -> 1024 bytes
 experiments/eccb2016/scripts/helper.py        |   9 +--
 .../eccb2016/scripts/permute_single_matrix.py |   8 +--
 .../scripts/remove_genes_with_no_length.py    |   2 +-
 .../eccb2016/scripts/weights_matrix.py        |   2 +-
 viz/generate_viz_data.py                      |  10 ++--
 wext/enumerate_sets.py                        |  30 +++++-----
 wext/mcmc.py                                  |   7 ++-
 wext/setup.py                                 |   3 +-
 16 files changed, 153 insertions(+), 41 deletions(-)
 create mode 100644 examples/simple/all-co-occurrence_results-k2.tsv
 create mode 100644 examples/simple/any-co-occurrence_results-k2.tsv
 create mode 100644 examples/simple/commands_python2.sh
 create mode 100644 examples/simple/commands_python3.sh
 create mode 100644 examples/simple/data.json
 create mode 100644 examples/simple/exclusivity_results-k2.tsv

diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
index 8606d5a..1b4d845 100755
--- a/compute_mutation_probabilities.py
+++ b/compute_mutation_probabilities.py
@@ -63,7 +63,7 @@ def postprocess_weight_matrix(P, r, s):
 
     # Average weights over entries of weight matrix with same marginals
     P_mean = np.zeros(np.shape(P))
-    for marginals, indices in list(marginals_to_indices.items()):
+    for marginals, indices in marginals_to_indices.items():
         mean_value = float(sum(P[i, j] for i, j in indices))/float(len(indices))
         for i, j in indices:
             P_mean[i, j] = mean_value
@@ -84,15 +84,15 @@ def run( args ):
     mutation_data = load_mutation_data( args.mutation_file )
     genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data
 
-    geneToObserved = dict( (g, len(cases)) for g, cases in iter(list(geneToCases.items())) )
-    patientToObserved = dict( (p, len(muts)) for p, muts in iter(list(patientToMutations.items())) )
+    geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.items()) 
+    patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.items()) 
     geneToIndex = dict( (g, i+1) for i, g in enumerate(all_genes) )
     indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) )
     patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) )
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in list(geneToCases.items()):
+    for gene, cases in geneToCases.items():
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
@@ -140,10 +140,10 @@ def run( args ):
         P = np.add.reduce(observeds) / float(len(observeds))
 
         # Verify the weights
-        for g, obs in list(geneToObserved.items()):
+        for g, obs in geneToObserved.items():
             assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol)
 
-        for p, obs in list(patientToObserved.items()):
+        for p, obs in patientToObserved.items():
             assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol)
 
         # Construct mutation matrix to compute marginals
diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv
new file mode 100644
index 0000000..de3ca9f
--- /dev/null
+++ b/examples/simple/all-co-occurrence_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+c, d	0.0037101364741384682	0.18466623625983988	0.0026938915252685547	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv
new file mode 100644
index 0000000..6b1b151
--- /dev/null
+++ b/examples/simple/any-co-occurrence_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+c, d	0.0037101364741384682	0.18466623625983988	0.0025408267974853516	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/commands_python2.sh b/examples/simple/commands_python2.sh
new file mode 100644
index 0000000..7a43965
--- /dev/null
+++ b/examples/simple/commands_python2.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+num_permutations=1000
+num_cores=4
+
+# Preprocess mutations.
+python2 ../../process_mutations.py \
+    -m  adjacency_list.tsv \
+    -ct NA \
+    -o  data.json
+
+# Compute mutation probabilities.
+python2 ../../compute_mutation_probabilities.py \
+    -mf data.json \
+    -np $num_permutations \
+    -nc $num_cores \
+    -wf weights.npy \
+    -s  12345 \
+    -v  1
+
+# Find sets using mutual exclusivity test statistic.
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  exclusivity \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  exclusivity_results \
+    -v  0
+
+# Find sets using a co-occurrence test statistic (any co-occurrence).
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  any-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  any-co-occurrence_results \
+    -v  0
+
+# Find sets using another co-occurrence test statistic (all co-occurrence).
+python2 ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  all-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  all-co-occurrence_results \
+    -v  0
diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh
new file mode 100644
index 0000000..7320f5a
--- /dev/null
+++ b/examples/simple/commands_python3.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+num_permutations=1000
+num_cores=4
+
+# Preprocess mutations.
+python ../../process_mutations.py \
+    -m  adjacency_list.tsv \
+    -ct NA \
+    -o  data.json
+
+# Compute mutation probabilities.
+python ../../compute_mutation_probabilities.py \
+    -mf data.json \
+    -np $num_permutations \
+    -nc $num_cores \
+    -wf weights.npy \
+    -s  12345 \
+    -v  1
+
+# Find sets using mutual exclusivity test statistic.
+python ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  exclusivity \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  exclusivity_results \
+    -v  0
+
+# Find sets using a co-occurrence test statistic (any co-occurrence).
+python ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  any-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  any-co-occurrence_results \
+    -v  0
+
+# Find sets using another co-occurrence test statistic (all co-occurrence).
+python ../../find_sets.py \
+    -mf data.json \
+    -wf weights.npy \
+    -s  all-co-occurrence \
+    -k  2 \
+    -c  $num_cores \
+    -f  2 \
+    -o  all-co-occurrence_results \
+    -v  0
diff --git a/examples/simple/data.json b/examples/simple/data.json
new file mode 100644
index 0000000..84d082d
--- /dev/null
+++ b/examples/simple/data.json
@@ -0,0 +1 @@
+{"params": {"cancerToFiles": {"NA": ["/Users/biederse/benchmark_hotnet2_test27Sept2018/wext/examples/simple/adjacency_list.tsv"]}, "cancer_types": ["NA"], "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"], "ignored_variant_types": ["Germline"], "ignored_validation_statuses": ["Wildtype", "Invalid"], "patient_whitelist_file": null, "hypermutators_file": null}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "hypermutators": [], "geneToCases": {"e": ["1", "11"], "a": ["9", "3", "7", "5", "1", "11"], "c": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "f": ["1"], "d": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "h": ["2"], "g": ["12", "2"], "b": ["6", "10", "8", "12", "4", "2"]}, "patientToType": {"9": "NA", "3": "NA", "6": "NA", "8": "NA", "10": "NA", "7": "NA", "4": "NA", "12": "NA", "13": "NA", "5": "NA", "14": "NA", "2": "NA", "1": "NA", "11": "NA"}, "patientToMutations": {"1": ["f", "e", "d", "a", "c"], "2": ["h", "d", "c", "g", "b"], "3": ["d", "a", "c"], "4": ["d", "c", "b"], "5": ["d", "a", "c"], "6": ["d", "c", "b"], "7": ["d", "a", "c"], "8": ["b"], "9": ["a"], "10": ["b"], "11": ["a", "e"], "12": ["g", "b"], "13": ["d", "c"], "14": ["d", "c"]}, "num_genes": 8, "num_patients": 14}
\ No newline at end of file
diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv
new file mode 100644
index 0000000..ef88565
--- /dev/null
+++ b/examples/simple/exclusivity_results-k2.tsv
@@ -0,0 +1,2 @@
+#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
+a, b	0.00024227153775402632	0.012058686612143744	0.002846956253051758	12	0	2	6	6	0
\ No newline at end of file
diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy
index 0cdfdcf8096fc128e3dd5fb94fbe128002d925e6..017cfb74ef630ca46607d00e657a5736cdab2707 100644
GIT binary patch
literal 1024
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I#iItqp+nmP)#3giN=`BM6KRoGtJYcAfNy0-72{ee>pUAKL{jFX0`Pkeeo
z>GsbD_JnAdxiA{0AI7J(eMb~F6lF|&Yk$jOId93WEB2Wa-`3b~eT0*SseiS7J@fOP
z7xsi`n7J?-rXR+qwS6M%+#gnn+_SeoypCb#^*Q!aBv``ubT{FoVe0Q~ZK=7i`Jg=^
z8fGqxR=)kSE^E&P`;FQT?@Yy;?ER7iA}qHoz)8c@|6Td<l5f%mdqOnKTo?^=FU<bo
G=05-uefE_A

literal 1024
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I#iItqp+nmP)#3giMV#SKLn6W_kFSM^<K@%zI;`?WGrDc1Kd<D_BgUwY{M
zSN!|Ho)8T)7e>SM!}zqe@8-Gh!H<`|wSTdt(NFrt6?<E*>x&EaKEg@E)X&;IZ32JQ
z3wuH|%v=}^(+}g*+CHV*KkKsg+_S&1LEGV->2!Oc<^!h|x^BWr!_+h8H?kb<KWI;g
zhM5baU#S^#tdY53FRg!9g{`sGen|nyhYN7hF!i7RUa5Ggw$Yvt4Ko);!`utAf4KP%
E0I)*lX#fBK

diff --git a/experiments/eccb2016/scripts/helper.py b/experiments/eccb2016/scripts/helper.py
index 35fc5b5..8131c6a 100644
--- a/experiments/eccb2016/scripts/helper.py
+++ b/experiments/eccb2016/scripts/helper.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
+from past.builtins import xrange
 
 # Add a y=x line to the given matplotlib axis
 def add_y_equals_x(ax, c='k', line_style='--', alpha=0.75):
@@ -31,14 +32,14 @@ def aligned_plaintext_table(table, sep='\t', spaces=2):
 
     # Find numbers of rows and columns.
     m = len(rows)
-    lengths = list(map(len, rows))
+    lengths = [len(row) for row in rows]
     n = max(lengths)
 
     # Pad rows with a deficient number of columns.
     entries = [[rows[i][j] if j<lengths[i] else '' for j in range(n)] for i in range(m)]
 
     # Find column widths.
-    sizes = [max(len(entries[i][j]) for i in list(range(m))) for j in range(n)]
+    sizes = [max(len(entries[i][j]) for i in range(m)) for j in range(n)]
 
     # Return results.
     return '\n'.join([''.join([entries[i][j].rjust(sizes[j]+spaces) for j in range(n)]).rstrip() for i in range(m)])
@@ -83,14 +84,14 @@ def rank(a, reverse=False, ties=2):
     elif ties==1 :
         z = np.zeros(n, dtype=y.dtype)
         j = 0
-        for i in range(1, n):
+        for i in xrange(1, n):
             if x[y[i]]!=x[y[i-1]]:
                 j += 1
             z[y[i]] = j
     elif ties==2:
         z = np.zeros(n, dtype=y.dtype)
         j = 0
-        for i in range(1, n):
+        for i in xrange(1, n):
             if x[y[i]]!=x[y[i-1]]:
                 j = i
             z[y[i]] = j
diff --git a/experiments/eccb2016/scripts/permute_single_matrix.py b/experiments/eccb2016/scripts/permute_single_matrix.py
index 12c6d4e..c25eff2 100755
--- a/experiments/eccb2016/scripts/permute_single_matrix.py
+++ b/experiments/eccb2016/scripts/permute_single_matrix.py
@@ -34,7 +34,7 @@ def run( args ):
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in list(geneToCases.items()):
+    for gene, cases in geneToCases.items():
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
@@ -58,16 +58,16 @@ def run( args ):
         permutedPatientToMutations[patient].add(gene)
         
     # Verify the number of mutations per gene/patient is preserved
-    for g, cases in list(geneToCases.items()):
+    for g, cases in geneToCases.items():
         assert( len(cases) == len(permutedGeneToCases[g]) )
 
-    for p, muts in list(patientToMutations.items()):
+    for p, muts in patientToMutations.items():
         assert( len(muts) == len(permutedPatientToMutations[p]) )
 
     # Save edge list.
     output_file = '{}-{}.json'.format(args.output_prefix, args.job_id)
     permutation = dict(params=params, permutation_number=args.job_id,
-                       geneToCases=dict( (g, list(cases)) for g, cases in iter(list(permutedGeneToCases.items()))))
+                       geneToCases=dict( (g, list(cases)) for g, cases in permutedGeneToCases.items()))
     with open(output_file, 'w') as OUT: json.dump( permutation, OUT )
     
 if __name__ == '__main__':
diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
index 3b5d316..f0897e7 100644
--- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py
+++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
@@ -28,7 +28,7 @@
 obj['num_genes'] = len(obj['genes'])
 obj['params']['lengths_file'] = os.path.abspath(args.lengths_file)
 obj['genes_with_no_length_removed'] = sorted(original_genes - set(obj['genes']))
-obj['patientToMutations'] = dict((p, sorted(set(muts) & remaining_genes)) for p, muts in iter(list(obj['patientToMutations'].items())))
+obj['patientToMutations'] = dict((p, sorted(set(muts) & remaining_genes)) for p, muts in obj['patientToMutations'].items())
 print('Removed {} genes with no length'.format(len(obj['genes_with_no_length_removed'])))
 
 # Output the new file
diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py
index 9d13cbb..e98b8b0 100755
--- a/experiments/eccb2016/scripts/weights_matrix.py
+++ b/experiments/eccb2016/scripts/weights_matrix.py
@@ -28,7 +28,7 @@
         cancerToHypermutators[cancer] = set(obj['hypermutators'])
         geneToCases[cancer] = obj['geneToCases']
         patientToMutations[cancer] = dict( (p, set()) for p in obj['patients'] )
-        for g, cases in list(geneToCases[cancer].items()):
+        for g, cases in geneToCases[cancer].items():
             for p in cases:
                 patientToMutations[cancer][p].add( g )
     cancerToWeights[cancer] = np.load(weights_file)
diff --git a/viz/generate_viz_data.py b/viz/generate_viz_data.py
index cf80f37..c341534 100755
--- a/viz/generate_viz_data.py
+++ b/viz/generate_viz_data.py
@@ -60,15 +60,15 @@ def run( args ):
             new_sets |= set(sorted( list(setToPval[run_name].keys()), key=lambda M: setToPval[run_name][M] )[:args.num_sets])
 
         sets = new_sets
-        setToPval = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToPval[run_name].items())) if M in new_sets)) for run_name in methods )
-        setToRuntime = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToRuntime[run_name].items())) if M in new_sets)) for run_name in methods )
-        setToObs = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToObs[run_name].items())) if M in new_sets)) for run_name in methods )
-        setToFDR = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToFDR[run_name].items())) if M in new_sets)) for run_name in methods )
+        setToPval = dict( (run_name, dict( (M, pval) for M, pval in setToPval[run_name].items() if M in new_sets)) for run_name in methods )
+        setToRuntime = dict( (run_name, dict( (M, pval) for M, pval in setToRuntime[run_name].items() if M in new_sets)) for run_name in methods )
+        setToObs = dict( (run_name, dict( (M, pval) for M, pval in setToObs[run_name].items() if M in new_sets)) for run_name in methods )
+        setToFDR = dict( (run_name, dict( (M, pval) for M, pval in setToFDR[run_name].items() if M in new_sets)) for run_name in methods )
 
     # Restrict the weights
     genes_in_sets = set( g for M in sets for g in M.split('\t') )
     P = dict( (g, P[g]) for g in genes_in_sets )
-    geneToCases = dict( (g, cases) for g, cases in iter(list(geneToCases.items())) if g in genes_in_sets )
+    geneToCases = dict( (g, cases) for g, cases in geneToCases.items() if g in genes_in_sets )
 
     print('* Considering {} sets...'.format(len(new_sets)))
 
diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py
index 7908b42..5d8cce0 100755
--- a/wext/enumerate_sets.py
+++ b/wext/enumerate_sets.py
@@ -17,7 +17,7 @@
 # Compute the mutual exclusivity T for the given gene set
 def T(M, geneToCases):
     sampleToCount = Counter( s for g in M for s in geneToCases.get(g, []) )
-    return sum( 1 for sample, count in list(sampleToCount.items()) if count == 1 )
+    return sum( 1 for sample, count in sampleToCount.items() if count == 1 )
 
 # Compute the permutational
 def permutational_dist_wrapper( args ): return permutational_dist( *args )
@@ -29,7 +29,7 @@ def permutational_dist( sets, permuted_files ):
         permutedGeneToCases = defaultdict(set)
         for pf in pf_group:
             with open(pf, 'r') as IN:
-                for g, cases in list(json.load(IN)['geneToCases'].items()):
+                for g, cases in json.load(IN)['geneToCases'].items():
                     permutedGeneToCases[g] |= set(cases)
 
         reading_time = time() - reading_start
@@ -55,7 +55,7 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, num_co
     # Filter the sets based on the observed values
     k = len(next(iter(sets)))
     setToObs = dict( (M, observed_values(M, num_patients, geneToCases)) for M in sets )
-    sets = set( M for M, (X, T, Z, tbl) in list(setToObs.items()) if testable_set(k, T, Z, tbl) )
+    sets = set( M for M, (X, T, Z, tbl) in setToObs.items() if testable_set(k, T, Z, tbl) )
 
     # Compute the distribution of exclusivity for each pair across the permuted files
     np    = float(len(permuted_files))
@@ -70,19 +70,19 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, num_co
     setToDist, setToTime = defaultdict(list), dict()
     for dist, times in empirical_distributions:
         setToTime.update(list(times.items()))
-        for k, v in list(dist.tems()):
+        for k, v in dist.items():
             setToDist[k].extend(v)
 
     # Compute the observed values and then the P-values
     setToObs = dict( (M, setToObs[M]) for M in sets )
     setToPval = dict()
-    for M, (X, T, Z, tbl) in list(setToObs.items()):
+    for M, (X, T, Z, tbl) in setToObs.items():
         # Compute the P-value.
         count = sum( 1. for d in setToDist[M] if d >= T )
         setToPval[M] = count / np
 
     # Compute FDRs
-    tested_sets = list(setToPval.keys())
+    tested_sets = setToPval.keys()
     pvals = [ setToPval[M] for M in tested_sets ]
     setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))))
 
@@ -181,7 +181,7 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=
 
     # Make sure all P-values are numbers
     tested_sets = len(setToPval)
-    invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
+    invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
 
     # Report invalid sets
     if verbose > 0 and report_invalids:
@@ -192,9 +192,9 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=
             invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ])
         sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' )
 
-    setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets )
-    setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets )
-    setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets )
+    setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in invalid_sets )
+    setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets )
+    setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets )
 
     if verbose > 0:
         print('- Output {} sets'.format(len(setToPval)))
@@ -260,7 +260,7 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
 
     # Make sure all P-values are numbers
     tested_sets = len(setToPval)
-    invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
+    invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
 
     # Report invalid sets
     if verbose > 0 and report_invalids:
@@ -271,9 +271,9 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
             invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ])
         sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' )
 
-    setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets )
-    setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets )
-    setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets )
+    setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in invalid_sets )
+    setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets )
+    setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets )
 
     if verbose > 0:
         print('- Output {} sets'.format(len(setToPval)))
@@ -292,4 +292,4 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
 ################################################################################
 # Testable set
 def testable_set( k, T, Z, tbl ):
-    return T > Z and all( tbl[2**i] > 0 for i in list(range(k)) )
+    return T > Z and all( tbl[2**i] > 0 for i in range(k) )
diff --git a/wext/mcmc.py b/wext/mcmc.py
index 4c15fc2..1269378 100755
--- a/wext/mcmc.py
+++ b/wext/mcmc.py
@@ -4,6 +4,7 @@
 from collections import defaultdict
 from time import time
 from random import random, sample, choice, seed as random_seed
+from past.builtins import xrange
 
 from .constants import *
 from .enumerate_sets import observed_values
@@ -56,9 +57,9 @@ def _log_accept_ratio( W_current, W_next ):
     random_seed(seed)
     t          = len(ks)
     genespace  = list(geneToCases.keys())
-    setsToFreq = [ defaultdict(int) for _ in range(nchains) ]
+    setsToFreq = [ defaultdict(int) for _ in xrange(nchains) ]
     setToPval, setToObs =  dict(), dict()
-    for c in range(nchains):
+    for c in xrange(nchains):
         if verbose > 0: 
             print('- Experiment', c+1)
 
@@ -78,7 +79,7 @@ def _log_accept_ratio( W_current, W_next ):
                 sys.stdout.flush()
 
             # Sample the next gene to swap in/around the set
-            next_soln = dict( (index, set(M)) for index, M in list(soln.items()) )
+            next_soln = dict( (index, set(M)) for index, M in soln.items() )
             next_assigned = dict(list(assigned.items()))
             next_gene = choice(genespace)
 
diff --git a/wext/setup.py b/wext/setup.py
index c4ba272..38de6ac 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -3,8 +3,7 @@
 """Compiles the C modules used by the weighted exclusivity test."""
 
 # Load required modules
-from numpy.distutils.core import setup
-from numpy.distutils.extension import Extension
+from numpy.distutils.core import setup, Extension
 import numpy, os
 
 thisDir = os.path.dirname(os.path.realpath(__file__))

From 3abf0f60c9594bd0fe281a9da945564eb36d8850 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 30 Sep 2018 08:03:47 -0400
Subject: [PATCH 57/60] remove debugging files

---
 examples/simple/adjacency_list.tsv            |  14 -----
 .../simple/all-co-occurrence_results-k2.tsv   |   2 -
 .../simple/any-co-occurrence_results-k2.tsv   |   2 -
 examples/simple/commands_python2.sh           |  52 ------------------
 examples/simple/commands_python3.sh           |  52 ------------------
 examples/simple/data.json                     |   1 -
 examples/simple/exclusivity_results-k2.tsv    |   2 -
 examples/simple/weights.npy                   | Bin 1024 -> 0 bytes
 8 files changed, 125 deletions(-)
 delete mode 100644 examples/simple/adjacency_list.tsv
 delete mode 100644 examples/simple/all-co-occurrence_results-k2.tsv
 delete mode 100644 examples/simple/any-co-occurrence_results-k2.tsv
 delete mode 100644 examples/simple/commands_python2.sh
 delete mode 100644 examples/simple/commands_python3.sh
 delete mode 100644 examples/simple/data.json
 delete mode 100644 examples/simple/exclusivity_results-k2.tsv
 delete mode 100644 examples/simple/weights.npy

diff --git a/examples/simple/adjacency_list.tsv b/examples/simple/adjacency_list.tsv
deleted file mode 100644
index b303963..0000000
--- a/examples/simple/adjacency_list.tsv
+++ /dev/null
@@ -1,14 +0,0 @@
-1	a	c	d	e	f
-2	b	c	d	g	h
-3	a	c	d
-4	b	c	d
-5	a	c	d
-6	b	c	d
-7	a	c	d
-8	b
-9	a
-10	b
-11	a	e
-12	b	g
-13	c	d
-14	c	d
diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv
deleted file mode 100644
index de3ca9f..0000000
--- a/examples/simple/all-co-occurrence_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-c, d	0.0037101364741384682	0.18466623625983988	0.0026938915252685547	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv
deleted file mode 100644
index 6b1b151..0000000
--- a/examples/simple/any-co-occurrence_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-c, d	0.0037101364741384682	0.18466623625983988	0.0025408267974853516	0	9	5	0	0	9
\ No newline at end of file
diff --git a/examples/simple/commands_python2.sh b/examples/simple/commands_python2.sh
deleted file mode 100644
index 7a43965..0000000
--- a/examples/simple/commands_python2.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-num_permutations=1000
-num_cores=4
-
-# Preprocess mutations.
-python2 ../../process_mutations.py \
-    -m  adjacency_list.tsv \
-    -ct NA \
-    -o  data.json
-
-# Compute mutation probabilities.
-python2 ../../compute_mutation_probabilities.py \
-    -mf data.json \
-    -np $num_permutations \
-    -nc $num_cores \
-    -wf weights.npy \
-    -s  12345 \
-    -v  1
-
-# Find sets using mutual exclusivity test statistic.
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  exclusivity \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  exclusivity_results \
-    -v  0
-
-# Find sets using a co-occurrence test statistic (any co-occurrence).
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  any-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  any-co-occurrence_results \
-    -v  0
-
-# Find sets using another co-occurrence test statistic (all co-occurrence).
-python2 ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  all-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  all-co-occurrence_results \
-    -v  0
diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh
deleted file mode 100644
index 7320f5a..0000000
--- a/examples/simple/commands_python3.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-num_permutations=1000
-num_cores=4
-
-# Preprocess mutations.
-python ../../process_mutations.py \
-    -m  adjacency_list.tsv \
-    -ct NA \
-    -o  data.json
-
-# Compute mutation probabilities.
-python ../../compute_mutation_probabilities.py \
-    -mf data.json \
-    -np $num_permutations \
-    -nc $num_cores \
-    -wf weights.npy \
-    -s  12345 \
-    -v  1
-
-# Find sets using mutual exclusivity test statistic.
-python ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  exclusivity \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  exclusivity_results \
-    -v  0
-
-# Find sets using a co-occurrence test statistic (any co-occurrence).
-python ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  any-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  any-co-occurrence_results \
-    -v  0
-
-# Find sets using another co-occurrence test statistic (all co-occurrence).
-python ../../find_sets.py \
-    -mf data.json \
-    -wf weights.npy \
-    -s  all-co-occurrence \
-    -k  2 \
-    -c  $num_cores \
-    -f  2 \
-    -o  all-co-occurrence_results \
-    -v  0
diff --git a/examples/simple/data.json b/examples/simple/data.json
deleted file mode 100644
index 84d082d..0000000
--- a/examples/simple/data.json
+++ /dev/null
@@ -1 +0,0 @@
-{"params": {"cancerToFiles": {"NA": ["/Users/biederse/benchmark_hotnet2_test27Sept2018/wext/examples/simple/adjacency_list.tsv"]}, "cancer_types": ["NA"], "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"], "ignored_variant_types": ["Germline"], "ignored_validation_statuses": ["Wildtype", "Invalid"], "patient_whitelist_file": null, "hypermutators_file": null}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "hypermutators": [], "geneToCases": {"e": ["1", "11"], "a": ["9", "3", "7", "5", "1", "11"], "c": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "f": ["1"], "d": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "h": ["2"], "g": ["12", "2"], "b": ["6", "10", "8", "12", "4", "2"]}, "patientToType": {"9": "NA", "3": "NA", "6": "NA", "8": "NA", "10": "NA", "7": "NA", "4": "NA", "12": "NA", "13": "NA", "5": "NA", "14": "NA", "2": "NA", "1": "NA", "11": "NA"}, "patientToMutations": {"1": ["f", "e", "d", "a", "c"], "2": ["h", "d", "c", "g", "b"], "3": ["d", "a", "c"], "4": ["d", "c", "b"], "5": ["d", "a", "c"], "6": ["d", "c", "b"], "7": ["d", "a", "c"], "8": ["b"], "9": ["a"], "10": ["b"], "11": ["a", "e"], "12": ["g", "b"], "13": ["d", "c"], "14": ["d", "c"]}, "num_genes": 8, "num_patients": 14}
\ No newline at end of file
diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv
deleted file mode 100644
index ef88565..0000000
--- a/examples/simple/exclusivity_results-k2.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-#Gene set	WRE (Saddlepoint) P-value	WRE (Saddlepoint) FDR	WRE (Saddlepoint) Runtime	T	Z	t00	t01	t10	t11
-a, b	0.00024227153775402632	0.012058686612143744	0.002846956253051758	12	0	2	6	6	0
\ No newline at end of file
diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy
deleted file mode 100644
index 017cfb74ef630ca46607d00e657a5736cdab2707..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1024
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I#iItqp+nmP)#3giN=`BM6KRoGtJYcAfNy0-72{ee>pUAKL{jFX0`Pkeeo
z>GsbD_JnAdxiA{0AI7J(eMb~F6lF|&Yk$jOId93WEB2Wa-`3b~eT0*SseiS7J@fOP
z7xsi`n7J?-rXR+qwS6M%+#gnn+_SeoypCb#^*Q!aBv``ubT{FoVe0Q~ZK=7i`Jg=^
z8fGqxR=)kSE^E&P`;FQT?@Yy;?ER7iA}qHoz)8c@|6Td<l5f%mdqOnKTo?^=FU<bo
G=05-uefE_A


From 990fdb22d01733d9888490c79d221d2c30aa094a Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 30 Sep 2018 08:37:17 -0400
Subject: [PATCH 58/60] source code revisions for py23 compatibility

---
 .travis.yml                                   |   1 -
 compute_mutation_probabilities.py             |  14 ++++----
 examples/simple/weights.npy                   | Bin 1024 -> 0 bytes
 experiments/eccb2016/scripts/helper.py        |   9 +++---
 .../eccb2016/scripts/permute_single_matrix.py |   8 ++---
 .../scripts/remove_genes_with_no_length.py    |   2 +-
 experiments/eccb2016/scripts/results_table.py |  14 ++++----
 .../scripts/sample_mutation_frequency_plot.py |   4 +--
 .../eccb2016/scripts/triple_pval_scatter.py   |   2 +-
 .../eccb2016/scripts/unweighted_comparison.py |   4 +--
 .../eccb2016/scripts/weights_matrix.py        |   2 +-
 viz/generate_viz_data.py                      |  12 +++----
 wext/enumerate_sets.py                        |  30 +++++++++---------
 wext/mcmc.py                                  |   7 ++--
 wext/setup.py                                 |   3 +-
 15 files changed, 56 insertions(+), 56 deletions(-)
 delete mode 100644 examples/simple/weights.npy

diff --git a/.travis.yml b/.travis.yml
index 22c0c61..37ca25e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,7 +13,6 @@ install:
     - cd wext
     - python setup.py install
     - cd ../
-    ##- f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
     - pwd
     - ls
 script:
diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
index 8606d5a..5be6777 100755
--- a/compute_mutation_probabilities.py
+++ b/compute_mutation_probabilities.py
@@ -47,7 +47,7 @@ def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, m, n, num_
 
         # Record the permutation
         observed[tuple(zip(*indices))] += 1.
-        geneToCases = dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) )
+        geneToCases = dict( (g, list(cases)) for g, cases in geneToCases.items())
         permutations.append( dict(geneToCases=geneToCases, permutation_number=seed) )
 
     return observed/float(len(seeds)), permutations
@@ -63,7 +63,7 @@ def postprocess_weight_matrix(P, r, s):
 
     # Average weights over entries of weight matrix with same marginals
     P_mean = np.zeros(np.shape(P))
-    for marginals, indices in list(marginals_to_indices.items()):
+    for marginals, indices in marginals_to_indices.items():
         mean_value = float(sum(P[i, j] for i, j in indices))/float(len(indices))
         for i, j in indices:
             P_mean[i, j] = mean_value
@@ -84,15 +84,15 @@ def run( args ):
     mutation_data = load_mutation_data( args.mutation_file )
     genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data
 
-    geneToObserved = dict( (g, len(cases)) for g, cases in iter(list(geneToCases.items())) )
-    patientToObserved = dict( (p, len(muts)) for p, muts in iter(list(patientToMutations.items())) )
+    geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.items()) 
+    patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.items()) 
     geneToIndex = dict( (g, i+1) for i, g in enumerate(all_genes) )
     indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) )
     patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) )
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in list(geneToCases.items()):
+    for gene, cases in geneToCases.items():
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
@@ -140,10 +140,10 @@ def run( args ):
         P = np.add.reduce(observeds) / float(len(observeds))
 
         # Verify the weights
-        for g, obs in list(geneToObserved.items()):
+        for g, obs in geneToObserved.items():
             assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol)
 
-        for p, obs in list(patientToObserved.items()):
+        for p, obs in patientToObserved.items():
             assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol)
 
         # Construct mutation matrix to compute marginals
diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy
deleted file mode 100644
index 0cdfdcf8096fc128e3dd5fb94fbe128002d925e6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1024
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I#iItqp+nmP)#3giMV#SKLn6W_kFSM^<K@%zI;`?WGrDc1Kd<D_BgUwY{M
zSN!|Ho)8T)7e>SM!}zqe@8-Gh!H<`|wSTdt(NFrt6?<E*>x&EaKEg@E)X&;IZ32JQ
z3wuH|%v=}^(+}g*+CHV*KkKsg+_S&1LEGV->2!Oc<^!h|x^BWr!_+h8H?kb<KWI;g
zhM5baU#S^#tdY53FRg!9g{`sGen|nyhYN7hF!i7RUa5Ggw$Yvt4Ko);!`utAf4KP%
E0I)*lX#fBK

diff --git a/experiments/eccb2016/scripts/helper.py b/experiments/eccb2016/scripts/helper.py
index 35fc5b5..8131c6a 100644
--- a/experiments/eccb2016/scripts/helper.py
+++ b/experiments/eccb2016/scripts/helper.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import numpy as np
+from past.builtins import xrange
 
 # Add a y=x line to the given matplotlib axis
 def add_y_equals_x(ax, c='k', line_style='--', alpha=0.75):
@@ -31,14 +32,14 @@ def aligned_plaintext_table(table, sep='\t', spaces=2):
 
     # Find numbers of rows and columns.
     m = len(rows)
-    lengths = list(map(len, rows))
+    lengths = list(map(len, rows))
     n = max(lengths)
 
     # Pad rows with a deficient number of columns.
     entries = [[rows[i][j] if j<lengths[i] else '' for j in range(n)] for i in range(m)]
 
     # Find column widths.
-    sizes = [max(len(entries[i][j]) for i in list(range(m))) for j in range(n)]
+    sizes = [max(len(entries[i][j]) for i in range(m)) for j in range(n)]
 
     # Return results.
     return '\n'.join([''.join([entries[i][j].rjust(sizes[j]+spaces) for j in range(n)]).rstrip() for i in range(m)])
@@ -83,14 +84,14 @@ def rank(a, reverse=False, ties=2):
     elif ties==1 :
         z = np.zeros(n, dtype=y.dtype)
         j = 0
-        for i in range(1, n):
+        for i in xrange(1, n):
             if x[y[i]]!=x[y[i-1]]:
                 j += 1
             z[y[i]] = j
     elif ties==2:
         z = np.zeros(n, dtype=y.dtype)
         j = 0
-        for i in range(1, n):
+        for i in xrange(1, n):
             if x[y[i]]!=x[y[i-1]]:
                 j = i
             z[y[i]] = j
diff --git a/experiments/eccb2016/scripts/permute_single_matrix.py b/experiments/eccb2016/scripts/permute_single_matrix.py
index 12c6d4e..c25eff2 100755
--- a/experiments/eccb2016/scripts/permute_single_matrix.py
+++ b/experiments/eccb2016/scripts/permute_single_matrix.py
@@ -34,7 +34,7 @@ def run( args ):
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in list(geneToCases.items()):
+    for gene, cases in geneToCases.items():
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
@@ -58,16 +58,16 @@ def run( args ):
         permutedPatientToMutations[patient].add(gene)
         
     # Verify the number of mutations per gene/patient is preserved
-    for g, cases in list(geneToCases.items()):
+    for g, cases in geneToCases.items():
         assert( len(cases) == len(permutedGeneToCases[g]) )
 
-    for p, muts in list(patientToMutations.items()):
+    for p, muts in patientToMutations.items():
         assert( len(muts) == len(permutedPatientToMutations[p]) )
 
     # Save edge list.
     output_file = '{}-{}.json'.format(args.output_prefix, args.job_id)
     permutation = dict(params=params, permutation_number=args.job_id,
-                       geneToCases=dict( (g, list(cases)) for g, cases in iter(list(permutedGeneToCases.items()))))
+                       geneToCases=dict( (g, list(cases)) for g, cases in permutedGeneToCases.items()))
     with open(output_file, 'w') as OUT: json.dump( permutation, OUT )
     
 if __name__ == '__main__':
diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
index 3b5d316..f0897e7 100644
--- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py
+++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py
@@ -28,7 +28,7 @@
 obj['num_genes'] = len(obj['genes'])
 obj['params']['lengths_file'] = os.path.abspath(args.lengths_file)
 obj['genes_with_no_length_removed'] = sorted(original_genes - set(obj['genes']))
-obj['patientToMutations'] = dict((p, sorted(set(muts) & remaining_genes)) for p, muts in iter(list(obj['patientToMutations'].items())))
+obj['patientToMutations'] = dict((p, sorted(set(muts) & remaining_genes)) for p, muts in obj['patientToMutations'].items())
 print('Removed {} genes with no length'.format(len(obj['genes_with_no_length_removed'])))
 
 # Output the new file
diff --git a/experiments/eccb2016/scripts/results_table.py b/experiments/eccb2016/scripts/results_table.py
index 3989369..1315dd0 100755
--- a/experiments/eccb2016/scripts/results_table.py
+++ b/experiments/eccb2016/scripts/results_table.py
@@ -34,23 +34,23 @@
     obj = json.load(IN)
     genes, patients = obj['genes'], obj['patients']
     hypermutators = set(obj['hypermutators'])
-    geneToCases = dict((g, set(cases)) for g, cases in iter(list(obj['geneToCases'].items())))
+    geneToCases = dict((g, set(cases)) for g, cases in obj['geneToCases'].items())
 
 # Load the triples
 with open(args.unweighted_exact_file, 'r') as IN:
     obj            = json.load(IN)
-    unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in iter(list(obj['setToPval'].items())))
+    unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in list(obj['setToPval'].items()))
     assert( all( not(isnan(pval)) for pval in list(unweightedPval.values()) ))
-    unweightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in iter(list(obj['setToFDR'].items())))
+    unweightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in list(obj['setToFDR'].items()))
 
 with open(args.weighted_saddlepoint_file, 'r') as IN:
     obj          = json.load(IN)
-    weightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in iter(list(obj['setToPval'].items())))
+    weightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].items())
     assert( all( not(isnan(pval)) for pval in list(weightedPval.values()) ))
-    weightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in iter(list(obj['setToFDR'].items())))
+    weightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].items())
 
-print('Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.iteritems() if fdr < args.fdr_cutoff), len(weightedFDR)))
-print('Triples with unweighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in unweightedFDR.iteritems() if fdr < args.fdr_cutoff), len(unweightedFDR)))
+print('Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.items() if fdr < args.fdr_cutoff), len(weightedFDR)))
+print('Triples with unweighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in unweightedFDR.items() if fdr < args.fdr_cutoff), len(unweightedFDR)))
 
 # Rank triples by P-value
 triples = sorted(set(weightedPval.keys()) & set(unweightedPval.keys()))
diff --git a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
index 7b66ee6..909bcba 100755
--- a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
+++ b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py
@@ -26,12 +26,12 @@
 
         # Make a map of patients to their mutated genes
         patientToMutations = dict( (p, set()) for p in patients )
-        for g, cases in list(obj['geneToCases'].items()):
+        for g, cases in obj['geneToCases'].items():
             for p in cases:
                 patientToMutations[p].add( g )
 
         # Assemble the data into dictionaries for Pandas
-        for p, mutations in list(patientToMutations.items()):
+        for p, mutations in patientToMutations.items():
             ty = "Hypermutator" if p in hypermutators else "Non-hypermutator"
             items.append({ "Sample": p, "Mutated genes per sample": len(mutations),
                            "Type": ty, "Cancer": cancer })
diff --git a/experiments/eccb2016/scripts/triple_pval_scatter.py b/experiments/eccb2016/scripts/triple_pval_scatter.py
index 6e2f168..127c9d5 100755
--- a/experiments/eccb2016/scripts/triple_pval_scatter.py
+++ b/experiments/eccb2016/scripts/triple_pval_scatter.py
@@ -36,7 +36,7 @@
     with open(permuted_file, 'r') as IN:
         setToPermuted.update( json.load(IN)['setToPval'] )
 
-for M, pval in list(setToPermuted.items()):
+for M, pval in setToPermuted.items():
     if pval == 0:
         setToPermuted[M] = 1./args.num_permutations
 
diff --git a/experiments/eccb2016/scripts/unweighted_comparison.py b/experiments/eccb2016/scripts/unweighted_comparison.py
index 369174b..e2c3913 100755
--- a/experiments/eccb2016/scripts/unweighted_comparison.py
+++ b/experiments/eccb2016/scripts/unweighted_comparison.py
@@ -27,7 +27,7 @@
         exactPval[cancer] = obj['setToPval']
         exactRuntime[cancer] = obj['setToRuntime']
 
-num_exact = sum(1 for c in args.cancers for M in list(exactPval[c].keys()))
+num_exact = sum(1 for c in args.cancers for M in exactPval[c].keys())
 
 for cancer, saddlepoint_file in zip(args.cancers, args.saddlepoint_files):
     with open(saddlepoint_file, 'r') as IN:
@@ -35,7 +35,7 @@
         saddlepointPval[cancer] = obj['setToPval']
         saddlepointRuntime[cancer] = obj['setToRuntime']
 
-num_saddlepoint = sum(1 for c in args.cancers for M in list(saddlepointPval[c].keys()))
+num_saddlepoint = sum(1 for c in args.cancers for M in saddlepointPval[c].keys())
 print('* Loaded {} exact sets and {} saddlepoint sets...'.format(num_exact, num_saddlepoint))
 
 # Construct the arrays of data
diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py
index 9d13cbb..e98b8b0 100755
--- a/experiments/eccb2016/scripts/weights_matrix.py
+++ b/experiments/eccb2016/scripts/weights_matrix.py
@@ -28,7 +28,7 @@
         cancerToHypermutators[cancer] = set(obj['hypermutators'])
         geneToCases[cancer] = obj['geneToCases']
         patientToMutations[cancer] = dict( (p, set()) for p in obj['patients'] )
-        for g, cases in list(geneToCases[cancer].items()):
+        for g, cases in geneToCases[cancer].items():
             for p in cases:
                 patientToMutations[cancer][p].add( g )
     cancerToWeights[cancer] = np.load(weights_file)
diff --git a/viz/generate_viz_data.py b/viz/generate_viz_data.py
index cf80f37..4a4608b 100755
--- a/viz/generate_viz_data.py
+++ b/viz/generate_viz_data.py
@@ -60,15 +60,15 @@ def run( args ):
             new_sets |= set(sorted( list(setToPval[run_name].keys()), key=lambda M: setToPval[run_name][M] )[:args.num_sets])
 
         sets = new_sets
-        setToPval = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToPval[run_name].items())) if M in new_sets)) for run_name in methods )
-        setToRuntime = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToRuntime[run_name].items())) if M in new_sets)) for run_name in methods )
-        setToObs = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToObs[run_name].items())) if M in new_sets)) for run_name in methods )
-        setToFDR = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToFDR[run_name].items())) if M in new_sets)) for run_name in methods )
+        setToPval = dict( (run_name, dict( (M, pval) for M, pval in setToPval[run_name].items() if M in new_sets)) for run_name in methods )
+        setToRuntime = dict( (run_name, dict( (M, pval) for M, pval in setToRuntime[run_name].items() if M in new_sets)) for run_name in methods )
+        setToObs = dict( (run_name, dict( (M, pval) for M, pval in setToObs[run_name].items() if M in new_sets)) for run_name in methods )
+        setToFDR = dict( (run_name, dict( (M, pval) for M, pval in setToFDR[run_name].items() if M in new_sets)) for run_name in methods )
 
     # Restrict the weights
     genes_in_sets = set( g for M in sets for g in M.split('\t') )
     P = dict( (g, P[g]) for g in genes_in_sets )
-    geneToCases = dict( (g, cases) for g, cases in iter(list(geneToCases.items())) if g in genes_in_sets )
+    geneToCases = dict( (g, cases) for g, cases in geneToCases.items() if g in genes_in_sets )
 
     print('* Considering {} sets...'.format(len(new_sets)))
 
@@ -81,7 +81,7 @@ def run( args ):
         params['weights_file'] = os.path.abspath(args.weights_file)
 
         # Output
-        output = dict(params=params, geneToCases=dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) ),
+        output = dict(params=params, geneToCases=dict( (g, list(cases)) for g, cases in geneToCases.items()),
                       setToPval=setToPval, methods=sorted(methods),
                       patientToType=patientToType, setToFDR=setToFDR,
                       setToRuntime=setToRuntime, setToObs=setToObs, sets=list(sets),
diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py
index 7908b42..5d8cce0 100755
--- a/wext/enumerate_sets.py
+++ b/wext/enumerate_sets.py
@@ -17,7 +17,7 @@
 # Compute the mutual exclusivity T for the given gene set
 def T(M, geneToCases):
     sampleToCount = Counter( s for g in M for s in geneToCases.get(g, []) )
-    return sum( 1 for sample, count in list(sampleToCount.items()) if count == 1 )
+    return sum( 1 for sample, count in sampleToCount.items() if count == 1 )
 
 # Compute the permutational
 def permutational_dist_wrapper( args ): return permutational_dist( *args )
@@ -29,7 +29,7 @@ def permutational_dist( sets, permuted_files ):
         permutedGeneToCases = defaultdict(set)
         for pf in pf_group:
             with open(pf, 'r') as IN:
-                for g, cases in list(json.load(IN)['geneToCases'].items()):
+                for g, cases in json.load(IN)['geneToCases'].items():
                     permutedGeneToCases[g] |= set(cases)
 
         reading_time = time() - reading_start
@@ -55,7 +55,7 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, num_co
     # Filter the sets based on the observed values
     k = len(next(iter(sets)))
     setToObs = dict( (M, observed_values(M, num_patients, geneToCases)) for M in sets )
-    sets = set( M for M, (X, T, Z, tbl) in list(setToObs.items()) if testable_set(k, T, Z, tbl) )
+    sets = set( M for M, (X, T, Z, tbl) in setToObs.items() if testable_set(k, T, Z, tbl) )
 
     # Compute the distribution of exclusivity for each pair across the permuted files
     np    = float(len(permuted_files))
@@ -70,19 +70,19 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, num_co
     setToDist, setToTime = defaultdict(list), dict()
     for dist, times in empirical_distributions:
         setToTime.update(list(times.items()))
-        for k, v in list(dist.tems()):
+        for k, v in dist.items():
             setToDist[k].extend(v)
 
     # Compute the observed values and then the P-values
     setToObs = dict( (M, setToObs[M]) for M in sets )
     setToPval = dict()
-    for M, (X, T, Z, tbl) in list(setToObs.items()):
+    for M, (X, T, Z, tbl) in setToObs.items():
         # Compute the P-value.
         count = sum( 1. for d in setToDist[M] if d >= T )
         setToPval[M] = count / np
 
     # Compute FDRs
-    tested_sets = list(setToPval.keys())
+    tested_sets = setToPval.keys()
     pvals = [ setToPval[M] for M in tested_sets ]
     setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))))
 
@@ -181,7 +181,7 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=
 
     # Make sure all P-values are numbers
     tested_sets = len(setToPval)
-    invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
+    invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
 
     # Report invalid sets
     if verbose > 0 and report_invalids:
@@ -192,9 +192,9 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=
             invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ])
         sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' )
 
-    setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets )
-    setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets )
-    setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets )
+    setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in invalid_sets )
+    setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets )
+    setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets )
 
     if verbose > 0:
         print('- Output {} sets'.format(len(setToPval)))
@@ -260,7 +260,7 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
 
     # Make sure all P-values are numbers
     tested_sets = len(setToPval)
-    invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
+    invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL )
 
     # Report invalid sets
     if verbose > 0 and report_invalids:
@@ -271,9 +271,9 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
             invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ])
         sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' )
 
-    setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets )
-    setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets )
-    setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets )
+    setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in invalid_sets )
+    setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets )
+    setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets )
 
     if verbose > 0:
         print('- Output {} sets'.format(len(setToPval)))
@@ -292,4 +292,4 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic,
 ################################################################################
 # Testable set
 def testable_set( k, T, Z, tbl ):
-    return T > Z and all( tbl[2**i] > 0 for i in list(range(k)) )
+    return T > Z and all( tbl[2**i] > 0 for i in range(k) )
diff --git a/wext/mcmc.py b/wext/mcmc.py
index 4c15fc2..1269378 100755
--- a/wext/mcmc.py
+++ b/wext/mcmc.py
@@ -4,6 +4,7 @@
 from collections import defaultdict
 from time import time
 from random import random, sample, choice, seed as random_seed
+from past.builtins import xrange
 
 from .constants import *
 from .enumerate_sets import observed_values
@@ -56,9 +57,9 @@ def _log_accept_ratio( W_current, W_next ):
     random_seed(seed)
     t          = len(ks)
     genespace  = list(geneToCases.keys())
-    setsToFreq = [ defaultdict(int) for _ in range(nchains) ]
+    setsToFreq = [ defaultdict(int) for _ in xrange(nchains) ]
     setToPval, setToObs =  dict(), dict()
-    for c in range(nchains):
+    for c in xrange(nchains):
         if verbose > 0: 
             print('- Experiment', c+1)
 
@@ -78,7 +79,7 @@ def _log_accept_ratio( W_current, W_next ):
                 sys.stdout.flush()
 
             # Sample the next gene to swap in/around the set
-            next_soln = dict( (index, set(M)) for index, M in list(soln.items()) )
+            next_soln = dict( (index, set(M)) for index, M in soln.items() )
             next_assigned = dict(list(assigned.items()))
             next_gene = choice(genespace)
 
diff --git a/wext/setup.py b/wext/setup.py
index c4ba272..38de6ac 100755
--- a/wext/setup.py
+++ b/wext/setup.py
@@ -3,8 +3,7 @@
 """Compiles the C modules used by the weighted exclusivity test."""
 
 # Load required modules
-from numpy.distutils.core import setup
-from numpy.distutils.extension import Extension
+from numpy.distutils.core import setup, Extension
 import numpy, os
 
 thisDir = os.path.dirname(os.path.realpath(__file__))

From 866d32e355c1629ce35e99f2bb0bf2f5366c9d9a Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 30 Sep 2018 08:38:57 -0400
Subject: [PATCH 59/60] added simple example adjacency list

---
 examples/simple/adjacency_list.tsv | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 examples/simple/adjacency_list.tsv

diff --git a/examples/simple/adjacency_list.tsv b/examples/simple/adjacency_list.tsv
new file mode 100644
index 0000000..b303963
--- /dev/null
+++ b/examples/simple/adjacency_list.tsv
@@ -0,0 +1,14 @@
+1	a	c	d	e	f
+2	b	c	d	g	h
+3	a	c	d
+4	b	c	d
+5	a	c	d
+6	b	c	d
+7	a	c	d
+8	b
+9	a
+10	b
+11	a	e
+12	b	g
+13	c	d
+14	c	d

From 3ee93a678a8df9cf2fd0e5c694117c4b784b18c6 Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 30 Sep 2018 09:01:10 -0400
Subject: [PATCH 60/60] revise source, iterate dict views directly instead of
 converting to list()

---
 experiments/eccb2016/scripts/results_table.py  | 8 ++++----
 experiments/eccb2016/scripts/weights_matrix.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/experiments/eccb2016/scripts/results_table.py b/experiments/eccb2016/scripts/results_table.py
index 1315dd0..7953f09 100755
--- a/experiments/eccb2016/scripts/results_table.py
+++ b/experiments/eccb2016/scripts/results_table.py
@@ -39,14 +39,14 @@
 # Load the triples
 with open(args.unweighted_exact_file, 'r') as IN:
     obj            = json.load(IN)
-    unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in list(obj['setToPval'].items()))
-    assert( all( not(isnan(pval)) for pval in list(unweightedPval.values()) ))
-    unweightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in list(obj['setToFDR'].items()))
+    unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].items())
+    assert( all( not(isnan(pval)) for pval in unweightedPval.values() ))
+    unweightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].items())
 
 with open(args.weighted_saddlepoint_file, 'r') as IN:
     obj          = json.load(IN)
     weightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].items())
-    assert( all( not(isnan(pval)) for pval in list(weightedPval.values()) ))
+    assert( all( not(isnan(pval)) for pval in weightedPval.values() ))
     weightedFDR  = dict((frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].items())
 
 print('Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.items() if fdr < args.fdr_cutoff), len(weightedFDR)))
diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py
index e98b8b0..6d958ba 100755
--- a/experiments/eccb2016/scripts/weights_matrix.py
+++ b/experiments/eccb2016/scripts/weights_matrix.py
@@ -37,7 +37,7 @@
 # Set up the figure
 fig, axes = plt.subplots( 1, len(args.cancers))
 fig.set_size_inches( len(args.cancers) * 5, 5)
-min_weight = min([ np.min(W) for W in list(cancerToWeights.values()) ])
+min_weight = min([ np.min(W) for W in cancerToWeights.values() ])
 print('Min weight:', min_weight)
 
 for ax, cancer in zip(axes, args.cancers):