raphael-group · evanbiederstedt · Aug 12, 2018 · Aug 12, 2018 · Aug 12, 2018 · Aug 14, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,21 @@
+language: python
+python:
+    - 2.7
+    - 3.4
+    - 3.5
+    - 3.6
+install:
+    - sudo apt-get -y update
+    - sudo apt-get -y install r-base
+    - sudo apt-get -y install python-matplotlib
+    - pip install codecov
+    - pip install -r requirements.txt
+    - cd wext
+    - python setup.py install
+    - cd ../
+    - pwd
+    - ls
+script:
+    - nosetests
+after_success:
+    - codecov
diff --git a/README.md b/README.md
@@ -1,18 +1,11 @@
 # Weighted Exclusivity Test (WExT) #
 
-The Weighted Exclusivity Test (WExT) was developed by the [Raphael research group](http://compbio.cs.brown.edu/) at Brown University.
-
-### Requirements ###
-
-Latest tested version in parentheses.
+[![Build Status](https://api.travis-ci.org/raphael-group/wext.svg?branch=master)](https://travis-ci.org/raphael-group/wext?branch=master)
 
-1. Python (2.7.9)
 
-    a. NumPy (1.11.0)
-
-    b. SciPy (0.17.0)
+The Weighted Exclusivity Test (WExT) was developed by the [Raphael research group](http://compbio.cs.brown.edu/) at Brown University.
 
-2. gcc (4.9.2)
+### Requirements ###
 
 We recommend using [`virtualenv`](https://virtualenv.pypa.io/en/latest/) to install the Python requirements. After installing `virtualenv`, you can install the Python requirements for the weighted exclusivity test as follows:
 
@@ -27,8 +20,7 @@ See the wiki for additional instructions on [Setup and installation](https://git
 The C and Fortran extensions must be compiled before running the weighted exclusivity test:
 
     cd wext
-    python setup.py build
-    f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module
+    python setup.py install
 
 ### Usage ###
 

diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py
@@ -4,10 +4,12 @@
 import sys, os, argparse, json, numpy as np, multiprocessing as mp, random
 from collections import defaultdict
 
+
 # Load the weighted exclusivity test
 this_dir = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(this_dir)
 from wext import *
+from past.builtins import xrange
 
 # Argument parser
 def get_parser():
@@ -20,12 +22,13 @@ def get_parser():
     parser.add_argument('-q', '--swap_multiplier', type=int, required=False, default=100)
     parser.add_argument('-nc', '--num_cores', type=int, required=False, default=1)
     parser.add_argument('-s', '--seed', type=int, required=False, default=None)
-    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=range(5))
+    parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=list(range(5)))
     return parser
 
-def permute_matrices_wrapper(args): return permute_matrices(*args)
-def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose,
-                     m, n, num_edges, indexToGene, indexToPatient):
+def permute_matrices_wrapper(args): 
+    return permute_matrices(*args)
+
+def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, m, n, num_edges, indexToGene, indexToPatient):
     # Initialize our output
     observed     = np.zeros((m, n))
     permutations = []
@@ -43,8 +46,8 @@ def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose,
             indices.append( (edge[0]-1, edge[1]-1) )
 
         # Record the permutation
-        observed[zip(*indices)] += 1.
-        geneToCases = dict( (g, list(cases)) for g, cases in geneToCases.iteritems() )
+        observed[tuple(zip(*indices))] += 1.
+        geneToCases = dict( (g, list(cases)) for g, cases in geneToCases.items())
         permutations.append( dict(geneToCases=geneToCases, permutation_number=seed) )
 
     return observed/float(len(seeds)), permutations
@@ -76,28 +79,28 @@ def run( args ):
 
     # Load mutation data
     if args.verbose > 0:
-        print '* Loading mutation data...'
+        print('* Loading mutation data...')
 
     mutation_data = load_mutation_data( args.mutation_file )
     genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data
 
-    geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.iteritems() )
-    patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.iteritems() )
+    geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.items()) 
+    patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.items()) 
     geneToIndex = dict( (g, i+1) for i, g in enumerate(all_genes) )
     indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) )
     patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) )
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in geneToCases.iteritems():
+    for gene, cases in geneToCases.items():
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
     edge_list = np.array(sorted(edges), dtype=np.int)
 
     # Run the bipartite edge swaps
     if args.verbose > 0:
-        print '* Permuting matrices...'
+        print('* Permuting matrices...')
 
     m = len(all_genes)
     n = len(patients)
@@ -127,7 +130,7 @@ def run( args ):
     # Create the weights file
     if args.weights_file:
         if args.verbose > 0:
-            print '* Saving weights file...'
+            print('* Saving weights file...')
 
         # Allow for small accumulated numerical errors
         tol = 1e3*max(m, n)*args.num_permutations*np.finfo(np.float64).eps
@@ -137,10 +140,10 @@ def run( args ):
         P = np.add.reduce(observeds) / float(len(observeds))
 
         # Verify the weights
-        for g, obs in geneToObserved.iteritems():
+        for g, obs in geneToObserved.items():
             assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol)
 
-        for p, obs in patientToObserved.iteritems():
+        for p, obs in patientToObserved.items():
             assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol)
 
         # Construct mutation matrix to compute marginals
@@ -154,12 +157,12 @@ def run( args ):
         P = postprocess_weight_matrix(P, r, s)
 
         # Verify the weights again
-        for g, obs in geneToObserved.iteritems():
+        for g, obs in geneToObserved.items():
             assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol)
 
-        for p, obs in patientToObserved.iteritems():
+        for p, obs in patientToObserved.items():
             assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol)
-
+ 
         # Add pseudocounts to entries with no mutations observed; unlikely or impossible after post-processing step
         P[P == 0] = 1./(2. * args.num_permutations)
 
@@ -171,7 +174,7 @@ def run( args ):
     if args.permutation_directory:
         output_prefix = args.permutation_directory + '/permuted-mutations-{}.json'
         if args.verbose > 0:
-            print '* Saving permuted mutation data...'
+            print('* Saving permuted mutation data...')
 
         for _, permutation_list in results:
             for permutation in permutation_list:
@@ -180,4 +183,5 @@ def run( args ):
                     permutation['params'] = params
                     json.dump( permutation, OUT )
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
+if __name__ == '__main__': 
+    run( get_parser().parse_args(sys.argv[1:]) )
diff --git a/examples/generate_data.py b/examples/generate_data.py
@@ -81,4 +81,5 @@ def run(args):
         raise NotImplementedError('Data generation mode "%s" is not implemented.' % args.mode)
     return
 
-if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) )
+if __name__ == '__main__': 
+    run( get_parser().parse_args(sys.argv[1:]) )
diff --git a/experiments/eccb2016/scripts/helper.py b/experiments/eccb2016/scripts/helper.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
+
 import numpy as np
+from past.builtins import xrange
 
 # Add a y=x line to the given matplotlib axis
 def add_y_equals_x(ax, c='k', line_style='--', alpha=0.75):
@@ -15,6 +17,7 @@ def add_y_equals_x(ax, c='k', line_style='--', alpha=0.75):
     ax.set_xlim(lims)
     ax.set_ylim(lims)
 
+
 def aligned_plaintext_table(table, sep='\t', spaces=2):
     """
     Create and return an aligned plaintext table.
@@ -41,6 +44,7 @@ def aligned_plaintext_table(table, sep='\t', spaces=2):
     # Return results.
     return '\n'.join([''.join([entries[i][j].rjust(sizes[j]+spaces) for j in range(n)]).rstrip() for i in range(m)])
 
+
 def rank(a, reverse=False, ties=2):
     """
     Find the ranks of the elements of a.

diff --git a/experiments/eccb2016/scripts/pairs_summary.py b/experiments/eccb2016/scripts/pairs_summary.py
@@ -86,7 +86,7 @@
                         "Cancer": cancer})
 df = pd.DataFrame(items)
 
-print 'Testing {} pairs...'.format(len(weighted_exact_pvals))
+print('Testing {} pairs...'.format(len(weighted_exact_pvals)))
 
 # Set up the figure
 fig, ((ax1, ax2, ax3, ax4)) = plt.subplots(1, 4)
@@ -138,15 +138,15 @@
 # Output the correlation between
 all_correlation = spearmanr(weighted_exact_pvals, weighted_saddlepoint_pvals)
 tail_correlation = spearmanr(weighted_exact_tail_pvals, weighted_saddlepoint_tail_pvals)
-print '-' * 14, 'Correlation: WRE (Saddlepoint) and WRE (Recursive)', '-' * 14
-print 'All: \\rho={:.5}, P={:.5}'.format(*all_correlation)
-print '\Phi_WR < 10^-4: \\rho={:.5}, P={:.5}'.format(*tail_correlation)
+print('{0} Correlation: WRE (Saddlepoint) and WRE (Recursive) {0}'.format('-' * 14))  # single argument: same output on Python 2 and 3
+print('All: \\rho={:.5}, P={:.5}'.format(*all_correlation))
+print('\\Phi_WR < 10^-4: \\rho={:.5}, P={:.5}'.format(*tail_correlation))  # '\\P' replaces the invalid '\P' escape; output unchanged
 
 # Output a table summarizing the runtimes (Table 3)
-print '-' * 35, 'Runtimes', '-' * 35
+print('{0} Runtimes {0}'.format('-' * 35))  # single argument: same output on Python 2 and 3
 tbl = ['#Method\tMinimum\tMedian\tMaximum\tTotal']
 for method in ["WRE (Exact)", "WRE (Saddlepoint)"]:
-    print method, sum(list(df.loc[df['Method'] == method]['Runtime (seconds)']))
+    print('{} {}'.format(method, sum(list(df.loc[df['Method'] == method]['Runtime (seconds)']))))  # method name, then its total runtime
 
 # Output to file
 plt.tight_layout()

diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py
@@ -16,7 +16,7 @@
 parser.add_argument('-o', '--output_prefix', type=str, required=True)
 parser.add_argument('-w', '--wext_directory', type=str, required=True)
 parser.add_argument('-j', '--job_id', type=int, required=job_id is None, default=job_id)
-parser.add_argument('-v', '--verbose', type=int, required=False, default=0, choices=range(5))
+parser.add_argument('-v', '--verbose', type=int, required=False, default=0, choices=list(range(5)))
 args = parser.parse_args( sys.argv[1:] )
 
 # Load weighted exclusivity test
@@ -25,19 +25,23 @@
 from wext import rce_permutation_test, load_mutation_data, output_enumeration_table
 
 # Load the mutation data
-if args.verbose > 0: print '* Loading mutation data..'
+if args.verbose > 0:
+    print('* Loading mutation data..')
 mutation_data = load_mutation_data( args.mutation_file, args.min_freq )
 genes, all_genes, patients, geneToCases, _, params, _ = mutation_data
 num_patients = len(patients)
 sets = list( frozenset(t) for t in combinations(genes, args.gene_set_size) )
 
-if args.verbose > 0: print '\t- Testing {} sets of size k={}'.format(len(sets), args.gene_set_size)
+if args.verbose > 0:
+    print('\t- Testing {} sets of size k={}'.format(len(sets), args.gene_set_size))
 
 # Run the permutational test
-if args.verbose > 0: print '* Running permutation test...'
+if args.verbose > 0:
+    print('* Running permutation test...')
 start_index = (args.job_id-1) * args.batch_size
 permuted_files = get_permuted_files([args.input_directory], args.num_permutations)[start_index:start_index + args.batch_size]
-if args.verbose > 0: print '\t- Testing {} files'.format(len(permuted_files))
+if args.verbose > 0:
+    print('\t- Testing {} files'.format(len(permuted_files)))
 
 setToPval, setToRuntime, setToFDR, setToObs = rce_permutation_test( sets, geneToCases, num_patients, permuted_files, 1, 0 )
 

diff --git a/experiments/eccb2016/scripts/permute_single_matrix.py b/experiments/eccb2016/scripts/permute_single_matrix.py
@@ -17,6 +17,7 @@ def get_parser():
                         default=os.environ.get('SGE_TASK_ID', 0))
     return parser
 
+
 def run( args ):
     # Load WExT
     sys.path.append(args.wext_dir)
@@ -33,7 +34,7 @@ def run( args ):
     indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) )
 
     edges = set()
-    for gene, cases in geneToCases.iteritems():
+    for gene, cases in geneToCases.items():
         for patient in cases:
             edges.add( (geneToIndex[gene], patientToIndex[patient]) )
 
@@ -57,16 +58,16 @@ def run( args ):
         permutedPatientToMutations[patient].add(gene)
 
     # Verify the number of mutations per gene/patient is preserved
-    for g, cases in geneToCases.iteritems():
+    for g, cases in geneToCases.items():
         assert( len(cases) == len(permutedGeneToCases[g]) )
 
-    for p, muts in patientToMutations.iteritems():
+    for p, muts in patientToMutations.items():
         assert( len(muts) == len(permutedPatientToMutations[p]) )
 
     # Save edge list.
     output_file = '{}-{}.json'.format(args.output_prefix, args.job_id)
     permutation = dict(params=params, permutation_number=args.job_id,
-                       geneToCases=dict( (g, list(cases)) for g, cases in permutedGeneToCases.iteritems()))
+                       geneToCases=dict( (g, list(cases)) for g, cases in permutedGeneToCases.items()))
     with open(output_file, 'w') as OUT: json.dump( permutation, OUT )
 
 if __name__ == '__main__':

diff --git a/experiments/eccb2016/scripts/pval_correlations.py b/experiments/eccb2016/scripts/pval_correlations.py
@@ -31,7 +31,7 @@
 # Compute the correlations with permutational
 # permutational_pvals_with_zeros = list(df.loc[df['Method'] == 'Permutational']['Raw P-value'])
 # all_indices =
-tests       = ["Permutational", "Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"]
+tests = ["Permutational", "Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"]
 for val, indices in [("All", []), (0, 1./args.num_permutations), (1./args.num_permutations, 2)]:
     tbl = [list(tests)]
     for t1 in tests:
@@ -46,33 +46,33 @@
                 row.append(rho)
         tbl.append(row)
 
-    print '-' * 80
-    print 'CORRELATIONS ({})'.format(val)
-    print aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ]) )
+    print('-' * 80)
+    print('CORRELATIONS ({})'.format(val))
+    print(aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ])))
 
 permutational_pvals_no_zeros = [ p for p in permutational_pvals_with_zeros if p > 0 ]
 for method in ["Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"]:
     pvals = list(df.loc[df['Method'] == method]['P-value'])
-    print 'Correlation:', method, 'with Permutational'
+    print('Correlation: {} with Permutational'.format(method))  # single argument: same output on Python 2 and 3
     rho, pval = spearmanr(permutational_pvals, pvals)
-    print '\tIncluding P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals), rho, pval)
+    print('\tIncluding P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals), rho, pval))
     pvals_no_zeros = [ p for i, p in enumerate(pvals) if permutational_pvals_with_zeros[i] > 0 ]
     rho, pval = spearmanr(permutational_pvals_no_zeros, pvals_no_zeros)
-    print '\tWithout P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals_no_zeros), rho, pval)
-print
+    print('\tWithout P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals_no_zeros), rho, pval))
+print('')  # keeps the blank separator line that the Python 2 bare `print` emitted
 # Compute the correlations of weighted saddlepoint and exact test
 weighted_exact_pvals = list(df.loc[df['Method'] == 'Weighted (exact test)']['P-value'])
 weighted_saddlepoint_pvals = list(df.loc[df['Method'] == 'Weighted (saddlepoint)']['P-value'])
 rho, pval = spearmanr(weighted_exact_pvals, weighted_saddlepoint_pvals)
 
-print 'Correlation of weighted exact test and saddlepoint (all P-values)'
-print '\tN={}, \\rho: {}, P={}'.format(len(weighted_exact_pvals), rho, pval)
+print('Correlation of weighted exact test and saddlepoint (all P-values)')
+print('\tN={}, \\rho: {}, P={}'.format(len(weighted_exact_pvals), rho, pval))
 
 tail_weighted_exact_pvals = [ p for p in weighted_exact_pvals if p < 1e-4 ]
 rho, pval = spearmanr(tail_weighted_exact_pvals, [ p for i, p in enumerate(weighted_saddlepoint_pvals) if weighted_exact_pvals[i] < 1e-4])
-print 'Correlation of weighted exact test and saddlepoint (P < 0.0001)'
-print '\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval)
+print('Correlation of weighted exact test and saddlepoint (P < 0.0001)')
+print('\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval))
 
 rho, pval = spearmanr(tail_weighted_exact_pvals, [ p for i, p in enumerate(permutational_pvals) if weighted_exact_pvals[i] < 1e-4])
-print 'Correlation of weighted exact test and permutational (P < 0.0001)'
-print '\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval)
+print('Correlation of weighted exact test and permutational (P < 0.0001)')
+print('\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval))