From 282b3ba2a8dc3a362a138741939e00a8fa85e788 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sat, 11 Aug 2018 21:37:54 -0400 Subject: [PATCH 01/60] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a77b0c5..42c842a 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ Latest tested version in parentheses. a. NumPy (1.11.0) b. SciPy (0.17.0) + + c. NetworkX (1.11) ?? 2. gcc (4.9.2) From a4106f64de00440c606ec2b8641b9929c1aaffd4 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Sat, 11 Aug 2018 21:49:53 -0400 Subject: [PATCH 02/60] revised requirements.txt, README --- README.md | 2 -- requirements.txt | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 42c842a..a77b0c5 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,6 @@ Latest tested version in parentheses. a. NumPy (1.11.0) b. SciPy (0.17.0) - - c. NetworkX (1.11) ?? 2. gcc (4.9.2) diff --git a/requirements.txt b/requirements.txt index 0e48cd0..12f43db 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -numpy -scipy -networkx +numpy >=1.11.0 +scipy >=0.17.0 +networkx >= 1.11 From 8effca7fafa408a7a0bbb0885198d6bc9a5bd031 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Sat, 11 Aug 2018 21:50:55 -0400 Subject: [PATCH 03/60] added travis config --- .travis.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..c6d60c5 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,17 @@ +language: python +python: + - 2.7 + - 3.4 + - 3.5 + - 3.6 +install: + - sudo apt-get -y update + - sudo apt-get -y update + - sudo apt-get -y install r-base + - sudo apt-get -y install python-matplotlib + - pip install codecov + - pip install -r requirements.txt +script: + - nosetests +after_success: + - codecov From 4ff185842255a2f67068fa4a9a004172fc1189ba Mon Sep 17 00:00:00 2001 From: Evan 
Biederstedt Date: Tue, 14 Aug 2018 16:18:36 -0400 Subject: [PATCH 04/60] updated to python3.x in "examples", "experiments", "viz" --- examples/generate_data.py | 2 +- experiments/eccb2016/scripts/helper.py | 11 +++--- experiments/eccb2016/scripts/pairs_summary.py | 14 +++---- .../scripts/permutation_test_helper.py | 2 +- .../eccb2016/scripts/permute_single_matrix.py | 10 ++--- .../eccb2016/scripts/pval_correlations.py | 28 +++++++------- .../reconcile_grid_permutation_test.py | 12 +++--- .../scripts/remove_genes_with_no_length.py | 6 +-- experiments/eccb2016/scripts/results_table.py | 38 ++++++++++--------- .../scripts/sample_mutation_frequency_plot.py | 8 ++-- .../eccb2016/scripts/triple_pval_scatter.py | 26 ++++++------- .../eccb2016/scripts/unweighted_comparison.py | 16 ++++---- .../eccb2016/scripts/weights_matrix.py | 15 ++++---- viz/generate_viz_data.py | 31 +++++++-------- viz/server.py | 5 ++- 15 files changed, 115 insertions(+), 109 deletions(-) diff --git a/examples/generate_data.py b/examples/generate_data.py index e5d5606..50200dc 100644 --- a/examples/generate_data.py +++ b/examples/generate_data.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, numpy as np, random diff --git a/experiments/eccb2016/scripts/helper.py b/experiments/eccb2016/scripts/helper.py index 597270b..2017240 100644 --- a/experiments/eccb2016/scripts/helper.py +++ b/experiments/eccb2016/scripts/helper.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 + import numpy as np # Add a y=x line to the given matplotlib axis @@ -29,14 +30,14 @@ def aligned_plaintext_table(table, sep='\t', spaces=2): # Find numbers of rows and columns. m = len(rows) - lengths = map(len, rows) + lengths = list(map(len, rows)) n = max(lengths) # Pad rows with a deficient number of columns. 
entries = [[rows[i][j] if j 0 ] for method in ["Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"]: pvals = list(df.loc[df['Method'] == method]['P-value']) - print 'Correlation:', method, 'with Permutational' + print('Correlation:', method, 'with Permutational') rho, pval = spearmanr(permutational_pvals, pvals) - print '\tIncluding P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals), rho, pval) + print('\tIncluding P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals), rho, pval)) pvals_no_zeros = [ p for i, p in enumerate(pvals) if permutational_pvals_with_zeros[i] > 0 ] rho, pval = spearmanr(permutational_pvals_no_zeros, pvals_no_zeros) - print '\tWithout P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals_no_zeros), rho, pval) -print + print('\tWithout P < {}: N={}, \\rho={}, P={}'.format(1./args.num_permutations, len(pvals_no_zeros), rho, pval)) + # Compute the correlations of weighted saddlepoint and exact test weighted_exact_pvals = list(df.loc[df['Method'] == 'Weighted (exact test)']['P-value']) weighted_saddlepoint_pvals = list(df.loc[df['Method'] == 'Weighted (saddlepoint)']['P-value']) rho, pval = spearmanr(weighted_exact_pvals, weighted_saddlepoint_pvals) -print 'Correlation of weighted exact test and saddlepoint (all P-values)' -print '\tN={}, \\rho: {}, P={}'.format(len(weighted_exact_pvals), rho, pval) +print('Correlation of weighted exact test and saddlepoint (all P-values)') +print('\tN={}, \\rho: {}, P={}'.format(len(weighted_exact_pvals), rho, pval)) tail_weighted_exact_pvals = [ p for p in weighted_exact_pvals if p < 1e-4 ] rho, pval = spearmanr(tail_weighted_exact_pvals, [ p for i, p in enumerate(weighted_saddlepoint_pvals) if weighted_exact_pvals[i] < 1e-4]) -print 'Correlation of weighted exact test and saddlepoint (P < 0.0001)' -print '\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval) +print('Correlation of weighted exact test and 
saddlepoint (P < 0.0001)') +print('\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval)) rho, pval = spearmanr(tail_weighted_exact_pvals, [ p for i, p in enumerate(permutational_pvals) if weighted_exact_pvals[i] < 1e-4]) -print 'Correlation of weighted exact test and permutational (P < 0.0001)' -print '\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval) +print('Correlation of weighted exact test and permutational (P < 0.0001)') +print('\tN={}, \\rho: {}, P={}'.format(len(tail_weighted_exact_pvals), rho, pval)) diff --git a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py index 3b123a2..2f3e250 100644 --- a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py +++ b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, json, multiprocessing as mp @@ -42,7 +42,7 @@ def load_json_files(( json_files )): json_files = [ '{}/{}'.format(root, f) for f in files if f.lower().endswith('.json') ] # Set up the multiprocessing and run -print '* Loading {} JSON files...'.format(len(json_files)) +print('* Loading {} JSON files...'.format(len(json_files))) num_cores = args.num_cores if args.num_cores != -1 else mp.cpu_count() if num_cores != 1: pool = mp.Pool(num_cores) @@ -58,7 +58,7 @@ def load_json_files(( json_files )): pool.join() # Merge the results -print '\t- Merging results...' 
+print('\t- Merging results...') setToCount = defaultdict( int ) setToRuntime = defaultdict( float ) setToObs = dict() @@ -72,16 +72,16 @@ def load_json_files(( json_files )): setToPval = dict( (M, count/num_permutations) for M, count in setToCount.iteritems() ) -print '\t- Loaded {} sets with {} permutations'.format(len(setToPval), int(num_permutations)) +print('\t- Loaded {} sets with {} permutations'.format(len(setToPval), int(num_permutations))) # Compute FDR -print '* Computing FDRs...' +print('* Computing FDRs...') tested_sets = setToPval.keys() pvals = [ setToPval[M] for M in tested_sets ] setToFDR = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))) # Output the merged file -print '* Outputting to file...' +print('* Outputting to file...') k = len(tested_sets[0]) args.json_format = True args.test = 'RCE' diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py index 5441499..3aa9e9f 100644 --- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py +++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, json @@ -28,8 +28,8 @@ obj['num_genes'] = len(obj['genes']) obj['params']['lengths_file'] = os.path.abspath(args.lengths_file) obj['genes_with_no_length_removed'] = sorted(original_genes - set(obj['genes'])) -obj['patientToMutations'] = dict( (p, sorted(set(muts) & remaining_genes)) for p, muts in obj['patientToMutations'].iteritems() ) -print 'Removed {} genes with no length'.format(len(obj['genes_with_no_length_removed'])) +obj['patientToMutations'] = dict((p, sorted(set(muts) & remaining_genes)) for p, muts in iter(list(obj['patientToMutations'].items()))) +print('Removed {} genes with no length'.format(len(obj['genes_with_no_length_removed']))) # Output the new file with open(args.output_file, 'w') as OUT: diff --git 
a/experiments/eccb2016/scripts/results_table.py b/experiments/eccb2016/scripts/results_table.py index a8514db..ad39a94 100755 --- a/experiments/eccb2016/scripts/results_table.py +++ b/experiments/eccb2016/scripts/results_table.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, json @@ -22,47 +22,49 @@ with open(args.lengths_file, 'r') as IN: arrs = [ l.rstrip().split('\t') for l in IN if not l.startswith('#') ] geneToLength = dict( (arr[0], float(arr[1])) for arr in arrs ) - lengths = geneToLength.values() + lengths = list(geneToLength.values()) length_ranks = rank(lengths, reverse=True) geneToLengthRank = defaultdict( lambda : args.length_threshold + 1 ) - geneToLengthRank.update(zip(geneToLength.keys(), length_ranks)) - threshold_gene = sorted(geneToLength.keys(), key=lambda g: geneToLengthRank[g])[args.length_threshold] - print 'Length of {} longest gene: {}'.format(args.length_threshold, geneToLength[threshold_gene]) + geneToLengthRank.update(list(zip(list(geneToLength.keys()), length_ranks))) + threshold_gene = sorted(list(geneToLength.keys()), key=lambda g: geneToLengthRank[g])[args.length_threshold] + print('Length of {} longest gene: {}'.format(args.length_threshold, geneToLength[threshold_gene])) # Load the mutations with open(args.mutation_file, 'r') as IN: obj = json.load(IN) genes, patients = obj['genes'], obj['patients'] hypermutators = set(obj['hypermutators']) - geneToCases = dict( (g, set(cases)) for g, cases in obj['geneToCases'].iteritems() ) + geneToCases = dict((g, set(cases)) for g, cases in iter(list(obj['geneToCases'].items()))) # Load the triples with open(args.unweighted_exact_file, 'r') as IN: obj = json.load(IN) - unweightedPval = dict( (frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].iteritems() ) - assert( all( not(isnan(pval)) for pval in unweightedPval.values() )) - unweightedFDR = dict( (frozenset(t.split('\t')), fdr) for t, fdr in 
obj['setToFDR'].iteritems() ) + unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in iter(list(obj['setToPval'].items()))) + assert( all( not(isnan(pval)) for pval in list(unweightedPval.values()) )) + unweightedFDR = dict((frozenset(t.split('\t')), fdr) for t, fdr in iter(list(obj['setToFDR'].items()))) with open(args.weighted_saddlepoint_file, 'r') as IN: obj = json.load(IN) - weightedPval = dict( (frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].iteritems() ) - assert( all( not(isnan(pval)) for pval in weightedPval.values() )) - weightedFDR = dict( (frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].iteritems() ) + weightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in iter(list(obj['setToPval'].items()))) + assert( all( not(isnan(pval)) for pval in list(weightedPval.values()) )) + weightedFDR = dict((frozenset(t.split('\t')), fdr) for t, fdr in iter(list(obj['setToFDR'].items()))) -print 'Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.iteritems() if fdr < args.fdr_cutoff), len(weightedFDR)) -print 'Triples with unweighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in unweightedFDR.iteritems() if fdr < args.fdr_cutoff), len(unweightedFDR)) +print('Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.iteritems() if fdr < args.fdr_cutoff), len(weightedFDR))) +print('Triples with unweighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in unweightedFDR.iteritems() if fdr < args.fdr_cutoff), len(unweightedFDR))) # Rank triples by P-value triples = sorted(set(weightedPval.keys()) & set(unweightedPval.keys())) top_weighted_triples = sorted(triples, key=lambda t: weightedPval[t]) -weightedRank = dict(zip(triples, rank([ weightedPval[t] for t in triples ]))) +weightedRank = dict(list(zip(triples, rank([ weightedPval[t] for t in triples ])))) top_unweighted_triples = sorted(triples, key=lambda t: 
unweightedPval[t]) -unweightedRank = dict(zip(triples, rank([ unweightedPval[t] for t in triples ]))) +unweightedRank = dict(list(zip(triples, rank([ unweightedPval[t] for t in triples ])))) # Create tables def length_indicate(g): - if geneToLengthRank[g] > args.length_threshold: return g - else: return '\\textbf{%s}' % g + if geneToLengthRank[g] > args.length_threshold: + return g + else: + return '\\textbf{%s}' % g header = ['CoMEt rank', 'Weighted rank', 'Triple', 'Phi(M)', 'Psi(M)', 'Hypermutator mutations'] tbl = [ header ] diff --git a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py index 3435794..77a25df 100755 --- a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py +++ b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import matplotlib @@ -26,12 +26,12 @@ # Make a map of patients to their mutated genes patientToMutations = dict( (p, set()) for p in patients ) - for g, cases in obj['geneToCases'].iteritems(): + for g, cases in list(obj['geneToCases'].items()): for p in cases: patientToMutations[p].add( g ) # Assemble the data into dictionaries for Pandas - for p, mutations in patientToMutations.iteritems(): + for p, mutations in list(patientToMutations.items()): ty = "Hypermutator" if p in hypermutators else "Non-hypermutator" items.append({ "Sample": p, "Mutated genes per sample": len(mutations), "Type": ty, "Cancer": cancer }) @@ -51,4 +51,4 @@ non_hyper_rates = list(df.loc[(df['Cancer'] == c) & (df['Type'] == "Non-hypermutator")]['Mutated genes per sample']) tbl.append([ c, np.median(all_rates), np.median(hyper_rates) if len(hyper_rates) > 0 else '--', np.median(non_hyper_rates)]) -print aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ])) +print(aligned_plaintext_table('\n'.join([ '\t'.join(map(str, row)) for row in tbl ]))) diff --git 
a/experiments/eccb2016/scripts/triple_pval_scatter.py b/experiments/eccb2016/scripts/triple_pval_scatter.py index 59a6eee..da966b3 100755 --- a/experiments/eccb2016/scripts/triple_pval_scatter.py +++ b/experiments/eccb2016/scripts/triple_pval_scatter.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import matplotlib @@ -36,16 +36,16 @@ with open(permuted_file, 'r') as IN: setToPermuted.update( json.load(IN)['setToPval'] ) -for M, pval in setToPermuted.iteritems(): +for M, pval in list(setToPermuted.items()): if pval == 0: setToPermuted[M] = 1./args.num_permutations sets = set(setToWeighted.keys()) & set(setToUnweighted.keys()) & set(setToPermuted.keys()) -print '* Loaded weighted/unweighted P-values in {} triples...'.format(len(setToWeighted)) -print '\t- Weighted range: [{}, {}]'.format(np.min(setToWeighted.values()), np.max(setToWeighted.values())) -print '\t- Unweighted range: [{}, {}]'.format(np.min(setToUnweighted.values()), np.max(setToUnweighted.values())) -print '* Loaded permuted P-values for {} sets ({} intersection)...'.format(len(setToPermuted), len(sets)) +print('* Loaded weighted/unweighted P-values in {} triples...'.format(len(setToWeighted))) +print('\t- Weighted range: [{}, {}]'.format(np.min(setToWeighted.values()), np.max(setToWeighted.values()))) +print('\t- Unweighted range: [{}, {}]'.format(np.min(setToUnweighted.values()), np.max(setToUnweighted.values()))) +print('* Loaded permuted P-values for {} sets ({} intersection)...'.format(len(setToPermuted), len(sets))) # Create two scatter plots fig, (ax1, ax2) = plt.subplots(1, 2) @@ -77,17 +77,17 @@ ax2.plot(ax2.get_xlim(), ax2.get_xlim(), ls="--", c=".3") # Output maximum deviation and correlations -print 'Max deviation permutational vs. weighted (1E-3 to 1E-5):', +print('Max deviation permutational vs. 
weighted (1E-3 to 1E-5):') deviations = [ (x, y, np.abs(y/x)) for x, y in zip(xs, ys) if 1e-3 > x > 1e-5 ] if deviations: - print max(deviations, key=lambda (x, y, z): z) + print(max(deviations, key=lambda (x, y, z): z)) else: - print 'None in p-value interval' + print('None in p-value interval') -print 'Unweighted correlation (all): \\rho={}'.format(unweighted_rho) -print 'Unweighted correlation (P<0.001): \\rho={}'.format(unweighted_tail_rho) -print 'Weighted correlation (all): \\rho={}'.format(weighted_rho) -print 'Weighted correlation (P<0.001): \\rho={}'.format(weighted_tail_rho) +print('Unweighted correlation (all): \\rho={}'.format(unweighted_rho)) +print('Unweighted correlation (P<0.001): \\rho={}'.format(unweighted_tail_rho)) +print('Weighted correlation (all): \\rho={}'.format(weighted_rho)) +print('Weighted correlation (P<0.001): \\rho={}'.format(weighted_tail_rho)) # Output to file plt.tight_layout() diff --git a/experiments/eccb2016/scripts/unweighted_comparison.py b/experiments/eccb2016/scripts/unweighted_comparison.py index 6367890..b6ffce4 100755 --- a/experiments/eccb2016/scripts/unweighted_comparison.py +++ b/experiments/eccb2016/scripts/unweighted_comparison.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 #Load required modules import matplotlib @@ -27,7 +27,7 @@ exactPval[cancer] = obj['setToPval'] exactRuntime[cancer] = obj['setToRuntime'] -num_exact = sum( 1 for c in args.cancers for M in exactPval[c].keys() ) +num_exact = sum(1 for c in args.cancers for M in list(exactPval[c].keys())) for cancer, saddlepoint_file in zip(args.cancers, args.saddlepoint_files): with open(saddlepoint_file, 'r') as IN: @@ -35,8 +35,8 @@ saddlepointPval[cancer] = obj['setToPval'] saddlepointRuntime[cancer] = obj['setToRuntime'] -num_saddlepoint = sum( 1 for c in args.cancers for M in saddlepointPval[c].keys() ) -print '* Loaded {} exact sets and {} saddlepoint sets...'.format(num_exact, num_saddlepoint) +num_saddlepoint = sum(1 for c in 
args.cancers for M in list(saddlepointPval[c].keys())) +print('* Loaded {} exact sets and {} saddlepoint sets...'.format(num_exact, num_saddlepoint)) # Construct the arrays of data saddlepoint_pvals, exact_pvals, items = [], [], [] @@ -52,13 +52,13 @@ df = pd.DataFrame(items) -print '* Testing {} triples in the intersection (ignoring sets with invalid P-values)...'.format(len(saddlepoint_pvals)) +print('* Testing {} triples in the intersection (ignoring sets with invalid P-values)...'.format(len(saddlepoint_pvals))) # Output spearman correlations between the saddlepoint and exact rho, pval = spearmanr(exact_pvals, saddlepoint_pvals) -print '-' * 80 -print 'CORRELATION' -print "Spearman's Rho: {}\nSpearman's P-value: {}\n".format(rho, pval) +print('-' * 80) +print('CORRELATION') +print("Spearman's Rho: {}\nSpearman's P-value: {}\n".format(rho, pval)) # Set up the figure fig, (ax1, ax2) = plt.subplots(1, 2) diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py index e7d2c54..d263bca 100755 --- a/experiments/eccb2016/scripts/weights_matrix.py +++ b/experiments/eccb2016/scripts/weights_matrix.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import matplotlib @@ -17,7 +17,7 @@ assert( len(args.cancers) == len(args.weights_files) == len(args.mutation_files) ) # Load the mutation file -print '* Loading mutation files...' 
+print('* Loading mutation files...') cancerToWeights, cancerToPatients, cancerToGenes, cancerToHypermutators, patientToMutations, geneToCases = dict(), dict(), dict(), dict(), dict(), dict() for cancer, mutation_file, weights_file in zip(args.cancers, args.mutation_files, args.weights_files): with open(mutation_file, 'r') as IN: @@ -28,24 +28,25 @@ cancerToHypermutators[cancer] = set(obj['hypermutators']) geneToCases[cancer] = obj['geneToCases'] patientToMutations[cancer] = dict( (p, set()) for p in obj['patients'] ) - for g, cases in geneToCases[cancer].iteritems(): + for g, cases in list(geneToCases[cancer].items()): for p in cases: patientToMutations[cancer][p].add( g ) cancerToWeights[cancer] = np.load(weights_file) - print '\t{}\n\t\t- Genes: {}\n\t\t- Patients: {}'.format(cancer, num_genes, num_patients) + print('\t{}\n\t\t- Genes: {}\n\t\t- Patients: {}'.format(cancer, num_genes, num_patients)) # Set up the figure fig, axes = plt.subplots( 1, len(args.cancers)) fig.set_size_inches( len(args.cancers) * 5, 5) -min_weight = min([ np.min(W) for W in cancerToWeights.values() ]) -print 'Min weight:', min_weight +min_weight = min([ np.min(W) for W in list(cancerToWeights.values()) ]) +print('Min weight:', min_weight) + for ax, cancer in zip(axes, args.cancers): # Sort the weights so that hypermutators are all on one side patients = cancerToPatients[cancer] genes = cancerToGenes[cancer] hypermutators = cancerToHypermutators[cancer] num_non_hypermutators = len(set(patients) - hypermutators) - patient_indices = sorted(range(len(patients)), key=lambda p: (patients[p] in hypermutators, len(patientToMutations[cancer][patients[p]]))) + patient_indices = sorted(list(range(len(patients))), key=lambda p: (patients[p] in hypermutators, len(patientToMutations[cancer][patients[p]]))) gene_indices = sorted([ i for i, g in enumerate(genes) if g in geneToCases[cancer]], key=lambda g: len(geneToCases[cancer].get(genes[g], [])), reverse=True) weights = [ row[patient_indices] for 
row in cancerToWeights[cancer][gene_indices] ] diff --git a/viz/generate_viz_data.py b/viz/generate_viz_data.py index a675096..cf80f37 100755 --- a/viz/generate_viz_data.py +++ b/viz/generate_viz_data.py @@ -35,42 +35,42 @@ def run( args ): method_paren = '' if is_rce else ' ({})'.format(params['method']) run_name = '{}{}'.format(params['test'], method_paren) methods.add( run_name ) - setToPval[run_name].update( obj['setToPval'].items() ) - setToRuntime[run_name].update( obj['setToRuntime'].items() ) - setToFDR[run_name].update( obj['setToFDR'].items() ) - setToObs[run_name].update(obj['setToObs'].items() ) + setToPval[run_name].update( list(obj['setToPval'].items()) ) + setToRuntime[run_name].update( list(obj['setToRuntime'].items()) ) + setToFDR[run_name].update( list(obj['setToFDR'].items()) ) + setToObs[run_name].update(list(obj['setToObs'].items()) ) sets |= set(obj['setToPval'].keys()) # Load the mutation data mutation_data = load_mutation_data( args.mutation_file, min_frequency ) genes, _, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data num_genes, num_patients = len(genes), len(patients) - geneToIndex = dict(zip(genes, range(num_genes))) + geneToIndex = dict(list(zip(genes, list(range(num_genes))))) patientToType = dict( (p, "Hypermutator" if p in hypermutators else "Non-hypermutator") for p in patients ) # Load the weights P = np.load(args.weights_file) - P = dict( (g, dict(zip(patients, P[geneToIndex[g]]))) for g in genes ) + P = dict( (g, dict(list(zip(patients, P[geneToIndex[g]])))) for g in genes ) # Restrict the sets (if necessary) if args.num_sets: new_sets = set() for run_name in methods: - new_sets |= set(sorted( setToPval[run_name].keys(), key=lambda M: setToPval[run_name][M] )[:args.num_sets]) + new_sets |= set(sorted( list(setToPval[run_name].keys()), key=lambda M: setToPval[run_name][M] )[:args.num_sets]) sets = new_sets - setToPval = dict( (run_name, dict( (M, pval) for M, pval in setToPval[run_name].iteritems() 
if M in new_sets)) for run_name in methods ) - setToRuntime = dict( (run_name, dict( (M, pval) for M, pval in setToRuntime[run_name].iteritems() if M in new_sets)) for run_name in methods ) - setToObs = dict( (run_name, dict( (M, pval) for M, pval in setToObs[run_name].iteritems() if M in new_sets)) for run_name in methods ) - setToFDR = dict( (run_name, dict( (M, pval) for M, pval in setToFDR[run_name].iteritems() if M in new_sets)) for run_name in methods ) + setToPval = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToPval[run_name].items())) if M in new_sets)) for run_name in methods ) + setToRuntime = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToRuntime[run_name].items())) if M in new_sets)) for run_name in methods ) + setToObs = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToObs[run_name].items())) if M in new_sets)) for run_name in methods ) + setToFDR = dict( (run_name, dict( (M, pval) for M, pval in iter(list(setToFDR[run_name].items())) if M in new_sets)) for run_name in methods ) # Restrict the weights genes_in_sets = set( g for M in sets for g in M.split('\t') ) P = dict( (g, P[g]) for g in genes_in_sets ) - geneToCases = dict( (g, cases) for g, cases in geneToCases.iteritems() if g in genes_in_sets ) + geneToCases = dict( (g, cases) for g, cases in iter(list(geneToCases.items())) if g in genes_in_sets ) - print '* Considering {} sets...'.format(len(new_sets)) + print('* Considering {} sets...'.format(len(new_sets))) # Output the JSON file with open(args.output_file, 'w') as OUT: @@ -81,11 +81,12 @@ def run( args ): params['weights_file'] = os.path.abspath(args.weights_file) # Output - output = dict(params=params, geneToCases=dict( (g, list(cases)) for g, cases in geneToCases.iteritems() ), + output = dict(params=params, geneToCases=dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) ), setToPval=setToPval, methods=sorted(methods), patientToType=patientToType, setToFDR=setToFDR, 
setToRuntime=setToRuntime, setToObs=setToObs, sets=list(sets), genes=list(genes), patients=patients, P=P) json.dump( output, OUT ) -if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) ) +if __name__ == '__main__': + run( get_parser().parse_args(sys.argv[1:]) ) diff --git a/viz/server.py b/viz/server.py index 568c36d..38a21ab 100644 --- a/viz/server.py +++ b/viz/server.py @@ -47,7 +47,8 @@ def run( args ): # Start server app = tornado.web.Application(routes) app.listen(args.port) - print 'Listening on port {}'.format(args.port) + print('Listening on port {}'.format(args.port)) tornado.ioloop.IOLoop.current().start() -if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) ) +if __name__ == '__main__': + run( get_parser().parse_args(sys.argv[1:]) ) From f4c339cf652a305d8c92c94bb379b1c23698c07e Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Tue, 14 Aug 2018 16:37:07 -0400 Subject: [PATCH 05/60] updated certain scripts to python3.x --- compute_mutation_probabilities.py | 42 +++++++++++++++-------------- find_exclusive_sets.py | 29 +++++++++++--------- find_sets.py | 30 ++++++++++++--------- process_mutations.py | 44 ++++++++++++++++++------------- 4 files changed, 81 insertions(+), 64 deletions(-) diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py index da2a41d..a924427 100755 --- a/compute_mutation_probabilities.py +++ b/compute_mutation_probabilities.py @@ -20,12 +20,13 @@ def get_parser(): parser.add_argument('-q', '--swap_multiplier', type=int, required=False, default=100) parser.add_argument('-nc', '--num_cores', type=int, required=False, default=1) parser.add_argument('-s', '--seed', type=int, required=False, default=None) - parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=range(5)) + parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=list(range(5))) return parser -def permute_matrices_wrapper(args): return 
permute_matrices(*args) -def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, - m, n, num_edges, indexToGene, indexToPatient): +def permute_matrices_wrapper(args): + return permute_matrices(*args) + +def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, m, n, num_edges, indexToGene, indexToPatient): # Initialize our output observed = np.zeros((m, n)) permutations = [] @@ -43,8 +44,8 @@ def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, indices.append( (edge[0]-1, edge[1]-1) ) # Record the permutation - observed[zip(*indices)] += 1. - geneToCases = dict( (g, list(cases)) for g, cases in geneToCases.iteritems() ) + observed[list(zip(*indices))] += 1. + geneToCases = dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) ) permutations.append( dict(geneToCases=geneToCases, permutation_number=seed) ) return observed/float(len(seeds)), permutations @@ -60,7 +61,7 @@ def postprocess_weight_matrix(P, r, s): # Average weights over entries of weight matrix with same marginals P_mean = np.zeros(np.shape(P)) - for marginals, indices in marginals_to_indices.items(): + for marginals, indices in list(marginals_to_indices.items()): mean_value = float(sum(P[i, j] for i, j in indices))/float(len(indices)) for i, j in indices: P_mean[i, j] = mean_value @@ -76,20 +77,20 @@ def run( args ): # Load mutation data if args.verbose > 0: - print '* Loading mutation data...' 
+ print('* Loading mutation data...') mutation_data = load_mutation_data( args.mutation_file ) genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data - geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.iteritems() ) - patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.iteritems() ) + geneToObserved = dict( (g, len(cases)) for g, cases in iter(list(geneToCases.items())) ) + patientToObserved = dict( (p, len(muts)) for p, muts in iter(list(patientToMutations.items())) ) geneToIndex = dict( (g, i+1) for i, g in enumerate(all_genes) ) indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) ) patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) ) indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) ) edges = set() - for gene, cases in geneToCases.iteritems(): + for gene, cases in list(geneToCases.items()): for patient in cases: edges.add( (geneToIndex[gene], patientToIndex[patient]) ) @@ -97,7 +98,7 @@ def run( args ): # Run the bipartite edge swaps if args.verbose > 0: - print '* Permuting matrices...' + print('* Permuting matrices...') m = len(all_genes) n = len(patients) @@ -106,7 +107,7 @@ def run( args ): max_tries = 10**9 if args.seed is not None: random.seed(args.seed) - seeds = random.sample(xrange(1, 2*10**9), args.num_permutations) + seeds = random.sample(list(range(1, 2*10**9)), args.num_permutations) # Run the bipartite edge swaps in parallel if more than one core indicated num_cores = min(args.num_cores if args.num_cores != -1 else mp.cpu_count(), args.num_permutations) @@ -127,7 +128,7 @@ def run( args ): # Create the weights file if args.weights_file: if args.verbose > 0: - print '* Saving weights file...' 
+ print('* Saving weights file...') # Allow for small accumulated numerical errors tol = 1e3*max(m, n)*args.num_permutations*np.finfo(np.float64).eps @@ -137,10 +138,10 @@ def run( args ): P = np.add.reduce(observeds) / float(len(observeds)) # Verify the weights - for g, obs in geneToObserved.iteritems(): + for g, obs in list(geneToObserved.items()): assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol) - for p, obs in patientToObserved.iteritems(): + for p, obs in list(patientToObserved.items()): assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol) # Construct mutation matrix to compute marginals @@ -154,10 +155,10 @@ def run( args ): P = postprocess_weight_matrix(P, r, s) # Verify the weights again - for g, obs in geneToObserved.iteritems(): + for g, obs in list(geneToObserved.items()): assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol) - for p, obs in patientToObserved.iteritems(): + for p, obs in list(patientToObserved.items()): assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol) # Add pseudocounts to entries with no mutations observed; unlikely or impossible after post-processing step @@ -171,7 +172,7 @@ def run( args ): if args.permutation_directory: output_prefix = args.permutation_directory + '/permuted-mutations-{}.json' if args.verbose > 0: - print '* Saving permuted mutation data...' 
+ print('* Saving permuted mutation data...') for _, permutation_list in results: for permutation in permutation_list: @@ -180,4 +181,5 @@ def run( args ): permutation['params'] = params json.dump( permutation, OUT ) -if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) ) +if __name__ == '__main__': + run( get_parser().parse_args(sys.argv[1:]) ) diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py index d15ae05..88a6401 100755 --- a/find_exclusive_sets.py +++ b/find_exclusive_sets.py @@ -123,12 +123,12 @@ def run( args ): # Load the mutation data if args.verbose > 0: - print ('-' * 30), 'Input Mutation Data', ('-' * 29) + print(('-' * 30), 'Input Mutation Data', ('-' * 29)) genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex = load_mutation_files( args.mutation_files ) num_all_genes, num_patients = len(genes), len(patients) # Restrict to genes mutated in a minimum number of samples - geneToCases = dict( (g, cases) for g, cases in geneToCases.iteritems() if g in genes and len(cases) >= args.min_frequency ) + geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency ) genes = set(geneToCases.keys()) num_genes = len(genes) @@ -141,7 +141,7 @@ def run( args ): # Since we are looking for co-occurrence between exclusive sets with # an annotation A, we add events for each patient NOT annotated by # the given annotation - for annotation, cases in annotationToPatients.iteritems(): + for annotation, cases in list(annotationToPatients.items()): not_cases = patients - cases if len(not_cases) > 0: geneToCases[annotation] = not_cases @@ -149,18 +149,18 @@ def run( args ): annotations = set() if args.verbose > 0: - print '- Genes:', num_all_genes - print '- Patients:', num_patients - print '- Genes mutated in >={} patients: {}'.format(args.min_frequency, num_genes) + print('- Genes:', num_all_genes) + print('- Patients:', num_patients) + print('- Genes mutated in >={} 
patients: {}'.format(args.min_frequency, num_genes)) if args.patient_annotation_file: - print '- Patient annotations:', len(annotations) + print('- Patient annotations:', len(annotations)) # Load the weights (if necessary) test = nameToTest[args.test] if test == WRE: # Create master versions of the indices - masterGeneToIndex = dict(zip(sorted(genes), range(num_genes))) - masterPatientToIndex = dict( zip(sorted(patients), range(num_patients)) ) + masterGeneToIndex = dict(list(zip(sorted(genes), list(range(num_genes))))) + masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients))))) geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex) else: geneToP = None @@ -169,17 +169,19 @@ def run( args ): if test == RCE: permuted_files = get_permuted_files(args.permuted_matrix_directories, args.num_permutations) if args.verbose > 0: - print '* Using {} permuted matrix files'.format(len(permuted_files)) + print('* Using {} permuted matrix files'.format(len(permuted_files))) #Enumeration if args.search_strategy == 'Enumerate': - if args.verbose > 0: print ('-' * 31), 'Enumerating Sets', ('-' * 31) + if args.verbose > 0: + print(('-' * 31), 'Enumerating Sets', ('-' * 31)) for k in set( args.gene_set_sizes ): # we don't need to enumerate the same size more than once # Create a list of sets to test sets = list( frozenset(t) for t in combinations(genes, k) ) num_sets = len(sets) - if args.verbose > 0: print 'k={}: {} sets...'.format(k, num_sets) + if args.verbose > 0: + print('k={}: {} sets...'.format(k, num_sets)) if test == RCE: # Run the permutational setToPval, setToRuntime, setToFDR, setToObs = rce_permutation_test( sets, geneToCases, num_patients, permuted_files, args.num_cores, args.verbose ) @@ -199,4 +201,5 @@ def run( args ): else: raise NotImplementedError("Strategy '{}' not implemented.".format(args.strategy)) -if __name__ == '__main__': run( 
get_parser().parse_args(sys.argv[1:]) ) +if __name__ == '__main__': + run( get_parser().parse_args(sys.argv[1:]) ) diff --git a/find_sets.py b/find_sets.py index 903f3c3..1154da7 100755 --- a/find_sets.py +++ b/find_sets.py @@ -87,7 +87,8 @@ def load_mutation_files(mutation_files): genes |= set(type_genes) # Record the mutations in each gene - for g, cases in typeGeneToCases.iteritems(): geneToCases[g] |= cases + for g, cases in list(typeGeneToCases.items()): + geneToCases[g] |= cases # Record the genes, patients, and their indices for later typeToGeneIndex.append(dict(zip(type_genes, range(len(type_genes))))) @@ -101,12 +102,12 @@ def run( args ): # Load the mutation data if args.verbose > 0: - print ('-' * 30), 'Input Mutation Data', ('-' * 29) + print(('-' * 30), 'Input Mutation Data', ('-' * 29)) genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex = load_mutation_files( args.mutation_files ) num_all_genes, num_patients = len(genes), len(patients) # Restrict to genes mutated in a minimum number of samples - geneToCases = dict( (g, cases) for g, cases in geneToCases.iteritems() if g in genes and len(cases) >= args.min_frequency ) + geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency ) genes = set(geneToCases.keys()) num_genes = len(genes) @@ -119,7 +120,7 @@ def run( args ): # Since we are looking for co-occurrence between exclusive sets with # an annotation A, we add events for each patient NOT annotated by # the given annotation - for annotation, cases in annotationToPatients.iteritems(): + for annotation, cases in list(annotationToPatients.items()): not_cases = patients - cases if len(not_cases) > 0: geneToCases[annotation] = not_cases @@ -127,26 +128,28 @@ def run( args ): annotations = set() if args.verbose > 0: - print '- Genes:', num_all_genes - print '- Patients:', num_patients - print '- Genes mutated in >={} patients: {}'.format(args.min_frequency, num_genes) + 
print('- Genes:', num_all_genes) + print('- Patients:', num_patients) + print('- Genes mutated in >={} patients: {}'.format(args.min_frequency, num_genes)) if args.patient_annotation_file: - print '- Patient annotations:', len(annotations) + print('- Patient annotations:', len(annotations)) # Load the weights (if necessary) # Create master versions of the indices - masterGeneToIndex = dict(zip(sorted(genes), range(num_genes))) - masterPatientToIndex = dict( zip(sorted(patients), range(num_patients)) ) + masterGeneToIndex = dict(list(zip(sorted(genes), list(range(num_genes))))) + masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients))))) geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex) - if args.verbose > 0: print ('-' * 31), 'Enumerating Sets', ('-' * 31) + if args.verbose > 0: + print(('-' * 31), 'Enumerating Sets', ('-' * 31)) k = args.gene_set_size # Create a list of sets to test sets = list( frozenset(t) for t in combinations(genes, k) ) num_sets = len(sets) - if args.verbose > 0: print 'k={}: {} sets...'.format(k, num_sets) + if args.verbose > 0: + print('k={}: {} sets...'.format(k, num_sets)) # Run the test method = nameToMethod['Saddlepoint'] test = nameToTest['WRE'] @@ -155,4 +158,5 @@ def run( args ): verbose=args.verbose, report_invalids=args.report_invalids) output_enumeration_table( args, k, setToPval, setToRuntime, setToFDR, setToObs, args.fdr_threshold ) -if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) ) +if __name__ == '__main__': + run( get_parser().parse_args(sys.argv[1:]) ) diff --git a/process_mutations.py b/process_mutations.py index 4b39585..c886271 100755 --- a/process_mutations.py +++ b/process_mutations.py @@ -19,11 +19,12 @@ def get_parser(): parser.add_argument('-ivs', '--ignored_validation_statuses', type=str, required=False, nargs='*', default=['Wildtype', 'Invalid']) parser.add_argument('-o', 
'--output_file', type=str, required=True) - parser.add_argument('-v', '--verbose', type=int, default=1, required=False, choices=range(5)) + parser.add_argument('-v', '--verbose', type=int, default=1, required=False, choices=list(range(5))) return parser def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc, vt, vs, ivc, ivt, ivs, verbose ): - if verbose > 1: print '\tLoading MAF:', maf_file + if verbose > 1: + print('\tLoading MAF:', maf_file) genes, patients = set(), set() with open(maf_file, 'r') as IN: seenHeader = False @@ -31,7 +32,7 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc arr = l.rstrip('\n').split('\t') # Parse the header if we haven't seen it yet if not seenHeader and arr[0].lower() == 'hugo_symbol': - arr = map(str.lower, arr) + arr = list(map(str.lower, arr)) seenHeader = True gene_index = 0 patient_index = arr.index('tumor_sample_barcode') @@ -44,7 +45,8 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc # Record the patients and genes, even if we ignore their mutations patient, gene = '-'.join(arr[patient_index].split('-')[:3]), arr[gene_index] - if not patientWhitelist[patient]: continue + if not patientWhitelist[patient]: + continue patients.add(patient) genes.add(gene) @@ -83,7 +85,8 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc return genes, patients def process_events_file( events_file, patientWhitelist, geneToCases, patientToMutations, verbose ): - if verbose > 1: print '\tProcessing events file:', events_file + if verbose > 1: + print('\tProcessing events file:', events_file) # Parse the events file events, patients = set(), set() @@ -92,7 +95,8 @@ def process_events_file( events_file, patientWhitelist, geneToCases, patientToMu for arr in arrs: # Skip patients that aren't whitelisted patient, mutations = arr[0], set(arr[1:]) - if not patientWhitelist[patient]: continue + if not patientWhitelist[patient]: + 
continue # Record the events and mutations patients.add(patient) @@ -112,16 +116,19 @@ def run( args ): # Load the patient whitelist (if supplied) if args.patient_whitelist: - if args.verbose > 0: print '* Loading patient whitelist...' + if args.verbose > 0: + print('* Loading patient whitelist...') patientWhitelist = defaultdict( lambda : False ) with open(args.patient_whitelist, 'r') as IN: patientWhitelist.update( (l.rstrip('\n').split()[0], True) for l in IN if not l.startswith('#') ) else: - if args.verbose > 0: print '* No patient whitelist provided, including all patients...' + if args.verbose > 0: + print('* No patient whitelist provided, including all patients...') patientWhitelist = defaultdict( lambda : True ) # Load the mutations from each MAF - if args.verbose > 0: print '* Loading and combining {} datasets...'.format(len(args.cancer_types)) + if args.verbose > 0: + print('* Loading and combining {} datasets...'.format(len(args.cancer_types))) geneToCases, patientToMutations = defaultdict( set ), defaultdict( set ) genes, patients = set(), set() vc, vt, vs = set(), set(), set() # variant classes/types and validation statuses @@ -154,12 +161,12 @@ def run( args ): # Summarize the data if args.verbose > 0: - print '* Summary of mutation data...' 
- print '\tGenes: {}'.format(num_genes) - print '\tPatients: {} ({} hypermutators)'.format(num_patients, len(hypermutators)) - print '\tUsed variant classes:', ', '.join(sorted(vc)) - print '\tUsed variant types:', ', '.join(sorted(vt)) - print '\tUsed validation statuses:', ', '.join(sorted(vs)) + print('* Summary of mutation data...') + print('\tGenes: {}'.format(num_genes)) + print('\tPatients: {} ({} hypermutators)'.format(num_patients, len(hypermutators))) + print('\tUsed variant classes:', ', '.join(sorted(vc))) + print('\tUsed variant types:', ', '.join(sorted(vt))) + print('\tUsed validation statuses:', ', '.join(sorted(vs))) # Output to file with open(args.output_file, 'w') as OUT: @@ -171,10 +178,11 @@ def run( args ): patient_whitelist_file=os.path.abspath(args.patient_whitelist) if args.patient_whitelist else None, hypermutators_file=os.path.abspath(args.hypermutators_file) if args.hypermutators_file else None) output = dict(params=params, patients=patients, genes=genes, hypermutators=list(hypermutators), - geneToCases=dict( (g, list(cases)) for g, cases in geneToCases.items()), + geneToCases=dict( (g, list(cases)) for g, cases in list(geneToCases.items())), patientToType=patientToType, - patientToMutations=dict( (p, list(muts)) for p, muts in patientToMutations.items()), + patientToMutations=dict( (p, list(muts)) for p, muts in list(patientToMutations.items())), num_genes=num_genes, num_patients=num_patients) json.dump( output, OUT ) -if __name__ == '__main__': run( get_parser().parse_args( sys.argv[1:]) ) +if __name__ == '__main__': + run( get_parser().parse_args( sys.argv[1:]) ) From 98086fc423372fcba36d9445eba6db9e87172bc9 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Tue, 14 Aug 2018 16:37:37 -0400 Subject: [PATCH 06/60] updated certain scripts to python3.x --- compute_mutation_probabilities.py | 2 +- find_exclusive_sets.py | 2 +- find_sets.py | 2 +- process_mutations.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py index a924427..acac439 100755 --- a/compute_mutation_probabilities.py +++ b/compute_mutation_probabilities.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, json, numpy as np, multiprocessing as mp, random diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py index 88a6401..40eb062 100755 --- a/find_exclusive_sets.py +++ b/find_exclusive_sets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, numpy as np, json diff --git a/find_sets.py b/find_sets.py index 1154da7..69e8388 100755 --- a/find_sets.py +++ b/find_sets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, numpy as np, json diff --git a/process_mutations.py b/process_mutations.py index c886271..4d4a9a3 100755 --- a/process_mutations.py +++ b/process_mutations.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, os, argparse, json, numpy as np From e3a0a710eb4ba977eb3a0aad98140dc983e86ef5 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Wed, 15 Aug 2018 03:04:34 -0400 Subject: [PATCH 07/60] ported scripts from "wext" to python3.x --- wext/__init__.py | 2 +- wext/constants.py | 2 +- wext/enumerate_sets.py | 72 ++++++++++++++++++--------------------- wext/exact.py | 8 ++--- wext/exclusivity_tests.py | 4 +-- wext/i_o.py | 20 +++++------ wext/mcmc.py | 31 +++++++++-------- wext/saddlepoint.py | 4 ++- wext/setup.py | 2 +- wext/statistics.py | 6 ++-- 10 files changed, 76 insertions(+), 75 deletions(-) diff --git a/wext/__init__.py b/wext/__init__.py index 08c6b5e..c746bc2 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Import modules. 
from constants import * diff --git a/wext/constants.py b/wext/constants.py index 7cffc13..9f6425b 100755 --- a/wext/constants.py +++ b/wext/constants.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # P-values are called invalid if P > 1+PTOL or P < -PTOL PTOL = 10**-3 diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py index 780deb6..25d7c0f 100755 --- a/wext/enumerate_sets.py +++ b/wext/enumerate_sets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import sys, multiprocessing as mp, json @@ -17,7 +17,7 @@ # Compute the mutual exclusivity T for the given gene set def T(M, geneToCases): sampleToCount = Counter( s for g in M for s in geneToCases.get(g, []) ) - return sum( 1 for sample, count in sampleToCount.iteritems() if count == 1 ) + return sum( 1 for sample, count in list(sampleToCount.items()) if count == 1 ) # Compute the permutational def permutational_dist_wrapper( args ): return permutational_dist( *args ) @@ -29,7 +29,7 @@ def permutational_dist( sets, permuted_files ): permutedGeneToCases = defaultdict(set) for pf in pf_group: with open(pf, 'r') as IN: - for g, cases in json.load(IN)['geneToCases'].iteritems(): + for g, cases in list(json.load(IN)['geneToCases'].items()): permutedGeneToCases[g] |= set(cases) reading_time = time() - reading_start @@ -55,7 +55,7 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, num_co # Filter the sets based on the observed values k = len(next(iter(sets))) setToObs = dict( (M, observed_values(M, num_patients, geneToCases)) for M in sets ) - sets = set( M for M, (X, T, Z, tbl) in setToObs.iteritems() if testable_set(k, T, Z, tbl) ) + sets = set( M for M, (X, T, Z, tbl) in list(setToObs.items()) if testable_set(k, T, Z, tbl) ) # Compute the distribution of exclusivity for each pair across the permuted files np = float(len(permuted_files)) @@ -69,22 +69,22 @@ def rce_permutation_test(sets, geneToCases, num_patients, permuted_files, 
num_co # Merge the different distributions setToDist, setToTime = defaultdict(list), dict() for dist, times in empirical_distributions: - setToTime.update(times.items()) - for k, v in dist.iteritems(): + setToTime.update(list(times.items())) + for k, v in list(dist.items()): setToDist[k].extend(v) # Compute the observed values and then the P-values setToObs = dict( (M, setToObs[M]) for M in sets ) setToPval = dict() - for M, (X, T, Z, tbl) in setToObs.iteritems(): + for M, (X, T, Z, tbl) in list(setToObs.items()): # Compute the P-value. count = sum( 1. for d in setToDist[M] if d >= T ) setToPval[M] = count / np # Compute FDRs - tested_sets = setToPval.keys() + tested_sets = list(setToPval.keys()) pvals = [ setToPval[M] for M in tested_sets ] - setToFDR = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))) + setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))) return setToPval, setToTime, setToFDR, setToObs @@ -151,8 +151,6 @@ def test_set_group( sets, geneToCases, num_patients, method, test, P=None, verbo setToTime[M] = time() - start - if verbose > 1: print - return setToPval, setToTime, setToObs def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores=1, verbose=0, @@ -177,13 +175,13 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores= # Combine the dictionaries setToPval, setToTime, setToObs = dict(), dict(), dict() for pval, time, obs in results: - setToPval.update(pval.items()) - setToTime.update(time.items()) - setToObs.update(obs.items()) + setToPval.update(list(pval.items())) + setToTime.update(list(time.items())) + setToObs.update(list(obs.items())) # Make sure all P-values are numbers tested_sets = len(setToPval) - invalid_sets = set( M for M, pval in setToPval.iteritems() if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) + invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) # Report 
invalid sets if verbose > 0 and report_invalids: @@ -194,19 +192,19 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores= invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ]) sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' ) - setToPval = dict( (M, pval) for M, pval in setToPval.iteritems() if not M in invalid_sets ) - setToTime = dict( (M, runtime) for M, runtime in setToTime.iteritems() if not M in invalid_sets ) - setToObs = dict( (M, obs) for M, obs in setToObs.iteritems() if not M in invalid_sets ) + setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets ) + setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets ) + setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets ) if verbose > 0: - print '- Output {} sets'.format(len(setToPval)) - print '\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets)) - print '\tIgnored {} sets with Z >= T or a gene with no exclusive mutations'.format(len(sets)-tested_sets) + print('- Output {} sets'.format(len(setToPval))) + print('\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets))) + print('\tIgnored {} sets with Z >= T or a gene with no exclusive mutations'.format(len(sets)-tested_sets)) # Compute the FDRs - tested_sets = setToPval.keys() + tested_sets = list(setToPval.keys()) pvals = [ setToPval[M] for M in tested_sets ] - setToFDR = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))) + setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))) return setToPval, setToTime, setToFDR, setToObs @@ -232,8 +230,6 @@ def general_test_set_group( sets, geneToCases, num_patients, method, test, stati setToPval[M] = general_wre_test( sorted_M, geneToCases, [ P[g] for g in sorted_M ], statistic ) setToTime[M] = time() - start - if 
verbose > 1: print - return setToPval, setToTime, setToObs def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, P=None, num_cores=1, verbose=0, @@ -258,13 +254,13 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, # Combine the dictionaries setToPval, setToTime, setToObs = dict(), dict(), dict() for pval, time, obs in results: - setToPval.update(pval.items()) - setToTime.update(time.items()) - setToObs.update(obs.items()) + setToPval.update(list(pval.items())) + setToTime.update(list(time.items())) + setToObs.update(list(obs.items())) # Make sure all P-values are numbers tested_sets = len(setToPval) - invalid_sets = set( M for M, pval in setToPval.iteritems() if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) + invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) # Report invalid sets if verbose > 0 and report_invalids: @@ -275,19 +271,19 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ]) sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' ) - setToPval = dict( (M, pval) for M, pval in setToPval.iteritems() if not M in invalid_sets ) - setToTime = dict( (M, runtime) for M, runtime in setToTime.iteritems() if not M in invalid_sets ) - setToObs = dict( (M, obs) for M, obs in setToObs.iteritems() if not M in invalid_sets ) + setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets ) + setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets ) + setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets ) if verbose > 0: - print '- Output {} sets'.format(len(setToPval)) - print '\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets)) - print '\tIgnored {} sets with Z >= T or a gene 
with no exclusive mutations'.format(len(sets)-tested_sets) + print('- Output {} sets'.format(len(setToPval))) + print('\tRemoved {} sets with NaN or invalid P-values'.format(len(invalid_sets))) + print('\tIgnored {} sets with Z >= T or a gene with no exclusive mutations'.format(len(sets)-tested_sets)) # Compute the FDRs - tested_sets = setToPval.keys() + tested_sets = list(setToPval.keys()) pvals = [ min(max(0.0, setToPval[M]), 1.0) for M in tested_sets ] - setToFDR = dict(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY"))) + setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))) return setToPval, setToTime, setToFDR, setToObs @@ -296,4 +292,4 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, ################################################################################ # Testable set def testable_set( k, T, Z, tbl ): - return T > Z and all( tbl[2**i] > 0 for i in range(k) ) + return T > Z and all( tbl[2**i] > 0 for i in list(range(k)) ) diff --git a/wext/exact.py b/wext/exact.py index 052a578..32636d9 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import numpy as np import wext_exact_test from constants import * -def exact_test( t, x, p, verbose=False ): +def exact_test(t, x, p, verbose=False): k = len(x) if k == 2: return exact_test_k2( t, x, p, verbose ) @@ -19,11 +19,11 @@ def exact_test_k3(t, x, p, verbose): return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p ) # Wrapper for k=2 exact test C function -def exact_test_k2(t, (x, y), (p_x, p_y), verbose): +def exact_test_k2(t, x, y, p_x, p_y, verbose): # Two-sided test N = len(p_x) z = (x + y - t)/2 # count number of co-occurrences - tail_masses = wext_exact_test.conditional(N, range(z+1), x, y, p_x, p_y) + tail_masses = wext_exact_test.conditional(N, list(range(z+1)), x, y, p_x, p_y) obs_mass = tail_masses[-1] pval = sum(tail_masses) return 
pval diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py index 985ce2e..dfc4322 100755 --- a/wext/exclusivity_tests.py +++ b/wext/exclusivity_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Load required modules import numpy as np @@ -22,7 +22,7 @@ def wre_test(t, x, p, method=EXACT, verbose=0): # Check that the probabilities are in (0, 1]. assert(all(0= min_freq ) + genes = set( g for g, cases in list(geneToCases.items()) if len(cases) >= min_freq ) return genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators @@ -34,11 +34,11 @@ def load_patient_annotation_file(patient_annotation_file): # Converts keys from an iterable to tab-separated, so the dictionary can be # output as JSON def convert_dict_for_json( setToVal, sep='\t' ): - return dict( (sep.join(sorted(M)), val) for M, val in setToVal.iteritems() ) + return dict( (sep.join(sorted(M)), val) for M, val in list(setToVal.items()) ) # Converts tab-separated keys back to frozensets def convert_dict_from_json( setToVal, sep='\t', iterable=frozenset ): - return dict( (iterable(M.split(sep)), val) for M, val in setToVal.iteritems() ) + return dict( (iterable(M.split(sep)), val) for M, val in list(setToVal.items()) ) # Create the header strings for a contingency table def create_tbl_header( k ): @@ -54,7 +54,7 @@ def output_enumeration_table(args, k, setToPval, setToRuntime, setToFDR, setToOb if not args.json_format: # Construct the rows rows = [] - for M, pval in setToPval.iteritems(): + for M, pval in list(setToPval.items()): if setToFDR[M]<=fdr_threshold: X, T, Z, tbl = setToObs[M] row = [ ', '.join(sorted(M)), pval, setToFDR[M], setToRuntime[M], T, Z ] + tbl @@ -90,14 +90,14 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs): params = vars(args) output = dict(params=params, setToPval=convert_dict_for_json(setToPval), setToObs=convert_dict_for_json(setToObs), - setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for 
sets, freq in setsToFreq.iteritems() )) + setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for sets, freq in list(setsToFreq.items()) )) with open(args.output_prefix + '.json', 'w') as OUT: json.dump( output, OUT ) else: # Output a gene set file with open(args.output_prefix + '-sampled-collections.tsv', 'w') as OUT: rows = [] - for sets, freq in setsToFreq.iteritems(): + for sets, freq in list(setsToFreq.items()): row = [ ' '.join([ ','.join(M) for M in sets ]), freq ] row.append( sum( -np.log10(setToPval[M] ** args.alpha) for M in sets )) rows.append(row) @@ -109,7 +109,7 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs): # Output each of the sample gene sets with open(args.output_prefix + '-sampled-sets.tsv', 'w') as OUT: rows = [] - for M, pval in setToPval.iteritems(): + for M, pval in list(setToPval.items()): X, T, Z, tbl = setToObs[M] rows.append([ ','.join(sorted(M)), pval, T, Z] + tbl ) rows.sort(key=lambda r: r[1]) diff --git a/wext/mcmc.py b/wext/mcmc.py index 06ca876..550b541 100755 --- a/wext/mcmc.py +++ b/wext/mcmc.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import sys, os, numpy as np from collections import defaultdict @@ -10,7 +10,8 @@ from exclusivity_tests import re_test, wre_test def mcmc(ks, geneToCases, num_patients, method, test, geneToP, seed, annotations=set(), verbose=0, step_len=100, nchains=1, niters=1000, alpha=1): - if verbose > 0: print '-' * 33, 'Running MCMC', '-' * 33 + if verbose > 0: + print('-' * 33, 'Running MCMC', '-' * 33) # Set up a local version of the weight function if test == WRE: @@ -45,7 +46,7 @@ def _collection_weight(collection): return sum( _weight(M) for M in collection ) def _to_collection(solution): - return frozenset( frozenset(M) for M in solution.values() ) + return frozenset( frozenset(M) for M in list(solution.values()) ) # Compute the acceptance ratio def _log_accept_ratio( W_current, W_next ): return W_next - W_current @@ -53,11 +54,12 @@ def 
_log_accept_ratio( W_current, W_next ): return W_next - W_current # Set up PRNG, sample space, and output random_seed(seed) t = len(ks) - genespace = geneToCases.keys() - setsToFreq = [ defaultdict(int) for _ in xrange(nchains) ] + genespace = list(geneToCases.keys()) + setsToFreq = [ defaultdict(int) for _ in range(nchains) ] setToPval, setToObs = dict(), dict() - for c in xrange(nchains): - if verbose > 0: print '- Experiment', c+1 + for c in range(nchains): + if verbose > 0: + print('- Experiment', c+1) # Seed Markov chain soln, assigned = choose_random_set(ks, genespace) @@ -75,8 +77,8 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current sys.stdout.flush() # Sample the next gene to swap in/around the set - next_soln = dict( (index, set(M)) for index, M in soln.iteritems() ) - next_assigned = dict(assigned.items()) + next_soln = dict( (index, set(M)) for index, M in list(soln.items()) ) + next_assigned = dict(list(assigned.items())) next_gene = choice(genespace) # There are two possibilities for the next gene @@ -86,7 +88,7 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current # if we only have one set, we can't swap between sets if t == 1: continue i = next_assigned[next_gene] - swap_gene = choice([ g for g in next_assigned.keys() if g not in next_soln[i] ]) + swap_gene = choice([ g for g in list(next_assigned.keys()) if g not in next_soln[i] ]) j = next_assigned[swap_gene] next_assigned[swap_gene] = i next_soln[i].add(swap_gene) @@ -101,14 +103,15 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current # 2) The gene is not in the current solution. In this case, we choose # a random gene in the solution to remove, and add the next gene. 
else: - swap_gene = choice(next_assigned.keys()) + swap_gene = choice(list(next_assigned.keys())) j = next_assigned[swap_gene] del next_assigned[swap_gene] next_assigned[next_gene] = j next_soln[j].remove(swap_gene) next_soln[j].add(next_gene) - if not _valid_set(next_soln[j]): continue + if not _valid_set(next_soln[j]): + continue # Compare the current soln to the next soln next_weight = _collection_weight(_to_collection(next_soln)) @@ -121,12 +124,12 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current setsToFreq[c][_to_collection(soln)] += 1 if verbose > 0: - print '\r[' + ('='*71) + '>] 100%' + print('\r[' + ('='*71) + '>] 100%') # Merge the various chains setsToTotalFreq = defaultdict(int) for counter in setsToFreq: - for sets, freq in counter.iteritems(): + for sets, freq in list(counter.items()): setsToTotalFreq[sets] += freq return setsToTotalFreq, setToPval, setToObs diff --git a/wext/saddlepoint.py b/wext/saddlepoint.py index 02334c2..dac4e44 100644 --- a/wext/saddlepoint.py +++ b/wext/saddlepoint.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import numpy as np from numpy.linalg import det from scipy.optimize import fsolve @@ -82,7 +84,7 @@ def saddlepoint(observed_t, observed_y, probabilities, condition='exclusivity'): w = np.zeros((2**k, n)) for i, state in enumerate(states): - w[i, :] = np.product(p[state, range(k), :], axis=0) + w[i, :] = np.product(p[state, list(range(k)), :], axis=0) # Define the moment generating functions and cumulant generating functions. These functions # use the above constants. 
diff --git a/wext/setup.py b/wext/setup.py index 072ae0f..873678a 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """Compiles the C modules used by the weighted exclusivity test.""" diff --git a/wext/statistics.py b/wext/statistics.py index 4ceef54..1593b16 100755 --- a/wext/statistics.py +++ b/wext/statistics.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import numpy as np @@ -36,7 +36,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'): sorted_q_values = np.zeros(n) sorted_q_values[n-1] = min(sorted_p_values[n-1], 1.0) - for i in reversed(range(n-1)): + for i in reversed(list(range(n-1))): sorted_q_values[i] = min(float(n)/float(i+1)*sorted_p_values[i], sorted_q_values[i+1]) q_values = np.zeros(n) @@ -49,7 +49,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'): c = np.sum(1.0/np.arange(1, n+1, dtype=np.float64)) sorted_q_values = np.zeros(n) sorted_q_values[n-1] = min(c*sorted_p_values[n-1], 1.0) - for i in reversed(range(n-1)): + for i in reversed(list(range(n-1))): sorted_q_values[i] = min(c*(float(n)/float(i+1))*sorted_p_values[i], sorted_q_values[i+1]) q_values = np.zeros(n) From f613b306dd28f96e925a6f4e42d4d3ab5def88dc Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Wed, 15 Aug 2018 03:35:36 -0400 Subject: [PATCH 08/60] update travis config, compile 'wext' C/Fortran code --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index c6d60c5..16933a0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,9 @@ install: - sudo apt-get -y install python-matplotlib - pip install codecov - pip install -r requirements.txt + - cd wext + - python setup.py build + - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module script: - nosetests after_success: From e45829c6317974262c989f6b8e94ba7ec497f41e Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Wed, 15 Aug 2018 03:43:24 -0400 Subject: 
[PATCH 09/60] revise travis config --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 16933a0..fc7f9bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,7 @@ install: - cd wext - python setup.py build - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module + - cd .. script: - nosetests after_success: From 4612a70b88cdc9110099d277b002b07355e98eac Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Wed, 15 Aug 2018 03:55:35 -0400 Subject: [PATCH 10/60] revise using python3.x syntax for explicit relative imports --- wext/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/wext/__init__.py b/wext/__init__.py index c746bc2..1c5a62b 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 # Import modules. -from constants import * -from statistics import * -from i_o import * -from enumerate_sets import * -from mcmc import mcmc -from exact import exact_test +from .constants import * +from .statistics import * +from .i_o import * +from .enumerate_sets import * +from .mcmc import mcmc +from .exact import exact_test import cpoibin -from saddlepoint import saddlepoint +from .saddlepoint import saddlepoint from comet_exact_test import comet_exact_test -from exclusivity_tests import re_test, wre_test +from .exclusivity_tests import re_test, wre_test from bipartite_edge_swap_module import bipartite_edge_swap From 844ec7075fd064c5e8a0f87e364b7aedf391a8e4 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Wed, 15 Aug 2018 04:03:15 -0400 Subject: [PATCH 11/60] fixed relative import syntax for all scripts in "wext" --- wext/enumerate_sets.py | 6 +++--- wext/exact.py | 2 +- wext/exclusivity_tests.py | 6 +++--- wext/i_o.py | 2 +- wext/mcmc.py | 6 +++--- wext/saddlepoint.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py index 25d7c0f..8b80295 100755 --- 
a/wext/enumerate_sets.py +++ b/wext/enumerate_sets.py @@ -7,9 +7,9 @@ from math import ceil, isnan # Load local modules -from exclusivity_tests import wre_test, re_test, general_wre_test -from constants import * -from statistics import multiple_hypothesis_correction +from .exclusivity_tests import wre_test, re_test, general_wre_test +from .constants import * +from .statistics import multiple_hypothesis_correction ################################################################################ # Permutational test diff --git a/wext/exact.py b/wext/exact.py index 32636d9..5b79167 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -2,7 +2,7 @@ import numpy as np import wext_exact_test -from constants import * +from .constants import * def exact_test(t, x, p, verbose=False): k = len(x) diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py index dfc4322..697cfd9 100755 --- a/wext/exclusivity_tests.py +++ b/wext/exclusivity_tests.py @@ -2,10 +2,10 @@ # Load required modules import numpy as np -from constants import * -from exact import exact_test +from .constants import * +from .exact import exact_test import cpoibin -from saddlepoint import saddlepoint, check_condition +from .saddlepoint import saddlepoint, check_condition from comet_exact_test import comet_exact_test import warnings diff --git a/wext/i_o.py b/wext/i_o.py index fd7c706..241f340 100755 --- a/wext/i_o.py +++ b/wext/i_o.py @@ -3,7 +3,7 @@ # Load required modules import sys, os, json, numpy as np from collections import defaultdict -from constants import * +from .constants import * # Load mutation data from one of our processed JSON file def load_mutation_data( mutation_file, min_freq=1 ): diff --git a/wext/mcmc.py b/wext/mcmc.py index 550b541..675a410 100755 --- a/wext/mcmc.py +++ b/wext/mcmc.py @@ -5,9 +5,9 @@ from time import time from random import random, sample, choice, seed as random_seed -from constants import * -from enumerate_sets import observed_values -from exclusivity_tests import 
re_test, wre_test +from .constants import * +from .enumerate_sets import observed_values +from .exclusivity_tests import re_test, wre_test def mcmc(ks, geneToCases, num_patients, method, test, geneToP, seed, annotations=set(), verbose=0, step_len=100, nchains=1, niters=1000, alpha=1): if verbose > 0: diff --git a/wext/saddlepoint.py b/wext/saddlepoint.py index dac4e44..8869c79 100644 --- a/wext/saddlepoint.py +++ b/wext/saddlepoint.py @@ -5,7 +5,7 @@ from scipy.optimize import fsolve from scipy.stats import norm import itertools -from constants import * +from .constants import * def check_condition(state, condition): From 300f1d82cc7cffd47e280926c1d091802ae3e787 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Wed, 15 Aug 2018 04:10:05 -0400 Subject: [PATCH 12/60] attempt to fix issue with wext_exact_test --- wext/exact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/exact.py b/wext/exact.py index 5b79167..39232b9 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import numpy as np -import wext_exact_test +from .wext_exact_test import * from .constants import * def exact_test(t, x, p, verbose=False): From 4a87caa6aeab785ecde6eb98cab8e05cd06cabe3 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Wed, 15 Aug 2018 04:14:58 -0400 Subject: [PATCH 13/60] 2nd attempt to fix issue with wext_exact_test --- wext/exact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/exact.py b/wext/exact.py index 39232b9..fc3c64d 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import numpy as np -from .wext_exact_test import * +import .wext_exact_test from .constants import * def exact_test(t, x, p, verbose=False): From 36a22bc327ae4088675a604567d5087dc0788f02 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Wed, 15 Aug 2018 04:19:12 -0400 Subject: [PATCH 14/60] 3rd attempt to fix issue with wext_exact_test --- wext/exact.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/wext/exact.py b/wext/exact.py index fc3c64d..c7494ac 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import numpy as np -import .wext_exact_test +from .wext_exact_test import triple_exact_test from .constants import * def exact_test(t, x, p, verbose=False): @@ -16,7 +16,7 @@ def exact_test(t, x, p, verbose=False): # Wrapper for k=3 exact test C function def exact_test_k3(t, x, p, verbose): N = len(p[0]) - return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p ) + return triple_exact_test( N, t, x[0], x[1], x[2], p ) # Wrapper for k=2 exact test C function def exact_test_k2(t, x, y, p_x, p_y, verbose): From 7c6427ede313c692f2f0b90c8e1c189eefb28087 Mon Sep 17 00:00:00 2001 From: Evan Biederstedt Date: Mon, 20 Aug 2018 23:09:01 -0400 Subject: [PATCH 15/60] removed python3 shebangs --- compute_mutation_probabilities.py | 2 +- examples/generate_data.py | 5 +++-- experiments/eccb2016/scripts/helper.py | 4 +++- experiments/eccb2016/scripts/pairs_summary.py | 2 +- experiments/eccb2016/scripts/permutation_test_helper.py | 2 +- experiments/eccb2016/scripts/permute_single_matrix.py | 3 ++- experiments/eccb2016/scripts/pval_correlations.py | 4 ++-- .../eccb2016/scripts/reconcile_grid_permutation_test.py | 2 +- experiments/eccb2016/scripts/remove_genes_with_no_length.py | 2 +- experiments/eccb2016/scripts/results_table.py | 2 +- .../eccb2016/scripts/sample_mutation_frequency_plot.py | 2 +- experiments/eccb2016/scripts/triple_pval_scatter.py | 2 +- experiments/eccb2016/scripts/unweighted_comparison.py | 2 +- experiments/eccb2016/scripts/weights_matrix.py | 2 +- find_exclusive_sets.py | 2 +- find_sets.py | 2 +- process_mutations.py | 2 +- wext/__init__.py | 2 +- wext/constants.py | 2 +- wext/enumerate_sets.py | 2 +- wext/exact.py | 2 +- wext/exclusivity_tests.py | 2 +- wext/i_o.py | 2 +- wext/mcmc.py | 2 +- wext/saddlepoint.py | 2 +- wext/setup.py | 2 +- wext/statistics.py | 2 +- 27 
files changed, 33 insertions(+), 29 deletions(-) diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py index acac439..a924427 100755 --- a/compute_mutation_probabilities.py +++ b/compute_mutation_probabilities.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, json, numpy as np, multiprocessing as mp, random diff --git a/examples/generate_data.py b/examples/generate_data.py index 50200dc..5a6c4c9 100644 --- a/examples/generate_data.py +++ b/examples/generate_data.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, numpy as np, random @@ -81,4 +81,5 @@ def run(args): raise NotImplementedError('Data generation mode "%s" is not implemented.' % args.mode) return -if __name__ == '__main__': run( get_parser().parse_args(sys.argv[1:]) ) \ No newline at end of file +if __name__ == '__main__': + run( get_parser().parse_args(sys.argv[1:]) ) \ No newline at end of file diff --git a/experiments/eccb2016/scripts/helper.py b/experiments/eccb2016/scripts/helper.py index 2017240..35fc5b5 100644 --- a/experiments/eccb2016/scripts/helper.py +++ b/experiments/eccb2016/scripts/helper.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import numpy as np @@ -16,6 +16,7 @@ def add_y_equals_x(ax, c='k', line_style='--', alpha=0.75): ax.set_xlim(lims) ax.set_ylim(lims) + def aligned_plaintext_table(table, sep='\t', spaces=2): """ Create and return an aligned plaintext table. @@ -42,6 +43,7 @@ def aligned_plaintext_table(table, sep='\t', spaces=2): # Return results. return '\n'.join([''.join([entries[i][j].rjust(sizes[j]+spaces) for j in range(n)]).rstrip() for i in range(m)]) + def rank(a, reverse=False, ties=2): """ Find the ranks of the elements of a. 
diff --git a/experiments/eccb2016/scripts/pairs_summary.py b/experiments/eccb2016/scripts/pairs_summary.py index 3e9d2ce..2920d18 100755 --- a/experiments/eccb2016/scripts/pairs_summary.py +++ b/experiments/eccb2016/scripts/pairs_summary.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import matplotlib diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py index cb6770a..fc9b90a 100644 --- a/experiments/eccb2016/scripts/permutation_test_helper.py +++ b/experiments/eccb2016/scripts/permutation_test_helper.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse diff --git a/experiments/eccb2016/scripts/permute_single_matrix.py b/experiments/eccb2016/scripts/permute_single_matrix.py index 00000ba..12c6d4e 100755 --- a/experiments/eccb2016/scripts/permute_single_matrix.py +++ b/experiments/eccb2016/scripts/permute_single_matrix.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Import modules. 
import numpy as np, os, sys, argparse, json @@ -17,6 +17,7 @@ def get_parser(): default=os.environ.get('SGE_TASK_ID', 0)) return parser + def run( args ): # Load WExT sys.path.append(args.wext_dir) diff --git a/experiments/eccb2016/scripts/pval_correlations.py b/experiments/eccb2016/scripts/pval_correlations.py index 55c280f..75e3c86 100755 --- a/experiments/eccb2016/scripts/pval_correlations.py +++ b/experiments/eccb2016/scripts/pval_correlations.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, pandas as pd @@ -31,7 +31,7 @@ # Compute the correlations with permutational # permutational_pvals_with_zeros = list(df.loc[df['Method'] == 'Permutational']['Raw P-value']) # all_indices = -tests = ["Permutational", "Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"] +tests = ["Permutational", "Fisher's exact test", "Weighted (exact test)", "Weighted (saddlepoint)"] for val, indices in [("All", []), (0, 1./args.num_permutations), (1./args.num_permutations, 2)]: tbl = [list(tests)] for t1 in tests: diff --git a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py index 2f3e250..d7c0354 100644 --- a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py +++ b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, json, multiprocessing as mp diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py index 3aa9e9f..7017b08 100644 --- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py +++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, json diff --git 
a/experiments/eccb2016/scripts/results_table.py b/experiments/eccb2016/scripts/results_table.py index ad39a94..3989369 100755 --- a/experiments/eccb2016/scripts/results_table.py +++ b/experiments/eccb2016/scripts/results_table.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, json diff --git a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py index 77a25df..7b66ee6 100755 --- a/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py +++ b/experiments/eccb2016/scripts/sample_mutation_frequency_plot.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import matplotlib diff --git a/experiments/eccb2016/scripts/triple_pval_scatter.py b/experiments/eccb2016/scripts/triple_pval_scatter.py index da966b3..6e2f168 100755 --- a/experiments/eccb2016/scripts/triple_pval_scatter.py +++ b/experiments/eccb2016/scripts/triple_pval_scatter.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import matplotlib diff --git a/experiments/eccb2016/scripts/unweighted_comparison.py b/experiments/eccb2016/scripts/unweighted_comparison.py index b6ffce4..369174b 100755 --- a/experiments/eccb2016/scripts/unweighted_comparison.py +++ b/experiments/eccb2016/scripts/unweighted_comparison.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python #Load required modules import matplotlib diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py index d263bca..9d13cbb 100755 --- a/experiments/eccb2016/scripts/weights_matrix.py +++ b/experiments/eccb2016/scripts/weights_matrix.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import matplotlib diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py index 40eb062..88a6401 100755 --- a/find_exclusive_sets.py +++ 
b/find_exclusive_sets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, numpy as np, json diff --git a/find_sets.py b/find_sets.py index 69e8388..1154da7 100755 --- a/find_sets.py +++ b/find_sets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, numpy as np, json diff --git a/process_mutations.py b/process_mutations.py index 4d4a9a3..c886271 100755 --- a/process_mutations.py +++ b/process_mutations.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, argparse, json, numpy as np diff --git a/wext/__init__.py b/wext/__init__.py index 1c5a62b..7df3a5f 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Import modules. from .constants import * diff --git a/wext/constants.py b/wext/constants.py index 9f6425b..7cffc13 100755 --- a/wext/constants.py +++ b/wext/constants.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # P-values are called invalid if P > 1+PTOL or P < -PTOL PTOL = 10**-3 diff --git a/wext/enumerate_sets.py b/wext/enumerate_sets.py index 8b80295..7908b42 100755 --- a/wext/enumerate_sets.py +++ b/wext/enumerate_sets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, multiprocessing as mp, json diff --git a/wext/exact.py b/wext/exact.py index c7494ac..6fc0a0f 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import numpy as np from .wext_exact_test import triple_exact_test diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py index 697cfd9..c29ff56 100755 --- a/wext/exclusivity_tests.py +++ b/wext/exclusivity_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import numpy as np diff --git a/wext/i_o.py b/wext/i_o.py index 
241f340..a77a0bc 100755 --- a/wext/i_o.py +++ b/wext/i_o.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Load required modules import sys, os, json, numpy as np diff --git a/wext/mcmc.py b/wext/mcmc.py index 675a410..29f43c9 100755 --- a/wext/mcmc.py +++ b/wext/mcmc.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import sys, os, numpy as np from collections import defaultdict diff --git a/wext/saddlepoint.py b/wext/saddlepoint.py index 8869c79..49df27b 100644 --- a/wext/saddlepoint.py +++ b/wext/saddlepoint.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import numpy as np from numpy.linalg import det diff --git a/wext/setup.py b/wext/setup.py index 873678a..072ae0f 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Compiles the C modules used by the weighted exclusivity test.""" diff --git a/wext/statistics.py b/wext/statistics.py index 1593b16..b1a3834 100755 --- a/wext/statistics.py +++ b/wext/statistics.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import numpy as np From 02e55866829724087b407cae8d84d8d58b0476aa Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 19:00:28 -0400 Subject: [PATCH 16/60] corrected structures for PyMethodDef --- wext/src/c/comet_exact_test.c | 12 +++++++++--- wext/src/c/poibinmodule.c | 3 ++- wext/src/c/wext_exact_test.c | 5 ++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c index 903d991..e30f3f8 100755 --- a/wext/src/c/comet_exact_test.c +++ b/wext/src/c/comet_exact_test.c @@ -281,8 +281,11 @@ struct Pvalues comet_exact_test(int k, int N, int *ctbl, double pvalthresh){ } + + + //////////////////////////////////////////////////////////////////////////////// -// Python registration +// Python wrapper functions //////////////////////////////////////////////////////////////////////////////// // The CoMEt 
exact test, callable from Python @@ -307,7 +310,7 @@ PyObject *py_comet_exact_test(PyObject *self, PyObject *args){ tbl[i] = (int) PyLong_AsLong (PyList_GetItem(py_tbl, i)); // Compute the P-values - pval = comet_exact_test(k, N, tbl, pvalthresh); + pval = comet_exact_test(k, N, tbl, pvalthresh); // Free memory free(tbl); @@ -316,10 +319,13 @@ PyObject *py_comet_exact_test(PyObject *self, PyObject *args){ } +// methods definition: cometExactTest + // Register the functions we want to be accessible from Python PyMethodDef cometExactTest[] = { - {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"} + {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, + {NULL, NULL, 0, NULL} }; // Note that the suffix of init has to match the name of the module, diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c index 8d5b3a3..4a55a00 100755 --- a/wext/src/c/poibinmodule.c +++ b/wext/src/c/poibinmodule.c @@ -73,7 +73,8 @@ PyObject *py_pmf(PyObject *self, PyObject *args){ // Register the functions we want to be accessible from Python PyMethodDef poibinMethods[] = { - {"pmf", py_pmf, METH_VARARGS, "Poisson-Binomial PMF"} + {"pmf", py_pmf, METH_VARARGS, "Poisson-Binomial PMF"}, + {NULL, NULL, 0, NULL} }; // Note that the suffix of init has to match the name of the module, diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c index ce49264..fe5e170 100755 --- a/wext/src/c/wext_exact_test.c +++ b/wext/src/c/wext_exact_test.c @@ -39,6 +39,8 @@ double joint_mass(int n, int z, int x, int y, double *p_x, double *p_y, double * return cache[n][z][x][y]; } +// python wrapper + PyObject *py_conditional(PyObject *self, PyObject *args){ // Parameters int i, j, i2, j2, N, x, y, *zs, num_zs; @@ -206,7 +208,8 @@ PyObject *triple_exact_test(PyObject *self, PyObject *args){ // Register the functions we want to be accessible from Python PyMethodDef weightedEnrichmentMethods[] = { {"conditional", py_conditional, 
METH_VARARGS, "Weighted enrichment test conditional PMF for pairs"}, - {"triple_exact_test", triple_exact_test, METH_VARARGS, "Weighted enrichment test for triples"} + {"triple_exact_test", triple_exact_test, METH_VARARGS, "Weighted enrichment test for triples"}, + {NULL, NULL, 0, NULL} }; // Note that the suffix of init has to match the name of the module, From 934904a25fdea281bacc7577a8dfe3828a15b6bc Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 19:25:01 -0400 Subject: [PATCH 17/60] check compiles under Python3.x --- wext/src/c/comet_exact_test.c | 37 +++++++++++++++++++++++++------- wext/src/c/poibinmodule.c | 38 +++++++++++++++++++++++++++------ wext/src/c/wext_exact_test.c | 40 ++++++++++++++++++++++++++++------- 3 files changed, 92 insertions(+), 23 deletions(-) diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c index e30f3f8..5e9ab1f 100755 --- a/wext/src/c/comet_exact_test.c +++ b/wext/src/c/comet_exact_test.c @@ -289,7 +289,7 @@ struct Pvalues comet_exact_test(int k, int N, int *ctbl, double pvalthresh){ //////////////////////////////////////////////////////////////////////////////// // The CoMEt exact test, callable from Python -PyObject *py_comet_exact_test(PyObject *self, PyObject *args){ +static PyObject *py_comet_exact_test(PyObject *self, PyObject *args){ // Parameters int k, N; // k: gene set size; N: number of samples PyObject *py_tbl; // FLAT Python contingency table @@ -320,19 +320,40 @@ PyObject *py_comet_exact_test(PyObject *self, PyObject *args){ } // methods definition: cometExactTest - +// name of module: comet_exact_test ... 
which is also the name of the function in Python // Register the functions we want to be accessible from Python -PyMethodDef cometExactTest[] = { +static PyMethodDef cometExactTest[] = { {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, {NULL, NULL, 0, NULL} }; +// PYTHON 2 // Note that the suffix of init has to match the name of the module, // both here and in the setup.py file -PyMODINIT_FUNC initcomet_exact_test(void) { - PyObject *m = Py_InitModule("comet_exact_test", cometExactTest); - if (m == NULL) { - return; - } +// PyMODINIT_FUNC initcomet_exact_test(void) { +// PyObject *m = Py_InitModule("comet_exact_test", cometExactTest); +// if (m == NULL) { +// return; +// } +// } + +// define structure for module +static struct PyModuleDef comet_exact_test = { + PyModuleDef_HEAD_INIT, // required + "comet_exact_test", // name of module + "documentation detailed here", // documentation + -1, + cometExactTest // method definitions +}; + + +// finally, write the initalizer function + +PyMODINIT_FUNC PyInit_comet_exact_test(void) +{ + return PyModule_Create(&comet_exact_test); } + + + diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c index 4a55a00..f2bf67b 100755 --- a/wext/src/c/poibinmodule.c +++ b/wext/src/c/poibinmodule.c @@ -45,7 +45,7 @@ double pmf(int k, int N, double *ps){ return mass; } -PyObject *py_pmf(PyObject *self, PyObject *args){ +static PyObject *py_pmf(PyObject *self, PyObject *args){ // Parameters int i, k, N; double result, *ps; @@ -71,17 +71,41 @@ PyObject *py_pmf(PyObject *self, PyObject *args){ return Py_BuildValue("d", result); } + +// methods definition: poibinMethods +// name of module: cpoibin + // Register the functions we want to be accessible from Python -PyMethodDef poibinMethods[] = { +static PyMethodDef poibinMethods[] = { {"pmf", py_pmf, METH_VARARGS, "Poisson-Binomial PMF"}, {NULL, NULL, 0, NULL} }; +// PYTHON 2 // Note that the suffix of init has to match the name of the module, // both 
here and in the setup.py file -PyMODINIT_FUNC initcpoibin(void) { - PyObject *m = Py_InitModule("cpoibin", poibinMethods); - if (m == NULL) { - return; - } +// PyMODINIT_FUNC initcpoibin(void) { +// PyObject *m = Py_InitModule("cpoibin", poibinMethods); +// if (m == NULL) { +// return; +// } +// } + +// define the module structure + +static struct PyModuleDef cpoibin = { + PyModuleDef_HEAD_INIT, // required + "cpoibin", // name of module + "ocumentation detailed here", // documentation + -1, + poibinMethods // method definitions +}; + +// finally, write the initalizer function + +PyMODINIT_FUNC PyInit_cpoibin(void) +{ + return PyModule_Create(&cpoibin); } + + diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c index fe5e170..7064296 100755 --- a/wext/src/c/wext_exact_test.c +++ b/wext/src/c/wext_exact_test.c @@ -41,7 +41,7 @@ double joint_mass(int n, int z, int x, int y, double *p_x, double *p_y, double * // python wrapper -PyObject *py_conditional(PyObject *self, PyObject *args){ +static PyObject *py_conditional(PyObject *self, PyObject *args){ // Parameters int i, j, i2, j2, N, x, y, *zs, num_zs; double *p_x, *p_y, joint_marginal, mass, ****cache; @@ -138,7 +138,7 @@ double P(int n, int t, int w, int x, int y, double **p, double *****cache){ return cache[n][t][w][x][y]; } -PyObject *triple_exact_test(PyObject *self, PyObject *args){ +static PyObject *triple_exact_test(PyObject *self, PyObject *args){ // Parameters int i, j, i2, j2, i3, N, w, x, y, t, T; double **p, marginals, joint, result, *****cache; @@ -204,19 +204,43 @@ PyObject *triple_exact_test(PyObject *self, PyObject *args){ return Py_BuildValue("f", result); } + +// methods definition: weightedEnrichmentMethods +// name of module: wext_exact_test + //////////////////////////////////////////////////////////////////////////////// // Register the functions we want to be accessible from Python -PyMethodDef weightedEnrichmentMethods[] = { +static PyMethodDef weightedEnrichmentMethods[] = 
{ {"conditional", py_conditional, METH_VARARGS, "Weighted enrichment test conditional PMF for pairs"}, {"triple_exact_test", triple_exact_test, METH_VARARGS, "Weighted enrichment test for triples"}, {NULL, NULL, 0, NULL} }; +// PYTHON 2 // Note that the suffix of init has to match the name of the module, // both here and in the setup.py file -PyMODINIT_FUNC initwext_exact_test(void) { - PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods); - if (m == NULL) { - return; - } +// PyMODINIT_FUNC initwext_exact_test(void) { +// PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods); +// if (m == NULL) { +// return; +// } +// } + +// define module structure + +static struct PyModuleDef wext_exact_test = { + PyModuleDef_HEAD_INIT, // required + "wext_exact_test", // name of module + "documentation detailed here", // documentation + -1, + weightedEnrichmentMethods // method definitions +}; + + +// finally, write the initalizer function + +PyMODINIT_FUNC PyInit_wext_exact_test(void) +{ + return PyModule_Create(&wext_exact_test); } + From 602e301eff11991d3d090f5d1b2b48b70daf5843 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 19:29:30 -0400 Subject: [PATCH 18/60] revise poibinmodule header file, defin py_pmf as static --- wext/src/c/poibinmodule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/src/c/poibinmodule.h b/wext/src/c/poibinmodule.h index 2d851e4..59b3511 100755 --- a/wext/src/c/poibinmodule.h +++ b/wext/src/c/poibinmodule.h @@ -8,4 +8,4 @@ // Function declarations double pmf_recursion(int k, int j, double *ps, double **cache); double pmf(int k, int N, double *ps); -PyObject *py_pmf(PyObject *self, PyObject *args); +static PyObject *py_pmf(PyObject *self, PyObject *args); From 49750b768d8eabc83a12cb88e741c22887b5fbb5 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 19:35:24 -0400 Subject: [PATCH 19/60] try renaming to comet_exact_tests --- wext/setup.py | 4 ++-- 
wext/src/c/comet_exact_test.c | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/wext/setup.py b/wext/setup.py index 072ae0f..22cf1d2 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -26,8 +26,8 @@ # Compile the CoMEt exact test module srcs = ['/src/c/comet_exact_test.c'] -module = Extension('comet_exact_test', include_dirs=[numpy.get_include()], +module = Extension('comet_exact_tests', include_dirs=[numpy.get_include()], sources = [ thisDir + s for s in srcs ], extra_compile_args = ['-g', '-O0']) -setup(name='comet_exact_test', version='0.0.1', ext_modules=[module], +setup(name='comet_exact_tests', version='0.0.1', ext_modules=[module], description='CoMEt exact test implementation.') diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c index 5e9ab1f..dc36a59 100755 --- a/wext/src/c/comet_exact_test.c +++ b/wext/src/c/comet_exact_test.c @@ -322,6 +322,8 @@ static PyObject *py_comet_exact_test(PyObject *self, PyObject *args){ // methods definition: cometExactTest // name of module: comet_exact_test ... 
which is also the name of the function in Python +// try renaming this to 'comet_exact_tests' + // Register the functions we want to be accessible from Python static PyMethodDef cometExactTest[] = { {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, @@ -339,9 +341,9 @@ static PyMethodDef cometExactTest[] = { // } // define structure for module -static struct PyModuleDef comet_exact_test = { +static struct PyModuleDef comet_exact_tests = { PyModuleDef_HEAD_INIT, // required - "comet_exact_test", // name of module + "comet_exact_tests", // name of module "documentation detailed here", // documentation -1, cometExactTest // method definitions @@ -350,9 +352,9 @@ static struct PyModuleDef comet_exact_test = { // finally, write the initalizer function -PyMODINIT_FUNC PyInit_comet_exact_test(void) +PyMODINIT_FUNC PyInit_comet_exact_tests(void) { - return PyModule_Create(&comet_exact_test); + return PyModule_Create(&comet_exact_tests); } From 6b2cdda4794e567b2fa169be16c190ecafd02d22 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 19:40:29 -0400 Subject: [PATCH 20/60] revised setup.py, module should be cpoibin --- wext/setup.cfg | 2 -- wext/setup.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100755 wext/setup.cfg diff --git a/wext/setup.cfg b/wext/setup.cfg deleted file mode 100755 index 8f69613..0000000 --- a/wext/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[build_ext] -inplace=1 diff --git a/wext/setup.py b/wext/setup.py index 22cf1d2..1c76366 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -13,7 +13,7 @@ module = Extension('cpoibin', include_dirs=[numpy.get_include()], sources = [ thisDir + s for s in srcs ], extra_compile_args = ['-g', '-O0']) -setup(name='poibin', version='0.0.1', ext_modules=[module], +setup(name='cpoibin', version='0.0.1', ext_modules=[module], description='Module for analyzing the Poisson-Binomial distribution.') # Compile the weighted enrichment module From 
053e680e88a8f3480ed1da8a23679a5f1e49b022 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 19:50:12 -0400 Subject: [PATCH 21/60] corrected typo with "from wext_exact_test import triple_exact_test" --- wext/exact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/exact.py b/wext/exact.py index 6fc0a0f..dcd4800 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -from .wext_exact_test import triple_exact_test +from wext_exact_test import triple_exact_test from .constants import * def exact_test(t, x, p, verbose=False): From 29de50b6067f7448f6f5512649382a9d9d8f6e2a Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 20:00:31 -0400 Subject: [PATCH 22/60] cannot find module, but it does install... --- .travis.yml | 2 ++ wext/exact.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index fc7f9bc..d947b5f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,8 @@ install: - python setup.py build - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - cd .. 
+ - pwd + - ls script: - nosetests after_success: diff --git a/wext/exact.py b/wext/exact.py index dcd4800..9bcfb5c 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -from wext_exact_test import triple_exact_test +import wext_exact_test from .constants import * def exact_test(t, x, p, verbose=False): @@ -16,7 +16,7 @@ def exact_test(t, x, p, verbose=False): # Wrapper for k=3 exact test C function def exact_test_k3(t, x, p, verbose): N = len(p[0]) - return triple_exact_test( N, t, x[0], x[1], x[2], p ) + return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p ) # Wrapper for k=2 exact test C function def exact_test_k2(t, x, y, p_x, p_y, verbose): From 61fc045ea33fff1bd6c925611823ebb83f49d429 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 20:03:11 -0400 Subject: [PATCH 23/60] revised __init__.py to correclty import modules --- wext/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wext/__init__.py b/wext/__init__.py index 7df3a5f..d144487 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -8,7 +8,9 @@ from .mcmc import mcmc from .exact import exact_test import cpoibin +import wext_exact_test +import comet_exact_tests from .saddlepoint import saddlepoint -from comet_exact_test import comet_exact_test +from comet_exact_tests import comet_exact_test from .exclusivity_tests import re_test, wre_test from bipartite_edge_swap_module import bipartite_edge_swap From da5a5a980c2909658fe18fade40f7cde8d725a13 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 20:07:57 -0400 Subject: [PATCH 24/60] run nosetests in different subdirectory --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index d947b5f..e41baa6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,9 +14,9 @@ install: - cd wext - python setup.py build - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m 
bipartite_edge_swap_module - - cd .. - - pwd - - ls + ##- cd .. + ##- pwd + ##- ls script: - nosetests after_success: From 2beb71ffdbd4c9c3f841013330410066043fd194 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 20:08:58 -0400 Subject: [PATCH 25/60] try nosetests in upper subdirectory --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index e41baa6..41186f8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,9 +14,9 @@ install: - cd wext - python setup.py build - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - ##- cd .. - ##- pwd - ##- ls + - cd ../../ + - pwd + - ls script: - nosetests after_success: From 8f646d4af6d970bfc58d71c06c2e9b6b650c4b12 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 20:20:52 -0400 Subject: [PATCH 26/60] revised __init__.py --- .travis.yml | 4 ++-- wext/__init__.py | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 41186f8..5554f3a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python python: - - 2.7 + ##- 2.7 - 3.4 - 3.5 - 3.6 @@ -14,7 +14,7 @@ install: - cd wext - python setup.py build - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - - cd ../../ + - cd ../ - pwd - ls script: diff --git a/wext/__init__.py b/wext/__init__.py index d144487..0b273e8 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -1,16 +1,15 @@ #!/usr/bin/env python -# Import modules. +# Import modules from .constants import * from .statistics import * from .i_o import * from .enumerate_sets import * from .mcmc import mcmc from .exact import exact_test -import cpoibin -import wext_exact_test -import comet_exact_tests +from .. import cpoibin +from .. import wext_exact_test from .saddlepoint import saddlepoint -from comet_exact_tests import comet_exact_test +from .. 
import comet_exact_tests from .exclusivity_tests import re_test, wre_test -from bipartite_edge_swap_module import bipartite_edge_swap +from .. import bipartite_edge_swap_module \ No newline at end of file From c04fdc874cc12cfd7b2db825795e50941134150b Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 20:23:26 -0400 Subject: [PATCH 27/60] changed __init__.py again, try explicit imports --- wext/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wext/__init__.py b/wext/__init__.py index 0b273e8..b9a1ddb 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -7,9 +7,9 @@ from .enumerate_sets import * from .mcmc import mcmc from .exact import exact_test -from .. import cpoibin -from .. import wext_exact_test +from ..c import cpoibin +from ..c import wext_exact_test from .saddlepoint import saddlepoint -from .. import comet_exact_tests +from ..c import comet_exact_tests from .exclusivity_tests import re_test, wre_test -from .. import bipartite_edge_swap_module \ No newline at end of file +from ..fortran import bipartite_edge_swap_module \ No newline at end of file From 82547561af7be8e7a9c3347e7c5c30366552cb0e Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 20:27:23 -0400 Subject: [PATCH 28/60] now change exact.py, from ..c import wext_exact_test --- wext/exact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/exact.py b/wext/exact.py index 9bcfb5c..46f4a1e 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -import wext_exact_test +from ..c import wext_exact_test from .constants import * def exact_test(t, x, p, verbose=False): From 7fa5cbb8a94bc596cd0f966c5adf9e634c88fd86 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 21:26:51 -0400 Subject: [PATCH 29/60] revised relative imports --- wext/__init__.py | 8 ++++---- wext/exact.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/wext/__init__.py b/wext/__init__.py index b9a1ddb..de218c5 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -7,9 +7,9 @@ from .enumerate_sets import * from .mcmc import mcmc from .exact import exact_test -from ..c import cpoibin -from ..c import wext_exact_test +from .c import cpoibin +from .c import wext_exact_test from .saddlepoint import saddlepoint -from ..c import comet_exact_tests +from .c import comet_exact_tests from .exclusivity_tests import re_test, wre_test -from ..fortran import bipartite_edge_swap_module \ No newline at end of file +from .fortran import bipartite_edge_swap_module \ No newline at end of file diff --git a/wext/exact.py b/wext/exact.py index 46f4a1e..2ce07e9 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -from ..c import wext_exact_test +from .c import wext_exact_test from .constants import * def exact_test(t, x, p, verbose=False): From c43aa8872b30dd08b1c8b86e8064e9c5ac032df6 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 21:45:47 -0400 Subject: [PATCH 30/60] try "from .wext_exact_test import * " --- wext/exact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/exact.py b/wext/exact.py index 2ce07e9..c81e269 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -from .c import wext_exact_test +from .wext_exact_test import * from .constants import * def exact_test(t, x, p, verbose=False): From fcea393f7997665677bf1c330a389dd9f5deb363 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 22:52:30 -0400 Subject: [PATCH 31/60] revise import --- wext/__init__.py | 8 ++++---- wext/exact.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/wext/__init__.py b/wext/__init__.py index de218c5..cb1908f 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -7,9 +7,9 @@ from .enumerate_sets import * from .mcmc import mcmc from .exact import exact_test -from 
.c import cpoibin -from .c import wext_exact_test +from .src.c import cpoibin +from .src.c import wext_exact_test from .saddlepoint import saddlepoint -from .c import comet_exact_tests +from .src.c import comet_exact_tests from .exclusivity_tests import re_test, wre_test -from .fortran import bipartite_edge_swap_module \ No newline at end of file +from .src.fortran import bipartite_edge_swap_module \ No newline at end of file diff --git a/wext/exact.py b/wext/exact.py index c81e269..566dd33 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -from .wext_exact_test import * +from .src.c import wext_exact_test from .constants import * def exact_test(t, x, p, verbose=False): From 3a3e8d18d81f24248e271af94d7361d55c2a59ac Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 22:55:47 -0400 Subject: [PATCH 32/60] try from .src.c.wext_exact_test import * --- wext/exact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/exact.py b/wext/exact.py index 566dd33..03447b0 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -from .src.c import wext_exact_test +from .src.c.wext_exact_test import * from .constants import * def exact_test(t, x, p, verbose=False): From a08fa528891e5f60652220f973562720431e5331 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 9 Sep 2018 23:28:42 -0400 Subject: [PATCH 33/60] added __init__.py files --- wext/exact.py | 2 +- wext/src/__init__.py | 0 wext/src/c/__init__.py | 0 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 wext/src/__init__.py create mode 100644 wext/src/c/__init__.py diff --git a/wext/exact.py b/wext/exact.py index 03447b0..9bcfb5c 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import numpy as np -from .src.c.wext_exact_test import * +import wext_exact_test from .constants import * def exact_test(t, x, p, verbose=False): diff --git 
a/wext/src/__init__.py b/wext/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/wext/src/c/__init__.py b/wext/src/c/__init__.py new file mode 100644 index 0000000..e69de29 From 38acd11da31d97db9bbe38bc9b4bc17979be0968 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 10:19:52 -0400 Subject: [PATCH 34/60] these should be global modules --- wext/__init__.py | 8 ++++---- wext/src/__init__.py | 0 wext/src/c/__init__.py | 0 3 files changed, 4 insertions(+), 4 deletions(-) delete mode 100644 wext/src/__init__.py delete mode 100644 wext/src/c/__init__.py diff --git a/wext/__init__.py b/wext/__init__.py index cb1908f..de8e09d 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -7,9 +7,9 @@ from .enumerate_sets import * from .mcmc import mcmc from .exact import exact_test -from .src.c import cpoibin -from .src.c import wext_exact_test +import cpoibin +import wext_exact_test from .saddlepoint import saddlepoint -from .src.c import comet_exact_tests +import comet_exact_tests from .exclusivity_tests import re_test, wre_test -from .src.fortran import bipartite_edge_swap_module \ No newline at end of file +from bipartite_edge_swap_module import bipartite_edge_swap \ No newline at end of file diff --git a/wext/src/__init__.py b/wext/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/wext/src/c/__init__.py b/wext/src/c/__init__.py deleted file mode 100644 index e69de29..0000000 From 885059be8be03afb16584433df0ef43c57c9441a Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 13:40:56 -0400 Subject: [PATCH 35/60] install instead --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5554f3a..d45cf56 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ install: - pip install codecov - pip install -r requirements.txt - cd wext - - python setup.py build + - python setup.py install - f2py -c 
src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - cd ../ - pwd From 2717e4a6d7e62f661038139151f1ce6be476119d Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 13:48:03 -0400 Subject: [PATCH 36/60] revised module name --- wext/exclusivity_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py index c29ff56..a7c4229 100755 --- a/wext/exclusivity_tests.py +++ b/wext/exclusivity_tests.py @@ -6,7 +6,7 @@ from .exact import exact_test import cpoibin from .saddlepoint import saddlepoint, check_condition -from comet_exact_test import comet_exact_test +from comet_exact_tests import comet_exact_test import warnings # Perform the weighted-row exclusivity test (WR-test) using the given method. From f0498a122cdfcdf74f77b823c89906b097c02805 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 14:34:26 -0400 Subject: [PATCH 37/60] check if fortran extension module can be imported --- wext/__init__.py | 2 +- .../Contents/Info.plist | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist diff --git a/wext/__init__.py b/wext/__init__.py index de8e09d..8a0c19e 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -12,4 +12,4 @@ from .saddlepoint import saddlepoint import comet_exact_tests from .exclusivity_tests import re_test, wre_test -from bipartite_edge_swap_module import bipartite_edge_swap \ No newline at end of file +import bipartite_edge_swap_module \ No newline at end of file diff --git a/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist b/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist new file mode 100644 index 0000000..b5e4350 --- /dev/null +++ b/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist @@ -0,0 +1,20 @@ + + + + + 
CFBundleDevelopmentRegion + English + CFBundleIdentifier + com.apple.xcode.dsym.bipartite_edge_swap_module.cpython-37m-darwin.so + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + dSYM + CFBundleSignature + ???? + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + From 096a4126dab464e6cbe2e4aa5f3e73755508d5be Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 14:44:11 -0400 Subject: [PATCH 38/60] revised travis config --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d45cf56..f62bf87 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,8 +13,8 @@ install: - pip install -r requirements.txt - cd wext - python setup.py install - - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - cd ../ + - f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - pwd - ls script: From 3d4972cd467defee96e360f25e65d75932865c5a Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 14:50:18 -0400 Subject: [PATCH 39/60] check the FORTRAN code installs via setup.py --- .travis.yml | 2 +- .../Contents/Info.plist | 20 ------------------- wext/setup.py | 7 +++++++ 3 files changed, 8 insertions(+), 21 deletions(-) delete mode 100644 wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist diff --git a/.travis.yml b/.travis.yml index f62bf87..7c684c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ install: - cd wext - python setup.py install - cd ../ - - f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module + ##- f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - pwd - ls script: diff --git a/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist b/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist deleted file mode 100644 index b5e4350..0000000 --- 
a/wext/bipartite_edge_swap_module.cpython-37m-darwin.so.dSYM/Contents/Info.plist +++ /dev/null @@ -1,20 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleIdentifier - com.apple.xcode.dsym.bipartite_edge_swap_module.cpython-37m-darwin.so - CFBundleInfoDictionaryVersion - 6.0 - CFBundlePackageType - dSYM - CFBundleSignature - ???? - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - - diff --git a/wext/setup.py b/wext/setup.py index 1c76366..8ec5bda 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -31,3 +31,10 @@ extra_compile_args = ['-g', '-O0']) setup(name='comet_exact_tests', version='0.0.1', ext_modules=[module], description='CoMEt exact test implementation.') + +## Compile the FORTRAN extension, bipartite_edge_swap_module +srcs = ['/src/fortran/bipartite_edge_swap_module.f95'] +module = Extension('bipartite_edge_swap_module', include_dirs=[numpy.get_include()], + sources = [ thisDir + s for s in srcs ] +setup(name='bipartite_edge_swap_module', version='0.0.1', ext_modules=[module], + description='FORTRAN code description') From 81ebf4303b65410b0798cfac0c66e5b8a693354b Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 15:05:52 -0400 Subject: [PATCH 40/60] missing ) --- wext/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wext/setup.py b/wext/setup.py index 8ec5bda..f28e3ec 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -35,6 +35,6 @@ ## Compile the FORTRAN extension, bipartite_edge_swap_module srcs = ['/src/fortran/bipartite_edge_swap_module.f95'] module = Extension('bipartite_edge_swap_module', include_dirs=[numpy.get_include()], - sources = [ thisDir + s for s in srcs ] + sources = [ thisDir + s for s in srcs ]) setup(name='bipartite_edge_swap_module', version='0.0.1', ext_modules=[module], description='FORTRAN code description') From a8ff8d207efa768aa5c7bcad0ee17e3874e04d54 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 15:24:08 -0400 Subject: [PATCH 41/60] revise 
setup.py --- wext/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wext/setup.py b/wext/setup.py index f28e3ec..c4ba272 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -3,7 +3,8 @@ """Compiles the C modules used by the weighted exclusivity test.""" # Load required modules -from distutils.core import setup, Extension +from numpy.distutils.core import setup +from numpy.distutils.extension import Extension import numpy, os thisDir = os.path.dirname(os.path.realpath(__file__)) From 9e5d982089857445d87c9cc26fc20d42ccf5c99d Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 23:45:54 -0400 Subject: [PATCH 42/60] revise c extensions --- wext/src/c/comet_exact_test.c | 18 +++++++++++++++++- wext/src/c/poibinmodule.c | 11 +++++++++++ wext/src/c/wext_exact_test.c | 14 ++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c index dc36a59..7736a5f 100755 --- a/wext/src/c/comet_exact_test.c +++ b/wext/src/c/comet_exact_test.c @@ -324,12 +324,16 @@ static PyObject *py_comet_exact_test(PyObject *self, PyObject *args){ // try renaming this to 'comet_exact_tests' + + // Register the functions we want to be accessible from Python static PyMethodDef cometExactTest[] = { {"comet_exact_test", py_comet_exact_test, METH_VARARGS, "CoMEt exact test"}, {NULL, NULL, 0, NULL} }; + + // PYTHON 2 // Note that the suffix of init has to match the name of the module, // both here and in the setup.py file @@ -340,6 +344,9 @@ static PyMethodDef cometExactTest[] = { // } // } + +#if PY_MAJOR_VERSION >= 3 + // define structure for module static struct PyModuleDef comet_exact_tests = { PyModuleDef_HEAD_INIT, // required @@ -349,7 +356,6 @@ static struct PyModuleDef comet_exact_tests = { cometExactTest // method definitions }; - // finally, write the initalizer function PyMODINIT_FUNC PyInit_comet_exact_tests(void) @@ -357,5 +363,15 @@ PyMODINIT_FUNC 
PyInit_comet_exact_tests(void) return PyModule_Create(&comet_exact_tests); } +#else + +PyMODINIT_FUNC initcomet_exact_test(void) { + PyObject *m = Py_InitModule("comet_exact_test", cometExactTest); + if (m == NULL) { + return; + } +} + +#endif diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c index f2bf67b..a04d558 100755 --- a/wext/src/c/poibinmodule.c +++ b/wext/src/c/poibinmodule.c @@ -93,6 +93,8 @@ static PyMethodDef poibinMethods[] = { // define the module structure +#if PY_MAJOR_VERSION >= 3 + static struct PyModuleDef cpoibin = { PyModuleDef_HEAD_INIT, // required "cpoibin", // name of module @@ -108,4 +110,13 @@ PyMODINIT_FUNC PyInit_cpoibin(void) return PyModule_Create(&cpoibin); } +#else + +PyMODINIT_FUNC initcpoibin(void) { + PyObject *m = Py_InitModule("cpoibin", poibinMethods); + if (m == NULL) { + return; + } +} +#endif diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c index 7064296..86f9533 100755 --- a/wext/src/c/wext_exact_test.c +++ b/wext/src/c/wext_exact_test.c @@ -226,6 +226,9 @@ static PyMethodDef weightedEnrichmentMethods[] = { // } // } + +#if PY_MAJOR_VERSION >= 3 + // define module structure static struct PyModuleDef wext_exact_test = { @@ -244,3 +247,14 @@ PyMODINIT_FUNC PyInit_wext_exact_test(void) return PyModule_Create(&wext_exact_test); } +#else + +PyMODINIT_FUNC initwext_exact_test(void) { + PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods); + if (m == NULL) { + return; + } +} + + +#endif \ No newline at end of file From d9a03b70abe81bda4072b65615f2de0e10bc918e Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Mon, 10 Sep 2018 23:56:55 -0400 Subject: [PATCH 43/60] allow 2.7 builds with python --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7c684c4..4652bb5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python python: - ##- 2.7 + - 2.7 - 3.4 - 3.5 - 3.6 From 
48873ff779199d111cdac664da2117f5b608ca03 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 00:00:42 -0400 Subject: [PATCH 44/60] revise how module named --- wext/src/c/comet_exact_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c index 7736a5f..8b59a53 100755 --- a/wext/src/c/comet_exact_test.c +++ b/wext/src/c/comet_exact_test.c @@ -365,8 +365,8 @@ PyMODINIT_FUNC PyInit_comet_exact_tests(void) #else -PyMODINIT_FUNC initcomet_exact_test(void) { - PyObject *m = Py_InitModule("comet_exact_test", cometExactTest); +PyMODINIT_FUNC initcomet_exact_tests(void) { + PyObject *m = Py_InitModule("comet_exact_tests", cometExactTest); if (m == NULL) { return; } From 3d74ad2141e27974209be2f86003c8ebf28d3a97 Mon Sep 17 00:00:00 2001 From: Biederstedt Date: Tue, 11 Sep 2018 10:21:33 -0400 Subject: [PATCH 45/60] removed comments --- wext/src/c/comet_exact_test.c | 11 ----------- wext/src/c/poibinmodule.c | 9 --------- wext/src/c/wext_exact_test.c | 9 --------- 3 files changed, 29 deletions(-) diff --git a/wext/src/c/comet_exact_test.c b/wext/src/c/comet_exact_test.c index 8b59a53..b6097d6 100755 --- a/wext/src/c/comet_exact_test.c +++ b/wext/src/c/comet_exact_test.c @@ -334,17 +334,6 @@ static PyMethodDef cometExactTest[] = { -// PYTHON 2 -// Note that the suffix of init has to match the name of the module, -// both here and in the setup.py file -// PyMODINIT_FUNC initcomet_exact_test(void) { -// PyObject *m = Py_InitModule("comet_exact_test", cometExactTest); -// if (m == NULL) { -// return; -// } -// } - - #if PY_MAJOR_VERSION >= 3 // define structure for module diff --git a/wext/src/c/poibinmodule.c b/wext/src/c/poibinmodule.c index a04d558..6c3e71a 100755 --- a/wext/src/c/poibinmodule.c +++ b/wext/src/c/poibinmodule.c @@ -81,15 +81,6 @@ static PyMethodDef poibinMethods[] = { {NULL, NULL, 0, NULL} }; -// PYTHON 2 -// Note that the suffix of init has to match the name of 
the module, -// both here and in the setup.py file -// PyMODINIT_FUNC initcpoibin(void) { -// PyObject *m = Py_InitModule("cpoibin", poibinMethods); -// if (m == NULL) { -// return; -// } -// } // define the module structure diff --git a/wext/src/c/wext_exact_test.c b/wext/src/c/wext_exact_test.c index 86f9533..3ab4209 100755 --- a/wext/src/c/wext_exact_test.c +++ b/wext/src/c/wext_exact_test.c @@ -216,15 +216,6 @@ static PyMethodDef weightedEnrichmentMethods[] = { {NULL, NULL, 0, NULL} }; -// PYTHON 2 -// Note that the suffix of init has to match the name of the module, -// both here and in the setup.py file -// PyMODINIT_FUNC initwext_exact_test(void) { -// PyObject *m = Py_InitModule("wext_exact_test", weightedEnrichmentMethods); -// if (m == NULL) { -// return; -// } -// } #if PY_MAJOR_VERSION >= 3 From 79f697b8ab0b906b50318e848eff3411968f54af Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 14:53:08 -0400 Subject: [PATCH 46/60] revised string handling, outside scripts --- compute_mutation_probabilities.py | 3 +- .../simple/all-co-occurrence_results-k2.tsv | 2 + .../simple/any-co-occurrence_results-k2.tsv | 2 + examples/simple/commands_python2.sh | 52 ++++++++++++++++++ examples/simple/commands_python3.sh | 52 ++++++++++++++++++ examples/simple/data.json | 1 + examples/simple/exclusivity_results-k2.tsv | 2 + examples/simple/weights.npy | Bin 0 -> 1024 bytes process_mutations.py | 12 ++-- wext/__init__.py | 2 +- 10 files changed, 120 insertions(+), 8 deletions(-) create mode 100644 examples/simple/all-co-occurrence_results-k2.tsv create mode 100644 examples/simple/any-co-occurrence_results-k2.tsv create mode 100644 examples/simple/commands_python2.sh create mode 100644 examples/simple/commands_python3.sh create mode 100644 examples/simple/data.json create mode 100644 examples/simple/exclusivity_results-k2.tsv create mode 100644 examples/simple/weights.npy diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py 
index a924427..ab1989c 100755 --- a/compute_mutation_probabilities.py +++ b/compute_mutation_probabilities.py @@ -4,6 +4,7 @@ import sys, os, argparse, json, numpy as np, multiprocessing as mp, random from collections import defaultdict + # Load the weighted exclusivity test this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(this_dir) @@ -44,7 +45,7 @@ def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, m, n, num_ indices.append( (edge[0]-1, edge[1]-1) ) # Record the permutation - observed[list(zip(*indices))] += 1. + observed[tuple(zip(*indices))] += 1. geneToCases = dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) ) permutations.append( dict(geneToCases=geneToCases, permutation_number=seed) ) diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv new file mode 100644 index 0000000..9a5b388 --- /dev/null +++ b/examples/simple/all-co-occurrence_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +c, d 0.003668945489200462 0.18261601945262562 0.00248384475708 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv new file mode 100644 index 0000000..7107006 --- /dev/null +++ b/examples/simple/any-co-occurrence_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +c, d 0.003668945489200462 0.18261601945262562 0.00242900848389 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/commands_python2.sh b/examples/simple/commands_python2.sh new file mode 100644 index 0000000..7a43965 --- /dev/null +++ b/examples/simple/commands_python2.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +num_permutations=1000 +num_cores=4 + +# Preprocess mutations. 
+python2 ../../process_mutations.py \ + -m adjacency_list.tsv \ + -ct NA \ + -o data.json + +# Compute mutation probabilities. +python2 ../../compute_mutation_probabilities.py \ + -mf data.json \ + -np $num_permutations \ + -nc $num_cores \ + -wf weights.npy \ + -s 12345 \ + -v 1 + +# Find sets using mutual exclusivity test statistic. +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s exclusivity \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o exclusivity_results \ + -v 0 + +# Find sets using a co-occurrence test statistic (any co-occurrence). +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s any-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o any-co-occurrence_results \ + -v 0 + +# Find sets using another co-occurrence test statistic (all co-occurrence). +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s all-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o all-co-occurrence_results \ + -v 0 diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh new file mode 100644 index 0000000..96904cf --- /dev/null +++ b/examples/simple/commands_python3.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +num_permutations=1000 +num_cores=4 + +# Preprocess mutations. +python3 ../../process_mutations.py \ + -m adjacency_list.tsv \ + -ct NA \ + -o data.json + +# Compute mutation probabilities. +python3 ../../compute_mutation_probabilities.py \ + -mf data.json \ + -np $num_permutations \ + -nc $num_cores \ + -wf weights.npy \ + -s 12345 \ + -v 1 + +# Find sets using mutual exclusivity test statistic. +python3 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s exclusivity \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o exclusivity_results \ + -v 0 + +# Find sets using a co-occurrence test statistic (any co-occurrence). 
+python3 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s any-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o any-co-occurrence_results \ + -v 0 + +# Find sets using another co-occurrence test statistic (all co-occurrence). +python3 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s all-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o all-co-occurrence_results \ + -v 0 diff --git a/examples/simple/data.json b/examples/simple/data.json new file mode 100644 index 0000000..731a28a --- /dev/null +++ b/examples/simple/data.json @@ -0,0 +1 @@ +{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}} \ No newline at end of file diff --git a/examples/simple/exclusivity_results-k2.tsv 
b/examples/simple/exclusivity_results-k2.tsv new file mode 100644 index 0000000..bd8a6b0 --- /dev/null +++ b/examples/simple/exclusivity_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +a, b 0.00025630815683653265 0.010596224876535663 0.00440192222595 12 0 2 6 6 0 \ No newline at end of file diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy new file mode 100644 index 0000000000000000000000000000000000000000..0cdfdcf8096fc128e3dd5fb94fbe128002d925e6 GIT binary patch literal 1024 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I#iItqp+nmP)#3giMV#SKLn6W_kFSM^SM!}zqe@8-Gh!H<`|wSTdt(NFrt6?x&EaKEg@E)X&;IZ32JQ z3wuH|%v=}^(+}g*+CHV*KkKsg+_S&1LEGV->2!Oc<^!h|x^BWr!_+h8H?kb 0: - print('* Summary of mutation data...') - print('\tGenes: {}'.format(num_genes)) - print('\tPatients: {} ({} hypermutators)'.format(num_patients, len(hypermutators))) - print('\tUsed variant classes:', ', '.join(sorted(vc))) - print('\tUsed variant types:', ', '.join(sorted(vt))) - print('\tUsed validation statuses:', ', '.join(sorted(vs))) + print("* Summary of mutation data...") + print("\tGenes: {}".format(num_genes)) + print("\tPatients: {} ({} hypermutators)".format(num_patients, len(hypermutators))) + print("\tUsed variant classes: " + ", ".join(sorted(vc))) + print("\tUsed variant types: " + ", ".join(sorted(vt))) + print("\tUsed validation statuses: " + ", ".join(sorted(vs))) # Output to file with open(args.output_file, 'w') as OUT: diff --git a/wext/__init__.py b/wext/__init__.py index 8a0c19e..de8e09d 100755 --- a/wext/__init__.py +++ b/wext/__init__.py @@ -12,4 +12,4 @@ from .saddlepoint import saddlepoint import comet_exact_tests from .exclusivity_tests import re_test, wre_test -import bipartite_edge_swap_module \ No newline at end of file +from bipartite_edge_swap_module import bipartite_edge_swap \ No newline at end of file From 
e799a4fad4a81c77e48efc2e568b794402c95b8b Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 14:53:56 -0400 Subject: [PATCH 47/60] revised string handling, outside scripts --- .../simple/all-co-occurrence_results-k2.tsv | 2 - .../simple/any-co-occurrence_results-k2.tsv | 2 - examples/simple/commands_python2.sh | 52 ------------------- examples/simple/commands_python3.sh | 52 ------------------- examples/simple/data.json | 1 - examples/simple/exclusivity_results-k2.tsv | 2 - 6 files changed, 111 deletions(-) delete mode 100644 examples/simple/all-co-occurrence_results-k2.tsv delete mode 100644 examples/simple/any-co-occurrence_results-k2.tsv delete mode 100644 examples/simple/commands_python2.sh delete mode 100644 examples/simple/commands_python3.sh delete mode 100644 examples/simple/data.json delete mode 100644 examples/simple/exclusivity_results-k2.tsv diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv deleted file mode 100644 index 9a5b388..0000000 --- a/examples/simple/all-co-occurrence_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 -c, d 0.003668945489200462 0.18261601945262562 0.00248384475708 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv deleted file mode 100644 index 7107006..0000000 --- a/examples/simple/any-co-occurrence_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 -c, d 0.003668945489200462 0.18261601945262562 0.00242900848389 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/commands_python2.sh b/examples/simple/commands_python2.sh deleted file mode 100644 index 7a43965..0000000 --- a/examples/simple/commands_python2.sh +++ /dev/null @@ -1,52 +0,0 
@@ -#!/usr/bin/env bash - -num_permutations=1000 -num_cores=4 - -# Preprocess mutations. -python2 ../../process_mutations.py \ - -m adjacency_list.tsv \ - -ct NA \ - -o data.json - -# Compute mutation probabilities. -python2 ../../compute_mutation_probabilities.py \ - -mf data.json \ - -np $num_permutations \ - -nc $num_cores \ - -wf weights.npy \ - -s 12345 \ - -v 1 - -# Find sets using mutual exclusivity test statistic. -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s exclusivity \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o exclusivity_results \ - -v 0 - -# Find sets using a co-occurrence test statistic (any co-occurrence). -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s any-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o any-co-occurrence_results \ - -v 0 - -# Find sets using another co-occurrence test statistic (all co-occurrence). -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s all-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o all-co-occurrence_results \ - -v 0 diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh deleted file mode 100644 index 96904cf..0000000 --- a/examples/simple/commands_python3.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash - -num_permutations=1000 -num_cores=4 - -# Preprocess mutations. -python3 ../../process_mutations.py \ - -m adjacency_list.tsv \ - -ct NA \ - -o data.json - -# Compute mutation probabilities. -python3 ../../compute_mutation_probabilities.py \ - -mf data.json \ - -np $num_permutations \ - -nc $num_cores \ - -wf weights.npy \ - -s 12345 \ - -v 1 - -# Find sets using mutual exclusivity test statistic. -python3 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s exclusivity \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o exclusivity_results \ - -v 0 - -# Find sets using a co-occurrence test statistic (any co-occurrence). 
-python3 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s any-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o any-co-occurrence_results \ - -v 0 - -# Find sets using another co-occurrence test statistic (all co-occurrence). -python3 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s all-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o all-co-occurrence_results \ - -v 0 diff --git a/examples/simple/data.json b/examples/simple/data.json deleted file mode 100644 index 731a28a..0000000 --- a/examples/simple/data.json +++ /dev/null @@ -1 +0,0 @@ -{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}} \ No newline at end of file diff --git a/examples/simple/exclusivity_results-k2.tsv 
b/examples/simple/exclusivity_results-k2.tsv deleted file mode 100644 index bd8a6b0..0000000 --- a/examples/simple/exclusivity_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 -a, b 0.00025630815683653265 0.010596224876535663 0.00440192222595 12 0 2 6 6 0 \ No newline at end of file From ec674fc1f348847c7e9df2e9f17faa2486e4cec5 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 17:05:36 -0400 Subject: [PATCH 48/60] first commit --- process_mutations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/process_mutations.py b/process_mutations.py index d040abc..66e78d4 100755 --- a/process_mutations.py +++ b/process_mutations.py @@ -178,9 +178,9 @@ def run( args ): patient_whitelist_file=os.path.abspath(args.patient_whitelist) if args.patient_whitelist else None, hypermutators_file=os.path.abspath(args.hypermutators_file) if args.hypermutators_file else None) output = dict(params=params, patients=patients, genes=genes, hypermutators=list(hypermutators), - geneToCases=dict( (g, list(cases)) for g, cases in list(geneToCases.items())), + geneToCases=dict( (g, list(cases)) for g, cases in geneToCases.items()), patientToType=patientToType, - patientToMutations=dict( (p, list(muts)) for p, muts in list(patientToMutations.items())), + patientToMutations=dict( (p, list(muts)) for p, muts in patientToMutations.items()), num_genes=num_genes, num_patients=num_patients) json.dump( output, OUT ) From bb0d1514aa00e779f1316d2f751b2f9ce3bb9391 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 17:26:17 -0400 Subject: [PATCH 49/60] revised external scripts --- find_exclusive_sets.py | 17 +++++++++-------- find_sets.py | 16 ++++++++-------- process_mutations.py | 6 +++--- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/find_exclusive_sets.py b/find_exclusive_sets.py index 88a6401..f6046c4 100755 --- 
a/find_exclusive_sets.py +++ b/find_exclusive_sets.py @@ -18,7 +18,7 @@ def get_parser(): parser.add_argument('-o', '--output_prefix', type=str, required=True) parser.add_argument('-f', '--min_frequency', type=int, default=1, required=False) parser.add_argument('-c', '--num_cores', type=int, required=False, default=1) - parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=range(5)) + parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=list(range(5))) parser.add_argument('-r', '--report_invalids', action='store_true', default=False, required=False) parser.add_argument('--json_format', action='store_true', default=False, required=False) @@ -105,11 +105,12 @@ def load_mutation_files(mutation_files): genes |= set(type_genes) # Record the mutations in each gene - for g, cases in typeGeneToCases.iteritems(): geneToCases[g] |= cases + for g, cases in typeGeneToCases.items(): + geneToCases[g] |= cases # Record the genes, patients, and their indices for later - typeToGeneIndex.append(dict(zip(type_genes, range(len(type_genes))))) - typeToPatientIndex.append(dict(zip(type_patients, range(len(type_patients))))) + typeToGeneIndex.append(dict(zip(type_genes, list(range(len(type_genes)))))) + typeToPatientIndex.append(dict(zip(type_patients, list(range(len(type_patients)))))) return genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex @@ -128,7 +129,7 @@ def run( args ): num_all_genes, num_patients = len(genes), len(patients) # Restrict to genes mutated in a minimum number of samples - geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency ) + geneToCases = dict( (g, cases) for g, cases in geneToCases.items() if g in genes and len(cases) >= args.min_frequency ) genes = set(geneToCases.keys()) num_genes = len(genes) @@ -141,7 +142,7 @@ def run( args ): # Since we are looking for co-occurrence between exclusive sets with # an annotation 
A, we add events for each patient NOT annotated by # the given annotation - for annotation, cases in list(annotationToPatients.items()): + for annotation, cases in annotationToPatients.items(): not_cases = patients - cases if len(not_cases) > 0: geneToCases[annotation] = not_cases @@ -159,8 +160,8 @@ def run( args ): test = nameToTest[args.test] if test == WRE: # Create master versions of the indices - masterGeneToIndex = dict(list(zip(sorted(genes), list(range(num_genes))))) - masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients))))) + masterGeneToIndex = dict(zip(sorted(genes), list(range(num_genes)))) + masterPatientToIndex = dict(zip(sorted(patients), list(range(num_patients)))) geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex) else: geneToP = None diff --git a/find_sets.py b/find_sets.py index 1154da7..aa1f984 100755 --- a/find_sets.py +++ b/find_sets.py @@ -24,7 +24,7 @@ def get_parser(): parser.add_argument('-t', '--test', type=str, required=False, default='WRE', choices=['WRE']) parser.add_argument('-m', '--method', type=str, required=False, default='Saddlepoint', choices=['Saddlepoint']) parser.add_argument('-s', '--statistic', type=str, required=True, choices=['exclusivity', 'any-co-occurrence', 'all-co-occurrence']) - parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=range(5)) + parser.add_argument('-v', '--verbose', type=int, required=False, default=1, choices=list(range(5)) ) parser.add_argument('-r', '--report_invalids', action='store_true', default=False, required=False) parser.add_argument('--json_format', action='store_true', default=False, required=False) return parser @@ -87,12 +87,12 @@ def load_mutation_files(mutation_files): genes |= set(type_genes) # Record the mutations in each gene - for g, cases in list(typeGeneToCases.items()): + for g, cases in typeGeneToCases.items(): geneToCases[g] |= 
cases # Record the genes, patients, and their indices for later - typeToGeneIndex.append(dict(zip(type_genes, range(len(type_genes))))) - typeToPatientIndex.append(dict(zip(type_patients, range(len(type_patients))))) + typeToGeneIndex.append(dict(zip(type_genes, list(range(len(type_genes)))))) + typeToPatientIndex.append(dict(zip(type_patients, list(range(len(type_patients)))))) return genes, patients, geneToCases, typeToGeneIndex, typeToPatientIndex @@ -107,7 +107,7 @@ def run( args ): num_all_genes, num_patients = len(genes), len(patients) # Restrict to genes mutated in a minimum number of samples - geneToCases = dict( (g, cases) for g, cases in list(geneToCases.items()) if g in genes and len(cases) >= args.min_frequency ) + geneToCases = dict( (g, cases) for g, cases in geneToCases.items() if g in genes and len(cases) >= args.min_frequency ) genes = set(geneToCases.keys()) num_genes = len(genes) @@ -120,7 +120,7 @@ def run( args ): # Since we are looking for co-occurrence between exclusive sets with # an annotation A, we add events for each patient NOT annotated by # the given annotation - for annotation, cases in list(annotationToPatients.items()): + for annotation, cases in annotationToPatients.items(): not_cases = patients - cases if len(not_cases) > 0: geneToCases[annotation] = not_cases @@ -137,8 +137,8 @@ def run( args ): # Load the weights (if necessary) # Create master versions of the indices - masterGeneToIndex = dict(list(zip(sorted(genes), list(range(num_genes))))) - masterPatientToIndex = dict(list(zip(sorted(patients), list(range(num_patients))))) + masterGeneToIndex = dict(zip(sorted(genes), list(range(num_genes)))) + masterPatientToIndex = dict(zip(sorted(patients), list(range(num_patients)))) geneToP = load_weight_files(args.weights_files, genes, patients, typeToGeneIndex, typeToPatientIndex, masterGeneToIndex, masterPatientToIndex) if args.verbose > 0: diff --git a/process_mutations.py b/process_mutations.py index 66e78d4..99e7b09 100755 --- 
a/process_mutations.py +++ b/process_mutations.py @@ -24,7 +24,7 @@ def get_parser(): def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc, vt, vs, ivc, ivt, ivs, verbose ): if verbose > 1: - print('\tLoading MAF:', maf_file) + print('\tLoading MAF: ', maf_file) genes, patients = set(), set() with open(maf_file, 'r') as IN: seenHeader = False @@ -32,7 +32,7 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc arr = l.rstrip('\n').split('\t') # Parse the header if we haven't seen it yet if not seenHeader and arr[0].lower() == 'hugo_symbol': - arr = list(map(str.lower, arr)) + arr = map(str.lower, arr) seenHeader = True gene_index = 0 patient_index = arr.index('tumor_sample_barcode') @@ -86,7 +86,7 @@ def process_maf( maf_file, patientWhitelist, geneToCases, patientToMutations, vc def process_events_file( events_file, patientWhitelist, geneToCases, patientToMutations, verbose ): if verbose > 1: - print('\tProcessing events file:', events_file) + print('\tProcessing events file: ', events_file) # Parse the events file events, patients = set(), set() From 4158cbd6c858a2c1a9941545ce00ee8ece8fcb36 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 18:51:50 -0400 Subject: [PATCH 50/60] fixed performance issue, added future dependency --- compute_mutation_probabilities.py | 9 ++-- .../simple/all-co-occurrence_results-k2.tsv | 2 + .../simple/any-co-occurrence_results-k2.tsv | 2 + examples/simple/commands2.sh | 52 +++++++++++++++++++ examples/simple/commands3.sh | 52 +++++++++++++++++++ examples/simple/data.json | 1 + examples/simple/exclusivity_results-k2.tsv | 2 + requirements.txt | 1 + wext/exact.py | 4 +- wext/exclusivity_tests.py | 2 +- wext/i_o.py | 18 +++---- wext/mcmc.py | 9 ++-- wext/statistics.py | 4 +- 13 files changed, 137 insertions(+), 21 deletions(-) create mode 100644 examples/simple/all-co-occurrence_results-k2.tsv create mode 100644 examples/simple/any-co-occurrence_results-k2.tsv 
create mode 100644 examples/simple/commands2.sh create mode 100644 examples/simple/commands3.sh create mode 100644 examples/simple/data.json create mode 100644 examples/simple/exclusivity_results-k2.tsv diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py index ab1989c..8606d5a 100755 --- a/compute_mutation_probabilities.py +++ b/compute_mutation_probabilities.py @@ -9,6 +9,7 @@ this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(this_dir) from wext import * +from past.builtins import xrange # Argument parser def get_parser(): @@ -108,7 +109,7 @@ def run( args ): max_tries = 10**9 if args.seed is not None: random.seed(args.seed) - seeds = random.sample(list(range(1, 2*10**9)), args.num_permutations) + seeds = random.sample(xrange(1, 2*10**9), args.num_permutations) # Run the bipartite edge swaps in parallel if more than one core indicated num_cores = min(args.num_cores if args.num_cores != -1 else mp.cpu_count(), args.num_permutations) @@ -156,12 +157,12 @@ def run( args ): P = postprocess_weight_matrix(P, r, s) # Verify the weights again - for g, obs in list(geneToObserved.items()): + for g, obs in geneToObserved.items(): assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol) - for p, obs in list(patientToObserved.items()): + for p, obs in patientToObserved.items(): assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol) - + # Add pseudocounts to entries with no mutations observed; unlikely or impossible after post-processing step P[P == 0] = 1./(2. 
* args.num_permutations) diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv new file mode 100644 index 0000000..44fc4b9 --- /dev/null +++ b/examples/simple/all-co-occurrence_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +c, d 0.003668945489200462 0.18261601945262562 0.00278782844543 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv new file mode 100644 index 0000000..1eb94b2 --- /dev/null +++ b/examples/simple/any-co-occurrence_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +c, d 0.003668945489200462 0.18261601945262562 0.00311303138733 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/commands2.sh b/examples/simple/commands2.sh new file mode 100644 index 0000000..7a43965 --- /dev/null +++ b/examples/simple/commands2.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +num_permutations=1000 +num_cores=4 + +# Preprocess mutations. +python2 ../../process_mutations.py \ + -m adjacency_list.tsv \ + -ct NA \ + -o data.json + +# Compute mutation probabilities. +python2 ../../compute_mutation_probabilities.py \ + -mf data.json \ + -np $num_permutations \ + -nc $num_cores \ + -wf weights.npy \ + -s 12345 \ + -v 1 + +# Find sets using mutual exclusivity test statistic. +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s exclusivity \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o exclusivity_results \ + -v 0 + +# Find sets using a co-occurrence test statistic (any co-occurrence). 
+python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s any-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o any-co-occurrence_results \ + -v 0 + +# Find sets using another co-occurrence test statistic (all co-occurrence). +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s all-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o all-co-occurrence_results \ + -v 0 diff --git a/examples/simple/commands3.sh b/examples/simple/commands3.sh new file mode 100644 index 0000000..96904cf --- /dev/null +++ b/examples/simple/commands3.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +num_permutations=1000 +num_cores=4 + +# Preprocess mutations. +python3 ../../process_mutations.py \ + -m adjacency_list.tsv \ + -ct NA \ + -o data.json + +# Compute mutation probabilities. +python3 ../../compute_mutation_probabilities.py \ + -mf data.json \ + -np $num_permutations \ + -nc $num_cores \ + -wf weights.npy \ + -s 12345 \ + -v 1 + +# Find sets using mutual exclusivity test statistic. +python3 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s exclusivity \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o exclusivity_results \ + -v 0 + +# Find sets using a co-occurrence test statistic (any co-occurrence). +python3 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s any-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o any-co-occurrence_results \ + -v 0 + +# Find sets using another co-occurrence test statistic (all co-occurrence). 
+python3 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s all-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o all-co-occurrence_results \ + -v 0 diff --git a/examples/simple/data.json b/examples/simple/data.json new file mode 100644 index 0000000..731a28a --- /dev/null +++ b/examples/simple/data.json @@ -0,0 +1 @@ +{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}} \ No newline at end of file diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv new file mode 100644 index 0000000..844ce1f --- /dev/null +++ b/examples/simple/exclusivity_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 
t01 t10 t11 +a, b 0.00025630815683653265 0.010596224876535663 0.0043740272522 12 0 2 6 6 0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 12f43db..eca8f8a 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ numpy >=1.11.0 scipy >=0.17.0 networkx >= 1.11 +future >= 0.16.0 diff --git a/wext/exact.py b/wext/exact.py index 9bcfb5c..345450f 100644 --- a/wext/exact.py +++ b/wext/exact.py @@ -19,8 +19,10 @@ def exact_test_k3(t, x, p, verbose): return wext_exact_test.triple_exact_test( N, t, x[0], x[1], x[2], p ) # Wrapper for k=2 exact test C function -def exact_test_k2(t, x, y, p_x, p_y, verbose): +def exact_test_k2(t, xy, pxpy, verbose): # Two-sided test + (x, y) = xy + (p_x, p_y) = pxpy N = len(p_x) z = (x + y - t)/2 # count number of co-occurrences tail_masses = wext_exact_test.conditional(N, list(range(z+1)), x, y, p_x, p_y) diff --git a/wext/exclusivity_tests.py b/wext/exclusivity_tests.py index a7c4229..936e77a 100755 --- a/wext/exclusivity_tests.py +++ b/wext/exclusivity_tests.py @@ -22,7 +22,7 @@ def wre_test(t, x, p, method=EXACT, verbose=0): # Check that the probabilities are in (0, 1]. 
assert(all(0= min_freq ) + genes = set( g for g, cases in geneToCases.items() if len(cases) >= min_freq ) return genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators @@ -34,11 +34,11 @@ def load_patient_annotation_file(patient_annotation_file): # Converts keys from an iterable to tab-separated, so the dictionary can be # output as JSON def convert_dict_for_json( setToVal, sep='\t' ): - return dict( (sep.join(sorted(M)), val) for M, val in list(setToVal.items()) ) + return dict( (sep.join(sorted(M)), val) for M, val in setToVal.items() ) # Converts tab-separated keys back to frozensets def convert_dict_from_json( setToVal, sep='\t', iterable=frozenset ): - return dict( (iterable(M.split(sep)), val) for M, val in list(setToVal.items()) ) + return dict( (iterable(M.split(sep)), val) for M, val in setToVal.items() ) # Create the header strings for a contingency table def create_tbl_header( k ): @@ -54,7 +54,7 @@ def output_enumeration_table(args, k, setToPval, setToRuntime, setToFDR, setToOb if not args.json_format: # Construct the rows rows = [] - for M, pval in list(setToPval.items()): + for M, pval in setToPval.items(): if setToFDR[M]<=fdr_threshold: X, T, Z, tbl = setToObs[M] row = [ ', '.join(sorted(M)), pval, setToFDR[M], setToRuntime[M], T, Z ] + tbl @@ -90,14 +90,14 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs): params = vars(args) output = dict(params=params, setToPval=convert_dict_for_json(setToPval), setToObs=convert_dict_for_json(setToObs), - setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for sets, freq in list(setsToFreq.items()) )) + setsToFreq=dict( (' '.join([ ','.join(sorted(M)) for M in sets ]), freq) for sets, freq in setsToFreq.items()) ) with open(args.output_prefix + '.json', 'w') as OUT: json.dump( output, OUT ) else: # Output a gene set file with open(args.output_prefix + '-sampled-collections.tsv', 'w') as OUT: rows = [] - for sets, freq in list(setsToFreq.items()): + for sets, 
freq in setsToFreq.items(): row = [ ' '.join([ ','.join(M) for M in sets ]), freq ] row.append( sum( -np.log10(setToPval[M] ** args.alpha) for M in sets )) rows.append(row) @@ -109,7 +109,7 @@ def output_mcmc(args, setsToFreq, setToPval, setToObs): # Output each of the sample gene sets with open(args.output_prefix + '-sampled-sets.tsv', 'w') as OUT: rows = [] - for M, pval in list(setToPval.items()): + for M, pval in setToPval.items(): X, T, Z, tbl = setToObs[M] rows.append([ ','.join(sorted(M)), pval, T, Z] + tbl ) rows.sort(key=lambda r: r[1]) diff --git a/wext/mcmc.py b/wext/mcmc.py index 29f43c9..4c15fc2 100755 --- a/wext/mcmc.py +++ b/wext/mcmc.py @@ -46,10 +46,11 @@ def _collection_weight(collection): return sum( _weight(M) for M in collection ) def _to_collection(solution): - return frozenset( frozenset(M) for M in list(solution.values()) ) + return frozenset( frozenset(M) for M in solution.values() ) # Compute the acceptance ratio - def _log_accept_ratio( W_current, W_next ): return W_next - W_current + def _log_accept_ratio( W_current, W_next ): + return W_next - W_current # Set up PRNG, sample space, and output random_seed(seed) @@ -88,7 +89,7 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current # if we only have one set, we can't swap between sets if t == 1: continue i = next_assigned[next_gene] - swap_gene = choice([ g for g in list(next_assigned.keys()) if g not in next_soln[i] ]) + swap_gene = choice([ g for g in next_assigned.keys() if g not in next_soln[i] ]) j = next_assigned[swap_gene] next_assigned[swap_gene] = i next_soln[i].add(swap_gene) @@ -129,7 +130,7 @@ def _log_accept_ratio( W_current, W_next ): return W_next - W_current # Merge the various chains setsToTotalFreq = defaultdict(int) for counter in setsToFreq: - for sets, freq in list(counter.items()): + for sets, freq in counter.items(): setsToTotalFreq[sets] += freq return setsToTotalFreq, setToPval, setToObs diff --git a/wext/statistics.py b/wext/statistics.py index 
b1a3834..4ceef54 100755 --- a/wext/statistics.py +++ b/wext/statistics.py @@ -36,7 +36,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'): sorted_q_values = np.zeros(n) sorted_q_values[n-1] = min(sorted_p_values[n-1], 1.0) - for i in reversed(list(range(n-1))): + for i in reversed(range(n-1)): sorted_q_values[i] = min(float(n)/float(i+1)*sorted_p_values[i], sorted_q_values[i+1]) q_values = np.zeros(n) @@ -49,7 +49,7 @@ def multiple_hypothesis_correction(p_values_, method='BH'): c = np.sum(1.0/np.arange(1, n+1, dtype=np.float64)) sorted_q_values = np.zeros(n) sorted_q_values[n-1] = min(c*sorted_p_values[n-1], 1.0) - for i in reversed(list(range(n-1))): + for i in reversed(range(n-1)): sorted_q_values[i] = min(c*(float(n)/float(i+1))*sorted_p_values[i], sorted_q_values[i+1]) q_values = np.zeros(n) From a60809a121878eef74d0030b9b5aadbfeb3a8fb1 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 18:52:43 -0400 Subject: [PATCH 51/60] fixed performance issue, added future dependency --- .../simple/all-co-occurrence_results-k2.tsv | 2 - .../simple/any-co-occurrence_results-k2.tsv | 2 - examples/simple/commands2.sh | 52 ------------------- examples/simple/commands3.sh | 52 ------------------- examples/simple/data.json | 1 - examples/simple/exclusivity_results-k2.tsv | 2 - 6 files changed, 111 deletions(-) delete mode 100644 examples/simple/all-co-occurrence_results-k2.tsv delete mode 100644 examples/simple/any-co-occurrence_results-k2.tsv delete mode 100644 examples/simple/commands2.sh delete mode 100644 examples/simple/commands3.sh delete mode 100644 examples/simple/data.json delete mode 100644 examples/simple/exclusivity_results-k2.tsv diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv deleted file mode 100644 index 44fc4b9..0000000 --- a/examples/simple/all-co-occurrence_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE 
(Saddlepoint) Runtime T Z t00 t01 t10 t11 -c, d 0.003668945489200462 0.18261601945262562 0.00278782844543 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv deleted file mode 100644 index 1eb94b2..0000000 --- a/examples/simple/any-co-occurrence_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 -c, d 0.003668945489200462 0.18261601945262562 0.00311303138733 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/commands2.sh b/examples/simple/commands2.sh deleted file mode 100644 index 7a43965..0000000 --- a/examples/simple/commands2.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash - -num_permutations=1000 -num_cores=4 - -# Preprocess mutations. -python2 ../../process_mutations.py \ - -m adjacency_list.tsv \ - -ct NA \ - -o data.json - -# Compute mutation probabilities. -python2 ../../compute_mutation_probabilities.py \ - -mf data.json \ - -np $num_permutations \ - -nc $num_cores \ - -wf weights.npy \ - -s 12345 \ - -v 1 - -# Find sets using mutual exclusivity test statistic. -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s exclusivity \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o exclusivity_results \ - -v 0 - -# Find sets using a co-occurrence test statistic (any co-occurrence). -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s any-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o any-co-occurrence_results \ - -v 0 - -# Find sets using another co-occurrence test statistic (all co-occurrence). 
-python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s all-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o all-co-occurrence_results \ - -v 0 diff --git a/examples/simple/commands3.sh b/examples/simple/commands3.sh deleted file mode 100644 index 96904cf..0000000 --- a/examples/simple/commands3.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash - -num_permutations=1000 -num_cores=4 - -# Preprocess mutations. -python3 ../../process_mutations.py \ - -m adjacency_list.tsv \ - -ct NA \ - -o data.json - -# Compute mutation probabilities. -python3 ../../compute_mutation_probabilities.py \ - -mf data.json \ - -np $num_permutations \ - -nc $num_cores \ - -wf weights.npy \ - -s 12345 \ - -v 1 - -# Find sets using mutual exclusivity test statistic. -python3 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s exclusivity \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o exclusivity_results \ - -v 0 - -# Find sets using a co-occurrence test statistic (any co-occurrence). -python3 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s any-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o any-co-occurrence_results \ - -v 0 - -# Find sets using another co-occurrence test statistic (all co-occurrence). 
-python3 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s all-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o all-co-occurrence_results \ - -v 0 diff --git a/examples/simple/data.json b/examples/simple/data.json deleted file mode 100644 index 731a28a..0000000 --- a/examples/simple/data.json +++ /dev/null @@ -1 +0,0 @@ -{"hypermutators": [], "geneToCases": {"a": ["11", "1", "3", "5", "7", "9"], "c": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "b": ["10", "12", "2", "4", "6", "8"], "e": ["1", "11"], "d": ["13", "14", "1", "3", "2", "5", "4", "7", "6"], "g": ["12", "2"], "f": ["1"], "h": ["2"]}, "num_patients": 14, "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "patientToType": {"11": "NA", "10": "NA", "13": "NA", "12": "NA", "14": "NA", "1": "NA", "3": "NA", "2": "NA", "5": "NA", "4": "NA", "7": "NA", "6": "NA", "9": "NA", "8": "NA"}, "num_genes": 8, "params": {"cancerToFiles": {"NA": ["/Users/biederse/wext/examples/simple/adjacency_list.tsv"]}, "ignored_validation_statuses": ["Wildtype", "Invalid"], "hypermutators_file": null, "cancer_types": ["NA"], "ignored_variant_types": ["Germline"], "patient_whitelist_file": null, "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"]}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "patientToMutations": {"11": ["a", "e"], "10": ["b"], "13": ["c", "d"], "12": ["b", "g"], "14": ["c", "d"], "1": ["a", "c", "e", "d", "f"], "3": ["a", "c", "d"], "2": ["h", "c", "b", "d", "g"], "5": ["a", "c", "d"], "4": ["c", "b", "d"], "7": ["a", "c", "d"], "6": ["c", "b", "d"], "9": ["a"], "8": ["b"]}} \ No newline at end of file diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv deleted file mode 100644 index 844ce1f..0000000 --- a/examples/simple/exclusivity_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime 
T Z t00 t01 t10 t11 -a, b 0.00025630815683653265 0.010596224876535663 0.0043740272522 12 0 2 6 6 0 \ No newline at end of file From e3a43558c4efdcc1bb34f6b9d53b21bfe1910065 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 22:09:05 -0400 Subject: [PATCH 52/60] revised experiments/eccb2016/scripts --- experiments/eccb2016/scripts/permutation_test_helper.py | 2 +- .../eccb2016/scripts/reconcile_grid_permutation_test.py | 3 ++- experiments/eccb2016/scripts/remove_genes_with_no_length.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py index fc9b90a..e2691ab 100644 --- a/experiments/eccb2016/scripts/permutation_test_helper.py +++ b/experiments/eccb2016/scripts/permutation_test_helper.py @@ -16,7 +16,7 @@ parser.add_argument('-o', '--output_prefix', type=str, required=True) parser.add_argument('-w', '--wext_directory', type=str, required=True) parser.add_argument('-j', '--job_id', type=int, required=job_id is None, default=job_id) -parser.add_argument('-v', '--verbose', type=int, required=False, default=0, choices=range(5)) +parser.add_argument('-v', '--verbose', type=int, required=False, default=0, choices=list(range(5))) args = parser.parse_args( sys.argv[1:] ) # Load weighted exclusivity test diff --git a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py index d7c0354..c60a68c 100644 --- a/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py +++ b/experiments/eccb2016/scripts/reconcile_grid_permutation_test.py @@ -19,6 +19,7 @@ # Load and merge the JSON files def load_json_files(( json_files )): + setToCount = defaultdict( int ) setToRuntime = defaultdict( float ) setToObs = dict() @@ -27,7 +28,7 @@ def load_json_files(( json_files )): # Parse the JSON file with open(json_file, 'r') as IN: obj = json.load(IN) - for M, pval in 
obj['setToPval'].iteritems(): + for M, pval in obj['setToPval'].items(): frozen_M = frozenset(M.split('\t')) setToCount[frozen_M] += int(round(pval * args.batch_size)) setToRuntime[frozen_M] += obj['setToRuntime'][M] diff --git a/experiments/eccb2016/scripts/remove_genes_with_no_length.py b/experiments/eccb2016/scripts/remove_genes_with_no_length.py index 7017b08..3b5d316 100644 --- a/experiments/eccb2016/scripts/remove_genes_with_no_length.py +++ b/experiments/eccb2016/scripts/remove_genes_with_no_length.py @@ -23,7 +23,7 @@ # Remove genes without a length original_genes = set(obj['genes']) remaining_genes = original_genes & set(geneToLength.keys()) -obj['geneToCases'] = dict( (g, cases) for g, cases in obj['geneToCases'].iteritems() if g in geneToLength ) +obj['geneToCases'] = dict( (g, cases) for g, cases in obj['geneToCases'].items() if g in geneToLength ) obj['genes'] = sorted(obj['geneToCases'].keys()) obj['num_genes'] = len(obj['genes']) obj['params']['lengths_file'] = os.path.abspath(args.lengths_file) From a508daccd354be0d680901e398a39875ce8f9a1f Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Tue, 11 Sep 2018 22:09:35 -0400 Subject: [PATCH 53/60] revised experiments/eccb2016/scripts, permutation_helper --- .../eccb2016/scripts/permutation_test_helper.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/experiments/eccb2016/scripts/permutation_test_helper.py b/experiments/eccb2016/scripts/permutation_test_helper.py index e2691ab..56d8d57 100644 --- a/experiments/eccb2016/scripts/permutation_test_helper.py +++ b/experiments/eccb2016/scripts/permutation_test_helper.py @@ -25,19 +25,23 @@ from wext import rce_permutation_test, load_mutation_data, output_enumeration_table # Load the mutation data -if args.verbose > 0: print '* Loading mutation data..' 
+if args.verbose > 0: + print('* Loading mutation data..') mutation_data = load_mutation_data( args.mutation_file, args.min_freq ) genes, all_genes, patients, geneToCases, _, params, _ = mutation_data num_patients = len(patients) sets = list( frozenset(t) for t in combinations(genes, args.gene_set_size) ) -if args.verbose > 0: print '\t- Testing {} sets of size k={}'.format(len(sets), args.gene_set_size) +if args.verbose > 0: + print('\t- Testing {} sets of size k={}'.format(len(sets), args.gene_set_size)) # Run the permutational test -if args.verbose > 0: print '* Running permutation test...' +if args.verbose > 0: + print('* Running permutation test...') start_index = (args.job_id-1) * args.batch_size permuted_files = get_permuted_files([args.input_directory], args.num_permutations)[start_index:start_index + args.batch_size] -if args.verbose > 0: print '\t- Testing {} files'.format(len(permuted_files)) +if args.verbose > 0: + print('\t- Testing {} files'.format(len(permuted_files))) setToPval, setToRuntime, setToFDR, setToObs = rce_permutation_test( sets, geneToCases, num_patients, permuted_files, 1, 0 ) From 82ff47c537e4628a92ab4cd615c051024f310813 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Wed, 12 Sep 2018 11:02:04 -0400 Subject: [PATCH 54/60] revised travis config --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4652bb5..22c0c61 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,6 @@ python: - 3.5 - 3.6 install: - - sudo apt-get -y update - sudo apt-get -y update - sudo apt-get -y install r-base - sudo apt-get -y install python-matplotlib From 3ddc71fe17678388a78fab0ff9c9c04053bb7921 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 30 Sep 2018 06:20:12 -0400 Subject: [PATCH 55/60] revised README --- README.md | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index a77b0c5..952fbe8 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,11 @@ 
# Weighted Exclusivity Test (WExT) # -The Weighted Exclusivity Test (WExT) was developed by the [Raphael research group](http://compbio.cs.brown.edu/) at Brown University. - -### Requirements ### - -Latest tested version in parentheses. +[![Build Status](https://api.travis-ci.org/raphael-group/wext.svg?branch=master)](https://travis-ci.org/raphael-group/wext?branch=master) -1. Python (2.7.9) - a. NumPy (1.11.0) - - b. SciPy (0.17.0) +The Weighted Exclusivity Test (WExT) was developed by the [Raphael research group](http://compbio.cs.brown.edu/) at Brown University. -2. gcc (4.9.2) +### Requirements ### We recommend using [`virtualenv`](https://virtualenv.pypa.io/en/latest/) to install the Python requirements. After installing `virtualenv`, you can install the Python requirements for the weighted exclusivity test as follows: @@ -27,8 +20,7 @@ See the wiki for additional instructions on [Setup and installation](https://git The C and Fortran extensions must be compiled before running the weighted exclusivity test: cd wext - python setup.py build - f2py -c src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module + python setup.py install ### Usage ### From 31a8a282313d02f9844f7cdeb49e9e8855440fed Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 30 Sep 2018 08:03:01 -0400 Subject: [PATCH 56/60] revised source --- compute_mutation_probabilities.py | 12 ++-- .../simple/all-co-occurrence_results-k2.tsv | 2 + .../simple/any-co-occurrence_results-k2.tsv | 2 + examples/simple/commands_python2.sh | 52 ++++++++++++++++++ examples/simple/commands_python3.sh | 52 ++++++++++++++++++ examples/simple/data.json | 1 + examples/simple/exclusivity_results-k2.tsv | 2 + examples/simple/weights.npy | Bin 1024 -> 1024 bytes experiments/eccb2016/scripts/helper.py | 9 +-- .../eccb2016/scripts/permute_single_matrix.py | 8 +-- .../scripts/remove_genes_with_no_length.py | 2 +- .../eccb2016/scripts/weights_matrix.py | 2 +- viz/generate_viz_data.py | 10 ++-- 
wext/enumerate_sets.py | 30 +++++----- wext/mcmc.py | 7 ++- wext/setup.py | 3 +- 16 files changed, 153 insertions(+), 41 deletions(-) create mode 100644 examples/simple/all-co-occurrence_results-k2.tsv create mode 100644 examples/simple/any-co-occurrence_results-k2.tsv create mode 100644 examples/simple/commands_python2.sh create mode 100644 examples/simple/commands_python3.sh create mode 100644 examples/simple/data.json create mode 100644 examples/simple/exclusivity_results-k2.tsv diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py index 8606d5a..1b4d845 100755 --- a/compute_mutation_probabilities.py +++ b/compute_mutation_probabilities.py @@ -63,7 +63,7 @@ def postprocess_weight_matrix(P, r, s): # Average weights over entries of weight matrix with same marginals P_mean = np.zeros(np.shape(P)) - for marginals, indices in list(marginals_to_indices.items()): + for marginals, indices in marginals_to_indices.items(): mean_value = float(sum(P[i, j] for i, j in indices))/float(len(indices)) for i, j in indices: P_mean[i, j] = mean_value @@ -84,15 +84,15 @@ def run( args ): mutation_data = load_mutation_data( args.mutation_file ) genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data - geneToObserved = dict( (g, len(cases)) for g, cases in iter(list(geneToCases.items())) ) - patientToObserved = dict( (p, len(muts)) for p, muts in iter(list(patientToMutations.items())) ) + geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.items()) + patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.items()) geneToIndex = dict( (g, i+1) for i, g in enumerate(all_genes) ) indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) ) patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) ) indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) ) edges = set() - for gene, cases in list(geneToCases.items()): + for gene, cases in geneToCases.items(): for 
patient in cases: edges.add( (geneToIndex[gene], patientToIndex[patient]) ) @@ -140,10 +140,10 @@ def run( args ): P = np.add.reduce(observeds) / float(len(observeds)) # Verify the weights - for g, obs in list(geneToObserved.items()): + for g, obs in geneToObserved.items(): assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol) - for p, obs in list(patientToObserved.items()): + for p, obs in patientToObserved.items(): assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol) # Construct mutation matrix to compute marginals diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv new file mode 100644 index 0000000..de3ca9f --- /dev/null +++ b/examples/simple/all-co-occurrence_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +c, d 0.0037101364741384682 0.18466623625983988 0.0026938915252685547 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv new file mode 100644 index 0000000..6b1b151 --- /dev/null +++ b/examples/simple/any-co-occurrence_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +c, d 0.0037101364741384682 0.18466623625983988 0.0025408267974853516 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/commands_python2.sh b/examples/simple/commands_python2.sh new file mode 100644 index 0000000..7a43965 --- /dev/null +++ b/examples/simple/commands_python2.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +num_permutations=1000 +num_cores=4 + +# Preprocess mutations. +python2 ../../process_mutations.py \ + -m adjacency_list.tsv \ + -ct NA \ + -o data.json + +# Compute mutation probabilities. 
+python2 ../../compute_mutation_probabilities.py \ + -mf data.json \ + -np $num_permutations \ + -nc $num_cores \ + -wf weights.npy \ + -s 12345 \ + -v 1 + +# Find sets using mutual exclusivity test statistic. +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s exclusivity \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o exclusivity_results \ + -v 0 + +# Find sets using a co-occurrence test statistic (any co-occurrence). +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s any-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o any-co-occurrence_results \ + -v 0 + +# Find sets using another co-occurrence test statistic (all co-occurrence). +python2 ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s all-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o all-co-occurrence_results \ + -v 0 diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh new file mode 100644 index 0000000..7320f5a --- /dev/null +++ b/examples/simple/commands_python3.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +num_permutations=1000 +num_cores=4 + +# Preprocess mutations. +python ../../process_mutations.py \ + -m adjacency_list.tsv \ + -ct NA \ + -o data.json + +# Compute mutation probabilities. +python ../../compute_mutation_probabilities.py \ + -mf data.json \ + -np $num_permutations \ + -nc $num_cores \ + -wf weights.npy \ + -s 12345 \ + -v 1 + +# Find sets using mutual exclusivity test statistic. +python ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s exclusivity \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o exclusivity_results \ + -v 0 + +# Find sets using a co-occurrence test statistic (any co-occurrence). +python ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s any-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o any-co-occurrence_results \ + -v 0 + +# Find sets using another co-occurrence test statistic (all co-occurrence). 
+python ../../find_sets.py \ + -mf data.json \ + -wf weights.npy \ + -s all-co-occurrence \ + -k 2 \ + -c $num_cores \ + -f 2 \ + -o all-co-occurrence_results \ + -v 0 diff --git a/examples/simple/data.json b/examples/simple/data.json new file mode 100644 index 0000000..84d082d --- /dev/null +++ b/examples/simple/data.json @@ -0,0 +1 @@ +{"params": {"cancerToFiles": {"NA": ["/Users/biederse/benchmark_hotnet2_test27Sept2018/wext/examples/simple/adjacency_list.tsv"]}, "cancer_types": ["NA"], "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"], "ignored_variant_types": ["Germline"], "ignored_validation_statuses": ["Wildtype", "Invalid"], "patient_whitelist_file": null, "hypermutators_file": null}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "hypermutators": [], "geneToCases": {"e": ["1", "11"], "a": ["9", "3", "7", "5", "1", "11"], "c": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "f": ["1"], "d": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "h": ["2"], "g": ["12", "2"], "b": ["6", "10", "8", "12", "4", "2"]}, "patientToType": {"9": "NA", "3": "NA", "6": "NA", "8": "NA", "10": "NA", "7": "NA", "4": "NA", "12": "NA", "13": "NA", "5": "NA", "14": "NA", "2": "NA", "1": "NA", "11": "NA"}, "patientToMutations": {"1": ["f", "e", "d", "a", "c"], "2": ["h", "d", "c", "g", "b"], "3": ["d", "a", "c"], "4": ["d", "c", "b"], "5": ["d", "a", "c"], "6": ["d", "c", "b"], "7": ["d", "a", "c"], "8": ["b"], "9": ["a"], "10": ["b"], "11": ["a", "e"], "12": ["g", "b"], "13": ["d", "c"], "14": ["d", "c"]}, "num_genes": 8, "num_patients": 14} \ No newline at end of file diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv new file mode 100644 index 0000000..ef88565 --- /dev/null +++ b/examples/simple/exclusivity_results-k2.tsv @@ -0,0 +1,2 @@ +#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR 
WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 +a, b 0.00024227153775402632 0.012058686612143744 0.002846956253051758 12 0 2 6 6 0 \ No newline at end of file diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy index 0cdfdcf8096fc128e3dd5fb94fbe128002d925e6..017cfb74ef630ca46607d00e657a5736cdab2707 100644 GIT binary patch literal 1024 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I#iItqp+nmP)#3giN=`BM6KRoGtJYcAfNy0-72{ee>pUAKL{jFX0`Pkeeo z>GsbD_JnAdxiA{0AI7J(eMb~F6lF|&Yk$jOId93WEB2Wa-`3b~eT0*SseiS7J@fOP z7xsi`n7J?-rXR+qwS6M%+#gnn+_SeoypCb#^*Q!aBv``ubT{FoVe0Q~ZK=7i`Jg=^ z8fGqxR=)kSE^E&P`;FQT?@Yy;?ER7iA}qHoz)8c@|6TdSM!}zqe@8-Gh!H<`|wSTdt(NFrt6?x&EaKEg@E)X&;IZ32JQ z3wuH|%v=}^(+}g*+CHV*KkKsg+_S&1LEGV->2!Oc<^!h|x^BWr!_+h8H?kb= T ) setToPval[M] = count / np # Compute FDRs - tested_sets = list(setToPval.keys()) + tested_sets = setToPval.keys() pvals = [ setToPval[M] for M in tested_sets ] setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))) @@ -181,7 +181,7 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores= # Make sure all P-values are numbers tested_sets = len(setToPval) - invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) + invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) # Report invalid sets if verbose > 0 and report_invalids: @@ -192,9 +192,9 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores= invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ]) sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' ) - setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets ) - setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets ) - setToObs = dict( (M, obs) for M, obs in 
list(setToObs.items()) if not M in invalid_sets ) + setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in invalid_sets ) + setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets ) + setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets ) if verbose > 0: print('- Output {} sets'.format(len(setToPval))) @@ -260,7 +260,7 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, # Make sure all P-values are numbers tested_sets = len(setToPval) - invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) + invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) # Report invalid sets if verbose > 0 and report_invalids: @@ -271,9 +271,9 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ]) sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' ) - setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets ) - setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets ) - setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets ) + setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in invalid_sets ) + setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets ) + setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets ) if verbose > 0: print('- Output {} sets'.format(len(setToPval))) @@ -292,4 +292,4 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, ################################################################################ # Testable set def testable_set( k, T, Z, tbl ): - return T > Z and all( tbl[2**i] > 0 
for i in list(range(k)) ) + return T > Z and all( tbl[2**i] > 0 for i in range(k) ) diff --git a/wext/mcmc.py b/wext/mcmc.py index 4c15fc2..1269378 100755 --- a/wext/mcmc.py +++ b/wext/mcmc.py @@ -4,6 +4,7 @@ from collections import defaultdict from time import time from random import random, sample, choice, seed as random_seed +from past.builtins import xrange from .constants import * from .enumerate_sets import observed_values @@ -56,9 +57,9 @@ def _log_accept_ratio( W_current, W_next ): random_seed(seed) t = len(ks) genespace = list(geneToCases.keys()) - setsToFreq = [ defaultdict(int) for _ in range(nchains) ] + setsToFreq = [ defaultdict(int) for _ in xrange(nchains) ] setToPval, setToObs = dict(), dict() - for c in range(nchains): + for c in xrange(nchains): if verbose > 0: print('- Experiment', c+1) @@ -78,7 +79,7 @@ def _log_accept_ratio( W_current, W_next ): sys.stdout.flush() # Sample the next gene to swap in/around the set - next_soln = dict( (index, set(M)) for index, M in list(soln.items()) ) + next_soln = dict( (index, set(M)) for index, M in soln.items() ) next_assigned = dict(list(assigned.items())) next_gene = choice(genespace) diff --git a/wext/setup.py b/wext/setup.py index c4ba272..38de6ac 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -3,8 +3,7 @@ """Compiles the C modules used by the weighted exclusivity test.""" # Load required modules -from numpy.distutils.core import setup -from numpy.distutils.extension import Extension +from numpy.distutils.core import setup, Extension import numpy, os thisDir = os.path.dirname(os.path.realpath(__file__)) From 3abf0f60c9594bd0fe281a9da945564eb36d8850 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 30 Sep 2018 08:03:47 -0400 Subject: [PATCH 57/60] remove debugging files --- examples/simple/adjacency_list.tsv | 14 ----- .../simple/all-co-occurrence_results-k2.tsv | 2 - .../simple/any-co-occurrence_results-k2.tsv | 2 - examples/simple/commands_python2.sh | 52 ------------------ 
examples/simple/commands_python3.sh | 52 ------------------ examples/simple/data.json | 1 - examples/simple/exclusivity_results-k2.tsv | 2 - examples/simple/weights.npy | Bin 1024 -> 0 bytes 8 files changed, 125 deletions(-) delete mode 100644 examples/simple/adjacency_list.tsv delete mode 100644 examples/simple/all-co-occurrence_results-k2.tsv delete mode 100644 examples/simple/any-co-occurrence_results-k2.tsv delete mode 100644 examples/simple/commands_python2.sh delete mode 100644 examples/simple/commands_python3.sh delete mode 100644 examples/simple/data.json delete mode 100644 examples/simple/exclusivity_results-k2.tsv delete mode 100644 examples/simple/weights.npy diff --git a/examples/simple/adjacency_list.tsv b/examples/simple/adjacency_list.tsv deleted file mode 100644 index b303963..0000000 --- a/examples/simple/adjacency_list.tsv +++ /dev/null @@ -1,14 +0,0 @@ -1 a c d e f -2 b c d g h -3 a c d -4 b c d -5 a c d -6 b c d -7 a c d -8 b -9 a -10 b -11 a e -12 b g -13 c d -14 c d diff --git a/examples/simple/all-co-occurrence_results-k2.tsv b/examples/simple/all-co-occurrence_results-k2.tsv deleted file mode 100644 index de3ca9f..0000000 --- a/examples/simple/all-co-occurrence_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 -c, d 0.0037101364741384682 0.18466623625983988 0.0026938915252685547 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/any-co-occurrence_results-k2.tsv b/examples/simple/any-co-occurrence_results-k2.tsv deleted file mode 100644 index 6b1b151..0000000 --- a/examples/simple/any-co-occurrence_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 -c, d 0.0037101364741384682 0.18466623625983988 0.0025408267974853516 0 9 5 0 0 9 \ No newline at end of file diff --git a/examples/simple/commands_python2.sh 
b/examples/simple/commands_python2.sh deleted file mode 100644 index 7a43965..0000000 --- a/examples/simple/commands_python2.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash - -num_permutations=1000 -num_cores=4 - -# Preprocess mutations. -python2 ../../process_mutations.py \ - -m adjacency_list.tsv \ - -ct NA \ - -o data.json - -# Compute mutation probabilities. -python2 ../../compute_mutation_probabilities.py \ - -mf data.json \ - -np $num_permutations \ - -nc $num_cores \ - -wf weights.npy \ - -s 12345 \ - -v 1 - -# Find sets using mutual exclusivity test statistic. -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s exclusivity \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o exclusivity_results \ - -v 0 - -# Find sets using a co-occurrence test statistic (any co-occurrence). -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s any-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o any-co-occurrence_results \ - -v 0 - -# Find sets using another co-occurrence test statistic (all co-occurrence). -python2 ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s all-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o all-co-occurrence_results \ - -v 0 diff --git a/examples/simple/commands_python3.sh b/examples/simple/commands_python3.sh deleted file mode 100644 index 7320f5a..0000000 --- a/examples/simple/commands_python3.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash - -num_permutations=1000 -num_cores=4 - -# Preprocess mutations. -python ../../process_mutations.py \ - -m adjacency_list.tsv \ - -ct NA \ - -o data.json - -# Compute mutation probabilities. -python ../../compute_mutation_probabilities.py \ - -mf data.json \ - -np $num_permutations \ - -nc $num_cores \ - -wf weights.npy \ - -s 12345 \ - -v 1 - -# Find sets using mutual exclusivity test statistic. 
-python ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s exclusivity \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o exclusivity_results \ - -v 0 - -# Find sets using a co-occurrence test statistic (any co-occurrence). -python ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s any-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o any-co-occurrence_results \ - -v 0 - -# Find sets using another co-occurrence test statistic (all co-occurrence). -python ../../find_sets.py \ - -mf data.json \ - -wf weights.npy \ - -s all-co-occurrence \ - -k 2 \ - -c $num_cores \ - -f 2 \ - -o all-co-occurrence_results \ - -v 0 diff --git a/examples/simple/data.json b/examples/simple/data.json deleted file mode 100644 index 84d082d..0000000 --- a/examples/simple/data.json +++ /dev/null @@ -1 +0,0 @@ -{"params": {"cancerToFiles": {"NA": ["/Users/biederse/benchmark_hotnet2_test27Sept2018/wext/examples/simple/adjacency_list.tsv"]}, "cancer_types": ["NA"], "ignored_variant_classes": ["Silent", "Intron", "3'UTR", "5'UTR", "IGR", "lincRNA", "RNA"], "ignored_variant_types": ["Germline"], "ignored_validation_statuses": ["Wildtype", "Invalid"], "patient_whitelist_file": null, "hypermutators_file": null}, "patients": ["1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9"], "genes": ["a", "b", "c", "d", "e", "f", "g", "h"], "hypermutators": [], "geneToCases": {"e": ["1", "11"], "a": ["9", "3", "7", "5", "1", "11"], "c": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "f": ["1"], "d": ["3", "6", "7", "4", "13", "5", "14", "2", "1"], "h": ["2"], "g": ["12", "2"], "b": ["6", "10", "8", "12", "4", "2"]}, "patientToType": {"9": "NA", "3": "NA", "6": "NA", "8": "NA", "10": "NA", "7": "NA", "4": "NA", "12": "NA", "13": "NA", "5": "NA", "14": "NA", "2": "NA", "1": "NA", "11": "NA"}, "patientToMutations": {"1": ["f", "e", "d", "a", "c"], "2": ["h", "d", "c", "g", "b"], "3": ["d", "a", "c"], "4": ["d", "c", "b"], "5": ["d", "a", "c"], "6": ["d", "c", "b"], 
"7": ["d", "a", "c"], "8": ["b"], "9": ["a"], "10": ["b"], "11": ["a", "e"], "12": ["g", "b"], "13": ["d", "c"], "14": ["d", "c"]}, "num_genes": 8, "num_patients": 14} \ No newline at end of file diff --git a/examples/simple/exclusivity_results-k2.tsv b/examples/simple/exclusivity_results-k2.tsv deleted file mode 100644 index ef88565..0000000 --- a/examples/simple/exclusivity_results-k2.tsv +++ /dev/null @@ -1,2 +0,0 @@ -#Gene set WRE (Saddlepoint) P-value WRE (Saddlepoint) FDR WRE (Saddlepoint) Runtime T Z t00 t01 t10 t11 -a, b 0.00024227153775402632 0.012058686612143744 0.002846956253051758 12 0 2 6 6 0 \ No newline at end of file diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy deleted file mode 100644 index 017cfb74ef630ca46607d00e657a5736cdab2707..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1024 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I#iItqp+nmP)#3giN=`BM6KRoGtJYcAfNy0-72{ee>pUAKL{jFX0`Pkeeo z>GsbD_JnAdxiA{0AI7J(eMb~F6lF|&Yk$jOId93WEB2Wa-`3b~eT0*SseiS7J@fOP z7xsi`n7J?-rXR+qwS6M%+#gnn+_SeoypCb#^*Q!aBv``ubT{FoVe0Q~ZK=7i`Jg=^ z8fGqxR=)kSE^E&P`;FQT?@Yy;?ER7iA}qHoz)8c@|6Td Date: Sun, 30 Sep 2018 08:37:17 -0400 Subject: [PATCH 58/60] source code revisions for py23 compatibility --- .travis.yml | 1 - compute_mutation_probabilities.py | 14 ++++---- examples/simple/weights.npy | Bin 1024 -> 0 bytes experiments/eccb2016/scripts/helper.py | 9 +++--- .../eccb2016/scripts/permute_single_matrix.py | 8 ++--- .../scripts/remove_genes_with_no_length.py | 2 +- experiments/eccb2016/scripts/results_table.py | 14 ++++---- .../scripts/sample_mutation_frequency_plot.py | 4 +-- .../eccb2016/scripts/triple_pval_scatter.py | 2 +- .../eccb2016/scripts/unweighted_comparison.py | 4 +-- .../eccb2016/scripts/weights_matrix.py | 2 +- viz/generate_viz_data.py | 12 +++---- wext/enumerate_sets.py | 30 +++++++++--------- wext/mcmc.py | 7 ++-- wext/setup.py | 3 +- 15 files changed, 56 
insertions(+), 56 deletions(-) delete mode 100644 examples/simple/weights.npy diff --git a/.travis.yml b/.travis.yml index 22c0c61..37ca25e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,6 @@ install: - cd wext - python setup.py install - cd ../ - ##- f2py -c wext/src/fortran/bipartite_edge_swap_module.f95 -m bipartite_edge_swap_module - pwd - ls script: diff --git a/compute_mutation_probabilities.py b/compute_mutation_probabilities.py index 8606d5a..5be6777 100755 --- a/compute_mutation_probabilities.py +++ b/compute_mutation_probabilities.py @@ -47,7 +47,7 @@ def permute_matrices(edge_list, max_swaps, max_tries, seeds, verbose, m, n, num_ # Record the permutation observed[tuple(zip(*indices))] += 1. - geneToCases = dict( (g, list(cases)) for g, cases in iter(list(geneToCases.items())) ) + geneToCases = dict( (g, list(cases)) for g, cases in geneToCases.items()) permutations.append( dict(geneToCases=geneToCases, permutation_number=seed) ) return observed/float(len(seeds)), permutations @@ -63,7 +63,7 @@ def postprocess_weight_matrix(P, r, s): # Average weights over entries of weight matrix with same marginals P_mean = np.zeros(np.shape(P)) - for marginals, indices in list(marginals_to_indices.items()): + for marginals, indices in marginals_to_indices.items(): mean_value = float(sum(P[i, j] for i, j in indices))/float(len(indices)) for i, j in indices: P_mean[i, j] = mean_value @@ -84,15 +84,15 @@ def run( args ): mutation_data = load_mutation_data( args.mutation_file ) genes, all_genes, patients, geneToCases, patientToMutations, params, hypermutators = mutation_data - geneToObserved = dict( (g, len(cases)) for g, cases in iter(list(geneToCases.items())) ) - patientToObserved = dict( (p, len(muts)) for p, muts in iter(list(patientToMutations.items())) ) + geneToObserved = dict( (g, len(cases)) for g, cases in geneToCases.items()) + patientToObserved = dict( (p, len(muts)) for p, muts in patientToMutations.items()) geneToIndex = dict( (g, i+1) for i, g in 
enumerate(all_genes) ) indexToGene = dict( (i+1, g) for i, g in enumerate(all_genes) ) patientToIndex = dict( (p, j+1) for j, p in enumerate(patients) ) indexToPatient = dict( (j+1, p) for j, p in enumerate(patients) ) edges = set() - for gene, cases in list(geneToCases.items()): + for gene, cases in geneToCases.items(): for patient in cases: edges.add( (geneToIndex[gene], patientToIndex[patient]) ) @@ -140,10 +140,10 @@ def run( args ): P = np.add.reduce(observeds) / float(len(observeds)) # Verify the weights - for g, obs in list(geneToObserved.items()): + for g, obs in geneToObserved.items(): assert( np.abs(P[geneToIndex[g]-1].sum() - obs) < tol) - for p, obs in list(patientToObserved.items()): + for p, obs in patientToObserved.items(): assert( np.abs(P[:, patientToIndex[p]-1].sum() - obs) < tol) # Construct mutation matrix to compute marginals diff --git a/examples/simple/weights.npy b/examples/simple/weights.npy deleted file mode 100644 index 0cdfdcf8096fc128e3dd5fb94fbe128002d925e6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1024 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I#iItqp+nmP)#3giMV#SKLn6W_kFSM^SM!}zqe@8-Gh!H<`|wSTdt(NFrt6?x&EaKEg@E)X&;IZ32JQ z3wuH|%v=}^(+}g*+CHV*KkKsg+_S&1LEGV->2!Oc<^!h|x^BWr!_+h8H?kb= T ) setToPval[M] = count / np # Compute FDRs - tested_sets = list(setToPval.keys()) + tested_sets = setToPval.keys() pvals = [ setToPval[M] for M in tested_sets ] setToFDR = dict(list(zip(tested_sets, multiple_hypothesis_correction(pvals, method="BY")))) @@ -181,7 +181,7 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores= # Make sure all P-values are numbers tested_sets = len(setToPval) - invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) + invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) # Report invalid sets if verbose > 0 and 
report_invalids: @@ -192,9 +192,9 @@ def test_sets( sets, geneToCases, num_patients, method, test, P=None, num_cores= invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ]) sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' ) - setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets ) - setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets ) - setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets ) + setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in invalid_sets ) + setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets ) + setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets ) if verbose > 0: print('- Output {} sets'.format(len(setToPval))) @@ -260,7 +260,7 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, # Make sure all P-values are numbers tested_sets = len(setToPval) - invalid_sets = set( M for M, pval in list(setToPval.items()) if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) + invalid_sets = set( M for M, pval in setToPval.items() if isnan(pval) or -PTOL > pval or pval > 1+PTOL ) # Report invalid sets if verbose > 0 and report_invalids: @@ -271,9 +271,9 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, invalid_rows.append([ ','.join(sorted(M)), T, Z, tbl, setToPval[M] ]) sys.stderr.write( '\t' + '\n\t '.join([ '\t'.join(map(str, row)) for row in invalid_rows ]) + '\n' ) - setToPval = dict( (M, pval) for M, pval in list(setToPval.items()) if not M in invalid_sets ) - setToTime = dict( (M, runtime) for M, runtime in list(setToTime.items()) if not M in invalid_sets ) - setToObs = dict( (M, obs) for M, obs in list(setToObs.items()) if not M in invalid_sets ) + setToPval = dict( (M, pval) for M, pval in setToPval.items() if not M in 
invalid_sets ) + setToTime = dict( (M, runtime) for M, runtime in setToTime.items() if not M in invalid_sets ) + setToObs = dict( (M, obs) for M, obs in setToObs.items() if not M in invalid_sets ) if verbose > 0: print('- Output {} sets'.format(len(setToPval))) @@ -292,4 +292,4 @@ def general_test_sets( sets, geneToCases, num_patients, method, test, statistic, ################################################################################ # Testable set def testable_set( k, T, Z, tbl ): - return T > Z and all( tbl[2**i] > 0 for i in list(range(k)) ) + return T > Z and all( tbl[2**i] > 0 for i in range(k) ) diff --git a/wext/mcmc.py b/wext/mcmc.py index 4c15fc2..1269378 100755 --- a/wext/mcmc.py +++ b/wext/mcmc.py @@ -4,6 +4,7 @@ from collections import defaultdict from time import time from random import random, sample, choice, seed as random_seed +from past.builtins import xrange from .constants import * from .enumerate_sets import observed_values @@ -56,9 +57,9 @@ def _log_accept_ratio( W_current, W_next ): random_seed(seed) t = len(ks) genespace = list(geneToCases.keys()) - setsToFreq = [ defaultdict(int) for _ in range(nchains) ] + setsToFreq = [ defaultdict(int) for _ in xrange(nchains) ] setToPval, setToObs = dict(), dict() - for c in range(nchains): + for c in xrange(nchains): if verbose > 0: print('- Experiment', c+1) @@ -78,7 +79,7 @@ def _log_accept_ratio( W_current, W_next ): sys.stdout.flush() # Sample the next gene to swap in/around the set - next_soln = dict( (index, set(M)) for index, M in list(soln.items()) ) + next_soln = dict( (index, set(M)) for index, M in soln.items() ) next_assigned = dict(list(assigned.items())) next_gene = choice(genespace) diff --git a/wext/setup.py b/wext/setup.py index c4ba272..38de6ac 100755 --- a/wext/setup.py +++ b/wext/setup.py @@ -3,8 +3,7 @@ """Compiles the C modules used by the weighted exclusivity test.""" # Load required modules -from numpy.distutils.core import setup -from numpy.distutils.extension import 
Extension +from numpy.distutils.core import setup, Extension import numpy, os thisDir = os.path.dirname(os.path.realpath(__file__)) From 866d32e355c1629ce35e99f2bb0bf2f5366c9d9a Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 30 Sep 2018 08:38:57 -0400 Subject: [PATCH 59/60] revised source for py23 compatibility --- examples/simple/adjacency_list.tsv | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 examples/simple/adjacency_list.tsv diff --git a/examples/simple/adjacency_list.tsv b/examples/simple/adjacency_list.tsv new file mode 100644 index 0000000..b303963 --- /dev/null +++ b/examples/simple/adjacency_list.tsv @@ -0,0 +1,14 @@ +1 a c d e f +2 b c d g h +3 a c d +4 b c d +5 a c d +6 b c d +7 a c d +8 b +9 a +10 b +11 a e +12 b g +13 c d +14 c d From 3ee93a678a8df9cf2fd0e5c694117c4b784b18c6 Mon Sep 17 00:00:00 2001 From: evanbiederstedt Date: Sun, 30 Sep 2018 09:01:10 -0400 Subject: [PATCH 60/60] revise source, use generator instead of converting to list() --- experiments/eccb2016/scripts/results_table.py | 8 ++++---- experiments/eccb2016/scripts/weights_matrix.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/experiments/eccb2016/scripts/results_table.py b/experiments/eccb2016/scripts/results_table.py index 1315dd0..7953f09 100755 --- a/experiments/eccb2016/scripts/results_table.py +++ b/experiments/eccb2016/scripts/results_table.py @@ -39,14 +39,14 @@ # Load the triples with open(args.unweighted_exact_file, 'r') as IN: obj = json.load(IN) - unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in list(obj['setToPval'].items())) - assert( all( not(isnan(pval)) for pval in list(unweightedPval.values()) )) - unweightedFDR = dict((frozenset(t.split('\t')), fdr) for t, fdr in list(obj['setToFDR'].items())) + unweightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].items()) + assert( all( not(isnan(pval)) for pval in unweightedPval.values() )) + unweightedFDR = 
dict((frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].items()) with open(args.weighted_saddlepoint_file, 'r') as IN: obj = json.load(IN) weightedPval = dict((frozenset(t.split('\t')), pval) for t, pval in obj['setToPval'].items()) - assert( all( not(isnan(pval)) for pval in list(weightedPval.values()) )) + assert( all( not(isnan(pval)) for pval in weightedPval.values() )) weightedFDR = dict((frozenset(t.split('\t')), fdr) for t, fdr in obj['setToFDR'].items()) print('Triples with weighted FDR < {}: {}/{}'.format(args.fdr_cutoff, sum(1 for t, fdr in weightedFDR.items() if fdr < args.fdr_cutoff), len(weightedFDR))) diff --git a/experiments/eccb2016/scripts/weights_matrix.py b/experiments/eccb2016/scripts/weights_matrix.py index e98b8b0..6d958ba 100755 --- a/experiments/eccb2016/scripts/weights_matrix.py +++ b/experiments/eccb2016/scripts/weights_matrix.py @@ -37,7 +37,7 @@ # Set up the figure fig, axes = plt.subplots( 1, len(args.cancers)) fig.set_size_inches( len(args.cancers) * 5, 5) -min_weight = min([ np.min(W) for W in list(cancerToWeights.values()) ]) +min_weight = min([ np.min(W) for W in cancerToWeights.values() ]) print('Min weight:', min_weight) for ax, cancer in zip(axes, args.cancers):