From 184413387ec8bb542bd815b288405bf7de419cf9 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 13:32:40 +0800 Subject: [PATCH 01/21] 2to3 --- pyatac/VMat.py | 6 +++--- pyatac/__init__.py | 5 ++--- pyatac/bias.py | 2 +- pyatac/chunk.py | 8 ++++---- pyatac/fragmentsizes.py | 2 +- pyatac/get_cov.py | 6 +++--- pyatac/get_ins.py | 10 +++++----- pyatac/get_nucleotide.py | 4 ++-- pyatac/get_pwm.py | 4 ++-- pyatac/get_sizes.py | 2 +- pyatac/make_bias_track.py | 8 ++++---- pyatac/make_bias_vplot.py | 4 ++-- pyatac/make_vplot.py | 6 +++--- pyatac/seq.py | 4 ++-- pyatac/signal_around_sites.py | 6 +++--- pyatac/tracks.py | 10 +++++----- 16 files changed, 43 insertions(+), 44 deletions(-) diff --git a/pyatac/VMat.py b/pyatac/VMat.py index 2c6be8a..683826f 100644 --- a/pyatac/VMat.py +++ b/pyatac/VMat.py @@ -146,7 +146,7 @@ def plot_1d(self,filename=None): """plot the 1d insertion representation of the matrix""" fig = plt.figure() xlim = len(self.one_d)/2 - plt.plot(range(-xlim,xlim+1),self.one_d) + plt.plot(list(range(-xlim,xlim+1)),self.one_d) plt.vlines(-73,0,max(self.one_d)*1.1,linestyles='dashed') plt.vlines(73,0,max(self.one_d)*1.1,linestyles='dashed') plt.xlabel("Position relative to dyad") @@ -164,7 +164,7 @@ def plot_insertsize(self,filename=None): fig = plt.figure() ins = np.sum(self.mat,axis=1) ins = ins/sum(ins) - plt.plot(range(self.lower,self.upper),ins) + plt.plot(list(range(self.lower,self.upper)),ins) plt.xlabel("Insert Size") plt.ylabel("Frequency") if filename: @@ -208,7 +208,7 @@ def open(filename): elif state == 'upper': upper = int(line.strip('\n')) elif state == 'mat': - mat.append(map(float,line.strip('\n').split('\t'))) + mat.append(list(map(float,line.strip('\n').split('\t')))) try: new = VMat(np.array(mat), lower, upper) except NameError: diff --git a/pyatac/__init__.py b/pyatac/__init__.py index def3758..b28b04f 100644 --- a/pyatac/__init__.py +++ b/pyatac/__init__.py @@ -1,4 +1,3 @@ -#Define version based on setup script -import pkg_resources -__version__ = pkg_resources.require("NucleoATAC")[0].version + + diff --git a/pyatac/bias.py b/pyatac/bias.py index df2bcf0..6b36827 100644 --- a/pyatac/bias.py +++ b/pyatac/bias.py @@ -66,7 +66,7 @@ def open(name): elif state == 'nucleotides': nucleotides = line.strip('\n').split() elif state == 'mat': - mat.append(map(float,line.strip('\n').split('\t'))) + mat.append(list(map(float,line.strip('\n').split('\t')))) infile.close() try: new = PWM(np.array(mat), up, down, nucleotides) diff --git a/pyatac/chunk.py b/pyatac/chunk.py index c36fe8b..08d3e26 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -97,7 +97,7 @@ def sort(self): list.sort(self, cmp = _chunkCompare) def isSorted(self): """check that regions are sorted""" - return all([_chunkCompare(self[i],self[i+1])==-1 for i in xrange(len(self)-1)]) + return all([_chunkCompare(self[i],self[i+1])==-1 for i in range(len(self)-1)]) def slop(self, chromDict, up = 0, down = 0, new = False): out = ChunkList() for i in self: @@ -154,7 +154,7 @@ def read(bedfile, weight_col=None, strand_col = None, name_col = None, chromDict start = int(in_line[1]) end = int(in_line[2]) chrom = in_line[0] - if chromDict is not None and chrom not in chromDict.keys(): + if chromDict is not None and chrom not in list(chromDict.keys()): bad_chroms.append(chrom) continue if min_offset: @@ -183,7 +183,7 @@ def convertChromSizes(chromDict, splitsize = None, offset = 0): out = ChunkList() for chrom in chrs: out.extend(ChunkList(*(Chunk(chrom, i, min(i + splitsize, chromDict[chrom] - offset)) - for i in xrange(offset, chromDict[chrom] - offset, splitsize)))) + for i in range(offset, chromDict[chrom] - offset, splitsize)))) return out def split(self, bases = None, items = None): """splits list of chunks into set of sublists""" @@ -202,7 +202,7 @@ def split(self, bases = None, items = None): out.append(self[i:(k+1)]) return out elif items is not None: - out = [ self[i:i+items] for i in xrange(0,len(self),items)] + out = [ self[i:i+items] for i in range(0,len(self),items)] return out else: raise Exception("Need to provide items or bases argument!") diff --git a/pyatac/fragmentsizes.py b/pyatac/fragmentsizes.py index 4fd6ba0..10af79c 100644 --- a/pyatac/fragmentsizes.py +++ b/pyatac/fragmentsizes.py @@ -71,7 +71,7 @@ def open(filename): elif state == 'upper': upper = int(line.strip('\n')) elif state == 'sizes': - fragmentsizes = np.array(map(float,line.rstrip("\n").split("\t"))) + fragmentsizes = np.array(list(map(float,line.rstrip("\n").split("\t")))) try: new = FragmentSizes(lower, upper, vals = fragmentsizes) except NameError: diff --git a/pyatac/get_cov.py b/pyatac/get_cov.py index 29a6fe2..4b4d729 100644 --- a/pyatac/get_cov.py +++ b/pyatac/get_cov.py @@ -29,7 +29,7 @@ def _covHelper(arg): cov.calculateCoverage(mat, lower = args.lower, upper = args.upper, window_len = args.window) cov.vals *= args.scale / float(args.window) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -42,7 +42,7 @@ def _writeCov(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing coverage track\n') traceback.print_exc() print() @@ -75,7 +75,7 @@ def get_cov(args, bases = 50000, splitsize = 1000): write_process = mp.Process(target = _writeCov, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool1.map(_covHelper, zip(j,itertools.repeat(args))) + tmp = pool1.map(_covHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_ins.py b/pyatac/get_ins.py index 8d28e96..1bf208c 100644 --- a/pyatac/get_ins.py +++ b/pyatac/get_ins.py @@ -26,7 +26,7 @@ def _insHelperSmooth(arg): ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) ins.smooth_track(args.smooth, window = "gaussian", mode= 'valid') except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -39,7 +39,7 @@ def _insHelper(arg): ins = InsertionTrack(chunk.chrom, chunk.start, chunk.end) ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -51,7 +51,7 @@ def _writeIns(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -85,9 +85,9 @@ def get_ins(args, bases = 50000, splitsize = 1000): write_process.start() for j in sets: if args.smooth: - tmp = pool1.map(_insHelperSmooth, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelperSmooth, list(zip(j,itertools.repeat(args)))) else: - tmp = pool1.map(_insHelper, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_nucleotide.py b/pyatac/get_nucleotide.py index 2419523..a0cfd25 100644 --- a/pyatac/get_nucleotide.py +++ b/pyatac/get_nucleotide.py @@ -31,7 +31,7 @@ def _nucleotideHelper(arg): mat += submat n += 1 except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+'\n') + print(('Caught exception when processing:\n'+ chunk.asBed()+'\n')) traceback.print_exc() print() raise e @@ -66,7 +66,7 @@ def get_nucleotide(args): params = _NucleotideParameters(args.up, args.down, args.fasta, args.dinucleotide) sets = chunks.split(bases = 10000) pool = Pool(processes = args.cores) - tmp = pool.map(_nucleotideHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_nucleotideHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = np.zeros(params.matsize) diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index bdfc534..b185462 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -34,7 +34,7 @@ def _pwmHelper(arg): mat += ins.getStrandedInsertionSequences(params.fasta, params.nucleotides, up = params.up, down = params.down) n += sum(ins.vals) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -69,7 +69,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): params = _PWMParameters(bam = args.bam, up = args.flank, down = args.flank, fasta = args.fasta, lower = args.lower, upper = args.upper, atac = args.atac, sym = args.sym) pool = Pool(processes = args.cores) - tmp = pool.map(_pwmHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_pwmHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() n = 0.0 diff --git a/pyatac/get_sizes.py b/pyatac/get_sizes.py index cf66f54..aac0905 100644 --- a/pyatac/get_sizes.py +++ b/pyatac/get_sizes.py @@ -30,7 +30,7 @@ def get_sizes(args): if not args.no_plot: #make figure fig = plt.figure() - plt.plot(range(sizes.lower,sizes.upper),sizes.get(sizes.lower,sizes.upper),label = args.out) + plt.plot(list(range(sizes.lower,sizes.upper)),sizes.get(sizes.lower,sizes.upper),label = args.out) plt.xlabel("Fragment Size") plt.ylabel("Frequency") fig.savefig(args.out+'.fragmentsizes.eps') diff --git a/pyatac/make_bias_track.py b/pyatac/make_bias_track.py index 94e6640..e6204c7 100644 --- a/pyatac/make_bias_track.py +++ b/pyatac/make_bias_track.py @@ -23,7 +23,7 @@ def _biasHelper(arg): bias = InsertionBiasTrack(chunk.chrom, chunk.start, chunk.end) bias.computeBias(params.fasta, params.chrs, params.pwm) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -36,7 +36,7 @@ def _writeBias(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -66,7 +66,7 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): sets = chunks.split(items = bases/splitsize) else: chunks = ChunkList.read(args.bed) - chunks.checkChroms(params.chrs.keys()) + chunks.checkChroms(list(params.chrs.keys())) chunks.merge() sets = chunks.split(bases = bases) maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) @@ -77,7 +77,7 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): write_process = mp.Process(target = _writeBias, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params))) + tmp = pool.map(_biasHelper, list(zip(j,itertools.repeat(params)))) for track in tmp: write_queue.put(track) pool.close() diff --git a/pyatac/make_bias_vplot.py b/pyatac/make_bias_vplot.py index ce80274..a7c70c2 100644 --- a/pyatac/make_bias_vplot.py +++ b/pyatac/make_bias_vplot.py @@ -48,7 +48,7 @@ def _biasVplotHelper(arg): else: mat += add except Exception as e: - print('Caught exception when processing:\n' + chunk.asBed() + "\n") + print(('Caught exception when processing:\n' + chunk.asBed() + "\n")) traceback.print_exc() print() raise e @@ -80,7 +80,7 @@ def make_bias_vplot(args): sizes = args.sizes, scale = args.scale, pwm = args.pwm, fasta = args.fasta) pool = Pool(processes = args.cores) - tmp = pool.map(_biasVplotHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_biasVplotHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = sum(tmp) diff --git a/pyatac/make_vplot.py b/pyatac/make_vplot.py index edb57a2..300d742 100644 --- a/pyatac/make_vplot.py +++ b/pyatac/make_vplot.py @@ -13,7 +13,7 @@ import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.chunk import ChunkList from pyatac.chunkmat2d import FragmentMat2D -import VMat as V +from . import VMat as V from multiprocessing import Pool import itertools import traceback @@ -35,7 +35,7 @@ def _vplotHelper(arg): add = add/np.sum(add) result += add except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -67,7 +67,7 @@ def make_vplot(args): params = _VplotParams(flank = args.flank, lower = args.lower, upper = args.upper, bam = args.bam, atac = args.atac, scale = args.scale) pool = Pool(processes = args.cores) - tmp = pool.map(_vplotHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_vplotHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = sum(tmp) diff --git a/pyatac/seq.py b/pyatac/seq.py index df825ed..75296b4 100644 --- a/pyatac/seq.py +++ b/pyatac/seq.py @@ -22,7 +22,7 @@ def get_sequence(chunk, fastafile): return sequence.upper() -DNA_Translation = string.maketrans('ACGT', 'TGCA') +DNA_Translation = ''.maketrans('ACGT', 'TGCA') def complement(sequence): """Get complement of DNA sequenceuence""" @@ -41,7 +41,7 @@ def seq_to_mat(sequence, nucleotides): raise Exception("Usage Error! Nucleotides must all be of same length! No mixing single nucleotides with dinucleotides, etc") mat = np.zeros((len(nucleotides),len(sequence)-l+1)) for i in range(len(nucleotides)): - mat[i] = np.array(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)])) + mat[i] = np.array(list(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)]))) return(mat) def getNucFreqs(fasta, nucleotides): diff --git a/pyatac/signal_around_sites.py b/pyatac/signal_around_sites.py index 67a03bf..7877276 100644 --- a/pyatac/signal_around_sites.py +++ b/pyatac/signal_around_sites.py @@ -62,7 +62,7 @@ def _signalHelper(arg): sig[np.isnan(sig)]=0 agg += sig except Exception as e: - print('Caught exception when processing:\n' + chunk.asBed() + "\n") + print(('Caught exception when processing:\n' + chunk.asBed() + "\n")) traceback.print_exc() print() bg.close() @@ -96,7 +96,7 @@ def get_signal(args): args.scale, args.positive, args.all) sets = chunks.split(items = min(args.cores*20,len(chunks))) pool = Pool(processes = args.cores) - tmp = pool.map(_signalHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_signalHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() if args.all: @@ -110,7 +110,7 @@ def get_signal(args): if args.norm: result = result / len(chunks) fig = plt.figure() - plt.plot(range(-args.up,args.down+1),result) + plt.plot(list(range(-args.up,args.down+1)),result) plt.xlabel("Position relative to Site") plt.ylabel("Signal Intensity") fig.savefig(args.out+'.agg.track.eps') diff --git a/pyatac/tracks.py b/pyatac/tracks.py index d1ac322..f1de3c6 100644 --- a/pyatac/tracks.py +++ b/pyatac/tracks.py @@ -47,7 +47,7 @@ def write_track(self, handle, start = None, end = None, vals = None, write_zero if vals is None: vals=self.vals if len(vals)!=self.end-self.start: - print len(vals),self.end-self.start + print(len(vals),self.end-self.start) raise Exception("Error! Inconsistency between length of \ values and start/end values") prev_value = None @@ -88,14 +88,14 @@ def read_track(self, bedgraph, start = None, end = None, empty = np.nan, flank = def log(self, pseudo = 1): """Log values. Add psuedo count so values don't equal 0 before logging""" if self.log: - print "Logging a track that is already log..." + print("Logging a track that is already log...") adjusted = self.vals + pseudo self.vals = np.log(adjusted) self.log = True def exp(self): """Take exponent of values""" if not self.log: - print "taking exponent of a non-logged track..." + print("taking exponent of a non-logged track...") self.vals = np.exp(self.vals) self.log = False def smooth_track(self, window_len, window='flat', sd = None, @@ -135,7 +135,7 @@ def plot(self, name = None, start = None, end = None, filename=None): name = self.name values = self.get(start,end) fig = plt.figure() - plt.plot(range(start,end),values) + plt.plot(list(range(start,end)),values) plt.xlabel(self.chrom) plt.ylabel(name) if filename: @@ -143,7 +143,7 @@ def plot(self, name = None, start = None, end = None, filename=None): plt.close(fig) #Also save text output! filename2 = ".".join(filename.split(".")[:-1]+['txt']) - out = np.vstack((range(start,end),values)) + out = np.vstack((list(range(start,end)),values)) np.savetxt(filename2,out,delimiter="\t") else: fig.show() From 8b4c6aa799326b67695174bef7234a4ff5ed5a24 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 13:33:14 +0800 Subject: [PATCH 02/21] 2to3 --- bin/nucleoatac | 8 ++++---- bin/pyatac | 8 ++++---- nucleoatac/NFRCalling.py | 4 ++-- nucleoatac/NucleosomeCalling.py | 6 +++--- nucleoatac/Occupancy.py | 20 ++++++++++---------- nucleoatac/__init__.py | 4 +--- nucleoatac/cli.py | 4 ++-- nucleoatac/diff_occ.py | 6 +++--- nucleoatac/run_nfr.py | 8 ++++---- nucleoatac/run_nuc.py | 16 ++++++++-------- nucleoatac/run_occ.py | 12 ++++++------ 11 files changed, 47 insertions(+), 49 deletions(-) mode change 100644 => 100755 bin/nucleoatac diff --git a/bin/nucleoatac b/bin/nucleoatac old mode 100644 new mode 100755 index b0e08df..6d6c15a --- a/bin/nucleoatac +++ b/bin/nucleoatac @@ -24,14 +24,14 @@ from nucleoatac.cli import nucleoatac_parser, nucleoatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "nucleoatac version " + __version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("nucleoatac version " + __version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = nucleoatac_parser() args = parser.parse_args() nucleoatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted nucleoatac.") sys.exit(0) diff --git a/bin/pyatac b/bin/pyatac index cc3dbb4..8906250 100644 --- a/bin/pyatac +++ b/bin/pyatac @@ -23,14 +23,14 @@ from pyatac.cli import pyatac_parser, pyatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "pyatac version "+__version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("pyatac version "+__version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = pyatac_parser() args = parser.parse_args() pyatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted pyatac.") sys.exit(0) diff --git a/nucleoatac/NFRCalling.py b/nucleoatac/NFRCalling.py index d41fd4c..a0be397 100644 --- a/nucleoatac/NFRCalling.py +++ b/nucleoatac/NFRCalling.py @@ -89,7 +89,7 @@ def findNFRs(self): if self.chrom in tbx.contigs: for row in tbx.fetch(self.chrom, self.start, self.end, parser = pysam.asTuple()): nucs.append(int(row[1])) - for j in xrange(1,len(nucs)): + for j in range(1,len(nucs)): left = nucs[j-1] + 73 right = nucs[j] - 72 if right <= left: @@ -106,7 +106,7 @@ def process(self, params): self.findNFRs() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index d146d73..c4df1ba 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -79,7 +79,7 @@ def simulateReads(self): sim_mat = np.reshape(sim_vect, self.vmat.mat.shape) return sim_mat def simulateDist(self, numiters = 1000): - self.scores = map(lambda x: np.sum(self.simulateReads() * self.vmat.mat),range(numiters)) + self.scores = [np.sum(self.simulateReads() * self.vmat.mat) for x in range(numiters)] def analStd(self): flatv = np.ravel(self.vmat.mat) var = calculateCov(self.probs, flatv, self.reads) @@ -310,7 +310,7 @@ def findAllNucs(self): self.nuc_collection[i] = nuc self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys())) self.nonredundant = reduce_peaks( self.sorted_nuc_keys, - map(lambda x: self.nuc_collection[x].z, self.sorted_nuc_keys), + [self.nuc_collection[x].z for x in self.sorted_nuc_keys], self.params.nonredundant_sep) self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant) def fit(self): @@ -340,7 +340,7 @@ def process(self, params): self.makeInsertionTrack() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/Occupancy.py b/nucleoatac/Occupancy.py index 9e9cd59..6b3ac1d 100644 --- a/nucleoatac/Occupancy.py +++ b/nucleoatac/Occupancy.py @@ -67,11 +67,11 @@ def gamma_fit(X,o,p): def plotFits(self,filename=None): """plot the Fits""" fig = plt.figure() - plt.plot(range(self.lower,self.upper),self.fragmentsizes.get(), + plt.plot(list(range(self.lower,self.upper)),self.fragmentsizes.get(), label = "Observed") - plt.plot(range(self.lower,self.upper),self.nfr_fit0.get(), label = "NFR Fit") - plt.plot(range(self.lower,self.upper),self.nuc_fit.get(), label = "Nucleosome Model") - plt.plot(range(self.lower,self.upper),self.nfr_fit.get(), label = "NFR Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit0.get(), label = "NFR Fit") + plt.plot(list(range(self.lower,self.upper)),self.nuc_fit.get(), label = "Nucleosome Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit.get(), label = "NFR Model") plt.legend() plt.xlabel("Fragment size") plt.ylabel("Relative Frequency") @@ -109,8 +109,8 @@ def calculateOccupancy(inserts, bias, params): nuc_probs = nuc_probs / np.sum(nuc_probs) nfr_probs = params.nfr_probs * bias nfr_probs = nfr_probs / np.sum(nfr_probs) - x = map(lambda alpha: np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs), params.alphas) - logliks = np.array(map(lambda j: np.sum(x[j]*inserts),range(params.l))) + x = [np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs) for alpha in params.alphas] + logliks = np.array([np.sum(x[j]*inserts) for j in range(params.l)]) logliks[np.isnan(logliks)] = -float('inf') occ = params.alphas[np.argmax(logliks)] #Compute upper and lower bounds for 95% confidence interval @@ -129,11 +129,11 @@ def calculateOccupancyMLE(self, mat, bias_mat, params): """Calculate Occupancy track""" offset=self.start - mat.start if offset Date: Sat, 7 Nov 2020 13:41:41 +0800 Subject: [PATCH 03/21] pure python fragments --- pyatac/chunkmat2d.py | 4 +- pyatac/fragments.py | 125 +++++++++++++++++++++++++++++++++++++++++++ pyatac/tracks.py | 4 +- 3 files changed, 129 insertions(+), 4 deletions(-) create mode 100644 pyatac/fragments.py diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 0ffdc7c..3fb36ce 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -3,8 +3,8 @@ import matplotlib.pyplot as plt import matplotlib.cm as cm from pyatac.tracks import InsertionTrack -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) -from fragments import makeFragmentMat +#import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) +from pyatac.fragments import makeFragmentMat class ChunkMat2D: """Class that stores fragments in 2D matrix according to diff --git a/pyatac/fragments.py b/pyatac/fragments.py new file mode 100644 index 0000000..ebd6ad5 --- /dev/null +++ b/pyatac/fragments.py @@ -0,0 +1,125 @@ +#### Import needed modules ##### +import pyatac.seq as seq +import numpy as np +from pysam import AlignmentFile, AlignedSegment +from pysam import FastaFile + +DTYPE = np.float + +#### Import needed modules ##### +#import pysam + +def makeFragmentMat(bamfile, chrom, start, end, lower, upper, atac = 1): + nrow = upper - lower + ncol = end - start + mat = np.zeros( (nrow, ncol), dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) + + + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + row = ilen - lower + col = (ilen-1)/2 + l_pos - start + if col >= 0 and col < ncol and row < nrow and row >= 0: + mat[row, col] += 1 + bamHandle.close() + return mat + +def getInsertions(bamfile, chrom, start, end, lower, upper, atac = 1): + npos = end - start + mat = np.zeros(npos, dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + r_pos = l_pos + ilen - 1 + if ilen >= lower and ilen < upper: + if l_pos >= start and l_pos < end: + mat[l_pos - start] += 1 + if r_pos >= start and r_pos < end: + mat[r_pos - start] += 1 + bamHandle.close() + return mat + +def getStrandedInsertions(bamfile, chrom, start, end, lower, upper, atac = 1): + npos = end - start + matplus = np.zeros(npos, dtype = DTYPE) + matminus = np.zeros(npos, dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + r_pos = l_pos + ilen - 1 + if ilen >= lower and ilen < upper: + if l_pos >= start and l_pos < end: + matplus[l_pos - start] += 1 + if r_pos >= start and r_pos < end: + matminus[r_pos - start] += 1 + bamHandle.close() + return (matplus, matminus) + +def getAllFragmentSizes(bamfile, lower, upper, atac = 1): + sizes = np.zeros(upper - lower, dtype= np.float) + # loop over samfile + bamHandle = AlignmentFile(bamfile) + for read in bamHandle: + if read.is_proper_pair and not read.is_reverse: + if atac: + + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + ilen = abs(read.template_length) + if ilen < upper and ilen >= lower: + sizes[ilen - lower]+=1 + bamHandle.close() + return sizes + +def getFragmentSizesFromChunkList(chunks, bamfile, lower, upper, atac = 1): + sizes = np.zeros(upper - lower, dtype= np.float) + # loop over samfile + bamHandle = AlignmentFile(bamfile) + for chunk in chunks: + for read in bamHandle.fetch(chunk.chrom, max(0, chunk.start - upper), chunk.end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + center = l_pos + (ilen-1) / 2 + if ilen < upper and ilen >= lower and center >= chunk.start and center < chunk.end: + sizes[ilen - lower] += 1 + bamHandle.close() + return sizes + diff --git a/pyatac/tracks.py b/pyatac/tracks.py index f1de3c6..47d2757 100644 --- a/pyatac/tracks.py +++ b/pyatac/tracks.py @@ -9,8 +9,8 @@ from pyatac.bedgraph import BedGraphFile from pyatac.chunk import Chunk from pyatac.utils import smooth -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) -from fragments import getInsertions, getStrandedInsertions +#import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) +from pyatac.fragments import getInsertions, getStrandedInsertions from pyatac.seq import get_sequence, seq_to_mat, complement class Track(Chunk): From aa4ea83a085456a5725382fd37df55d75a8f7811 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 13:54:42 +0800 Subject: [PATCH 04/21] fixed up the list.sort() --- pyatac/chunk.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyatac/chunk.py b/pyatac/chunk.py index 08d3e26..0d68461 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -7,6 +7,7 @@ import gzip import warnings +import functools class Chunk(): """Class that stores reads for a particular chunk of the genome""" @@ -92,12 +93,16 @@ def insert(self, *args): list.insert(self, args[0], args[1]) else: raise ValueError("Expecting Chunk") + def sort(self): """sort regions""" - list.sort(self, cmp = _chunkCompare) + list.sort(self, key=functools.cmp_to_key(_chunkCompare)) # Just hack it; + # Likely to be very slow; + def isSorted(self): """check that regions are sorted""" return all([_chunkCompare(self[i],self[i+1])==-1 for i in range(len(self)-1)]) + def slop(self, chromDict, up = 0, down = 0, new = False): out = ChunkList() for i in self: From 2b533b6cfe9a219e45648023aaccfd21558cdb2e Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 14:11:03 +0800 Subject: [PATCH 05/21] py3 float div fix --- pyatac/chunkmat2d.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 3fb36ce..9011396 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -57,10 +57,12 @@ def assign(self, mat): if mat.shape != self.mat.shape: raise Exception("Dimensions of input mat are wrong. Uh oh!") self.mat = mat + def save(self, filename): """Save object in a text file""" head = ",".join(map(str,[self.chrom,self.start,self.end,self.lower,self.upper])) np.savetxt(filename,self.mat,delimiter="\t", header = head) + @staticmethod def open(filename): f = open(filename,'r') From f068c3084cfd1dd7255b3e999827b60d157950b2 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 14:53:59 +0800 Subject: [PATCH 06/21] changed to pdf --- nucleoatac/NucleosomeCalling.py | 27 +++++++++++++-------------- nucleoatac/Occupancy.py | 18 ++++++++++++++---- nucleoatac/diff_occ.py | 2 +- nucleoatac/run_nuc.py | 4 ++-- nucleoatac/run_occ.py | 10 ++++++---- nucleoatac/run_vprocess.py | 8 ++++---- pyatac/VMat.py | 25 ++++++++++++++----------- pyatac/chunk.py | 5 +++-- pyatac/chunkmat2d.py | 25 ++++++++++++++++--------- pyatac/fragments.py | 4 ++-- pyatac/get_cov.py | 4 ++-- pyatac/get_nucleotide.py | 2 +- pyatac/get_pwm.py | 4 ++-- pyatac/get_sizes.py | 2 +- pyatac/make_bias_track.py | 4 +++- pyatac/make_bias_vplot.py | 11 +++++------ pyatac/make_vplot.py | 6 +++--- pyatac/seq.py | 4 ++-- pyatac/signal_around_sites.py | 4 ++-- pyatac/tracks.py | 13 +++++-------- pyatac/utils.py | 2 +- 21 files changed, 102 insertions(+), 82 deletions(-) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index c4df1ba..9e32837 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -61,7 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov): self.bias_mat.start + offset, self.bias_mat.end - offset), vmat.mat,mode = 'valid')[0] - self.vals = self.vals * self.nuc_cov/ self.cov.vals + self.vals = self.vals * self.nuc_cov // self.cov.vals @@ -91,8 +91,7 @@ def analMean(self): def norm(x, v, w, mean): """compute values of normal pdf with given mean and sd at values in x""" - norm = (1.0/(np.sqrt(2*np.pi*v)) * - np.exp(-(x - mean)**2/(2*v))) + norm = (1.0/(np.sqrt(2*np.pi*v)) * np.exp(-(x - mean)**2/(2*v))) norm = norm * (w/max(norm)) return norm @@ -139,7 +138,7 @@ def addNorms(x,params): """Add several normal distributions together""" l = len(x) fit = np.zeros(l) - i = len(params)/3 + i = len(params)//3 for j in range(i): fit += norm(x,params[j*3],params[3*j+1],params[3*j+2]) return fit @@ -156,21 +155,21 @@ def fitNorm(guess, bound, sig): allnucs = nuctrack.sorted_nuc_keys x = bisect_left(allnucs,index) if x == 0: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) elif index - allnucs[x-1] < nuctrack.params.nonredundant_sep: left = allnucs[x-1] means = (index - allnucs[x-1],0) else: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) if x == len(allnucs)-1: - right = index + nuctrack.params.nonredundant_sep/3 + 1 + right = index + nuctrack.params.nonredundant_sep//3 + 1 elif allnucs[x+1] - index < nuctrack.params.nonredundant_sep: right = allnucs[x+1] means += (allnucs[x+1] - left,) else: - right = index + nuctrack.params.nonredundant_sep/3 +1 + right = index + nuctrack.params.nonredundant_sep//3 +1 sig = nuctrack.smoothed.vals[left:right] sig[sig<0] = 0 if len(means)==1: @@ -237,14 +236,14 @@ def __init__(self, chunk): def initialize(self, parameters): self.params = parameters def getFragmentMat(self): - self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper/2+1), - self.end + max(self.params.window,self.params.upper/2+1), 0, self.params.upper, atac = self.params.atac) + self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper//2+1), + self.end + max(self.params.window,self.params.upper//2+1), 0, self.params.upper, atac = self.params.atac) self.mat.makeFragmentMat(self.params.bam) def makeBiasMat(self): self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window, self.end + self.params.window, 0, self.params.upper) - bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2, - self.end + self.params.window + self.params.upper/2 + 1, log = True) + bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2, + self.end + self.params.window + self.params.upper//2 + 1, log = True) if self.params.fasta is not None: bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm) self.bias_mat.makeBiasMat(bias_track) diff --git a/nucleoatac/Occupancy.py b/nucleoatac/Occupancy.py index 6b3ac1d..c1d3707 100644 --- a/nucleoatac/Occupancy.py +++ b/nucleoatac/Occupancy.py @@ -8,7 +8,6 @@ from scipy import signal, optimize, stats import numpy as np import matplotlib.pyplot as plt -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragmentsizes import FragmentSizes from pyatac.tracks import Track, CoverageTrack from pyatac.chunk import Chunk @@ -23,9 +22,11 @@ class FragmentMixDistribution: def __init__(self, lower = 0, upper =2000): self.lower = lower self.upper = upper + def getFragmentSizes(self, bamfile, chunklist = None): self.fragmentsizes = FragmentSizes(self.lower, self.upper) self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist) + def modelNFR(self, boundaries = (35,115)): """Model NFR distribution with gamma distribution""" b = np.where(self.fragmentsizes.get(self.lower,boundaries[1]) == max(self.fragmentsizes.get(self.lower,boundaries[1])))[0][0] + self.lower @@ -64,6 +65,7 @@ def gamma_fit(X,o,p): self.nfr_fit.get(boundaries[1],self.upper))) nuc[nuc<=0]=min(min(nfr)*0.1,min(nuc[nuc>0])*0.001) self.nuc_fit = FragmentSizes(self.lower, self.upper, vals = nuc) + def plotFits(self,filename=None): """plot the Fits""" fig = plt.figure() @@ -190,7 +192,7 @@ def __init__(self, insert_dist, upper, fasta, pwm, sep = 120, min_occ = 0.1, fla if step%2 == 0: step = step - 1 self.step = step - self.halfstep = (self.step-1) / 2 + self.halfstep = (self.step-1) // 2 class OccChunk(Chunk): """Class for calculating occupancy and occupancy peaks @@ -201,27 +203,32 @@ def __init__(self, chunk): self.chrom = chunk.chrom self.peaks = {} self.nfrs = [] + def getFragmentMat(self): self.mat = FragmentMat2D(self.chrom, self.start - self.params.flank, self.end + self.params.flank, 0, self.params.upper) self.mat.makeFragmentMat(self.params.bam) + def makeBiasMat(self): self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.flank, self.end + self.params.flank, 0, self.params.upper) if self.params.fasta is not None: - bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2, - self.end + self.params.window + self.params.upper/2 + 1, log = True) + bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2, + self.end + self.params.window + self.params.upper//2 + 1, log = True) bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm) self.bias_mat.makeBiasMat(bias_track) + def calculateOcc(self): """calculate occupancy for chunk""" self.occ = OccupancyTrack(self.chrom,self.start,self.end) self.occ.calculateOccupancyMLE(self.mat, self.bias_mat, self.params) self.occ.makeSmoothed(window_len = self.params.window, sd = self.params.flank/3.0) + def getCov(self): """Get read coverage for regions""" self.cov = CoverageTrack(self.chrom, self.start, self.end) self.cov.calculateCoverage(self.mat, 0, self.params.upper, self.params.window) + def callPeaks(self): """Call peaks of occupancy profile""" peaks = call_peaks(self.occ.smoothed_vals, sep = self.params.sep, min_signal = self.params.min_occ) @@ -229,6 +236,7 @@ def callPeaks(self): tmp = OccPeak(peak + self.start, self) if tmp.occ_lower > self.params.min_occ and tmp.reads > 0: self.peaks[peak] = tmp + def getNucDist(self): """Get nucleosomal insert distribution""" nuc_dist = np.zeros(self.params.upper) @@ -238,6 +246,7 @@ def getNucDist(self): sub_sum = sub_sum / float(sum(sub_sum)) nuc_dist += sub_sum return(nuc_dist) + def process(self, params): """proces chunk -- calculat occupancy, get coverage, call peaks""" self.params = params @@ -246,6 +255,7 @@ def process(self, params): self.calculateOcc() self.getCov() self.callPeaks() + def removeData(self): """remove data from chunk-- deletes all attributes""" names = list(self.__dict__.keys()) diff --git a/nucleoatac/diff_occ.py b/nucleoatac/diff_occ.py index ed64e36..9033ea7 100644 --- a/nucleoatac/diff_occ.py +++ b/nucleoatac/diff_occ.py @@ -57,7 +57,7 @@ def run_diff(args, bases = 500000): """ chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down)) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down)) chunks.merge() maxQueueSize = max(2,int(100 * bases / np.mean([chunk.length() for chunk in chunks]))) #get fragmentsizes diff --git a/nucleoatac/run_nuc.py b/nucleoatac/run_nuc.py index f51ef33..e19c49b 100644 --- a/nucleoatac/run_nuc.py +++ b/nucleoatac/run_nuc.py @@ -148,8 +148,8 @@ def run_nuc(args): else: chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = vmat.mat.shape[1] + vmat.upper/2 + max(pwm.up,pwm.down) + args.nuc_sep/2, min_length = args.nuc_sep * 2) - chunks.slop(chrs, up = args.nuc_sep/2, down = args.nuc_sep/2) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = vmat.mat.shape[1] + vmat.upper//2 + max(pwm.up,pwm.down) + args.nuc_sep//2, min_length = args.nuc_sep * 2) + chunks.slop(chrs, up = args.nuc_sep//2, down = args.nuc_sep//2) chunks.merge() maxQueueSize = args.cores*10 if args.sizes is not None: diff --git a/nucleoatac/run_occ.py b/nucleoatac/run_occ.py index d709938..78b51de 100644 --- a/nucleoatac/run_occ.py +++ b/nucleoatac/run_occ.py @@ -83,8 +83,8 @@ def run_occ(args): else: chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down) + args.nuc_sep/2) - chunks.slop(chrs, up = args.nuc_sep/2, down = args.nuc_sep/2) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down) + args.nuc_sep//2) + chunks.slop(chrs, up = args.nuc_sep//2, down = args.nuc_sep//2) chunks.merge() maxQueueSize = args.cores*10 fragment_dist = FragmentMixDistribution(0, upper = args.upper) @@ -94,7 +94,7 @@ def run_occ(args): else: fragment_dist.getFragmentSizes(args.bam, chunks) fragment_dist.modelNFR() - fragment_dist.plotFits(args.out + '.occ_fit.eps') + fragment_dist.plotFits(args.out + '.occ_fit.pdf') fragment_dist.fragmentsizes.save(args.out + '.fragmentsizes.txt') params = OccupancyParameters(fragment_dist, args.upper, args.fasta, args.pwm, sep = args.nuc_sep, min_occ = args.min_occ, flank = args.flank, bam = args.bam, ci = args.confidence_interval, step = args.step) @@ -115,6 +115,7 @@ def run_occ(args): peaks_process = mp.Process(target = _writePeaks, args=(peaks_queue, args.out)) peaks_process.start() nuc_dist = np.zeros(args.upper) + for j in sets: tmp = pool1.map(_occHelper, list(zip(j,itertools.repeat(params)))) for result in tmp: @@ -130,6 +131,7 @@ def run_occ(args): pysam.tabix_compress(args.out + '.occpeaks.bed', args.out + '.occpeaks.bed.gz',force = True) shell_command('rm ' + args.out + '.occpeaks.bed') pysam.tabix_index(args.out + '.occpeaks.bed.gz', preset = "bed", force = True) + for i in ('occ','occ.lower_bound','occ.upper_bound'): pysam.tabix_compress(args.out + '.' + i + '.bedgraph', args.out + '.'+i+'.bedgraph.gz',force = True) shell_command('rm ' + args.out + '.' + i + '.bedgraph') @@ -144,7 +146,7 @@ def run_occ(args): plt.plot(list(range(0,args.upper)),dist_out.get(0,args.upper),label = "Nucleosome Distribution") plt.xlabel("Fragment Size") plt.ylabel("Frequency") - fig.savefig(args.out+'.nuc_dist.eps') + fig.savefig(args.out+'.nuc_dist.pdf') plt.close(fig) diff --git a/nucleoatac/run_vprocess.py b/nucleoatac/run_vprocess.py index 401f12e..7f81642 100644 --- a/nucleoatac/run_vprocess.py +++ b/nucleoatac/run_vprocess.py @@ -33,13 +33,13 @@ def run_vprocess(args): #Make extra plots if requeted if args.plot_extra: vmat.autoCorr() - vmat.plot_auto(args.out+'.vplot.Autocorr.eps') + vmat.plot_auto(args.out+'.vplot.Autocorr.pdf') vmat.converto1d() - vmat.plot_1d(args.out+'.vplot.InsertionProfile.eps') - vmat.plot_insertsize(args.out+'.vplot.InsertSizes.eps') + vmat.plot_1d(args.out+'.vplot.InsertionProfile.pdf') + vmat.plot_insertsize(args.out+'.vplot.InsertSizes.pdf') #make plot and save vmat.save(args.out+".VMat") - vmat.plot(filename = args.out+".VMat.eps") + vmat.plot(filename = args.out+".VMat.pdf") diff --git a/pyatac/VMat.py b/pyatac/VMat.py index 683826f..f0af169 100644 --- a/pyatac/VMat.py +++ b/pyatac/VMat.py @@ -34,7 +34,8 @@ def __init__(self, mat, lower, upper): self.mat = mat self.upper = upper self.lower = lower - self.w = mat.shape[1]/2 + self.w = mat.shape[1]//2 + def trim(self,lower,upper,w): """reduce the size of the vplot @@ -108,20 +109,20 @@ def norm_y(self,dist): def converto1d(self): """convert the 2d matrix to a 1d representation of insertions""" self.one_d = np.zeros(self.upper + self.upper%2 +2*self.w+1) - center = self.upper/2 + self.w + center = self.upper//2 + self.w for j in range(self.mat.shape[0]): for i in range(self.mat.shape[1]): ilen=j+self.lower val = copy(self.mat[j,i]) if ilen%2==0: - self.one_d[center-(self.w-i)-(ilen/2)]+= val - self.one_d[center-(self.w-i)+(ilen/2)]+= val + self.one_d[center-(self.w-i)-(ilen//2)]+= val + self.one_d[center-(self.w-i)+(ilen//2)]+= val else: - self.one_d[center-(self.w-i)-(ilen/2)]+= val * 0.5 - self.one_d[center-(self.w-i)+(ilen/2)]+= val * 0.5 - self.one_d[center-(self.w-i)-(ilen/2+1)]+= val * 0.5 - self.one_d[center-(self.w-i)+(ilen/2+1)]+= val * 0.5 - self.one_d = self.one_d / sum(self.one_d) + self.one_d[center-(self.w-i)-(ilen//2)]+= val * 0.5 + self.one_d[center-(self.w-i)+(ilen//2)]+= val * 0.5 + self.one_d[center-(self.w-i)-(ilen//2+1)]+= val * 0.5 + self.one_d[center-(self.w-i)+(ilen//2+1)]+= val * 0.5 + self.one_d = self.one_d // sum(self.one_d) def plot(self, mat=None, title=None, filename=None): """Plot current main matrix or specified matrix (of same dimensions)""" if mat is None: @@ -142,10 +143,11 @@ def plot(self, mat=None, title=None, filename=None): plt.close(fig) else: fig.show() + def plot_1d(self,filename=None): """plot the 1d insertion representation of the matrix""" fig = plt.figure() - xlim = len(self.one_d)/2 + xlim = len(self.one_d)//2 plt.plot(list(range(-xlim,xlim+1)),self.one_d) plt.vlines(-73,0,max(self.one_d)*1.1,linestyles='dashed') plt.vlines(73,0,max(self.one_d)*1.1,linestyles='dashed') @@ -159,11 +161,12 @@ def plot_1d(self,filename=None): np.savetxt(filename2,self.one_d,delimiter="\t") else: fig.show() + def plot_insertsize(self,filename=None): """plot the insert size disribution in the main matrix""" fig = plt.figure() ins = np.sum(self.mat,axis=1) - ins = ins/sum(ins) + ins = ins // sum(ins) plt.plot(list(range(self.lower,self.upper)),ins) plt.xlabel("Insert Size") plt.ylabel("Frequency") diff --git a/pyatac/chunk.py b/pyatac/chunk.py index 0d68461..fa78646 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -39,12 +39,13 @@ def slop(self, chromDict, up = 0, down = 0, new = False): else: self.start = newStart self.end = newEnd + def center(self, new = False): if self.strand == "-": - newEnd = self.end - (self.length()/2) + newEnd = self.end - (self.length()//2) newStart = newEnd - 1 else: - newStart = self.start + (self.length()/2) + newStart = self.start + (self.length()//2) newEnd = newStart +1 if new: out = Chunk(self.chrom, newStart, newEnd, diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 9011396..4f78a99 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -18,6 +18,7 @@ def __init__(self, chrom, start, end, lower, upper): self.ncol = end - start self.nrow = upper - lower self.mat = np.zeros((self.nrow, self.ncol)) + def get(self, lower = None, upper = None, start = None, end = None, flip = False): """get a subset (or all) of the matrix stored by ChunkMat2D object based on chromosal position and/or insert size""" @@ -52,6 +53,7 @@ def get(self, lower = None, upper = None, start = None, end = None, flip = False else: new[j,:] = self.mat[j,(x1-1):(x2)][::-1][1:] return new + def assign(self, mat): """assign a matrix to object""" if mat.shape != self.mat.shape: @@ -73,15 +75,16 @@ def open(filename): new= ChunkMat2D(elements[0],elements[1],elements[2],elements[3]) new.assign(mat) return new + def getIns(self): """Collape matrix into insertions. Will reduce span on chromosome""" pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) - mid = self.upper/2 + mid = self.upper//2 for i in range(self.lower,self.upper): - pattern[i-self.lower,mid+(i-1)/2]=1 - pattern[i-self.lower,mid-(i/2)]=1 + pattern[i-self.lower, mid+(i-1)//2]=1 + pattern[i-self.lower, mid-(i//2)]=1 ins = signal.correlate2d(self.mat,pattern,mode="valid")[0] - insertion_track = InsertionTrack(self.chrom,self.start + pattern.shape[1]/2, self.end - (pattern.shape[1]/2)) + insertion_track = InsertionTrack(self.chrom,self.start + pattern.shape[1]//2, self.end - (pattern.shape[1]//2)) insertion_track.assign_track(ins) return insertion_track def plot(self, filename = None, title = None, lower = None, @@ -119,7 +122,7 @@ def __init__(self, chrom, start, end, lower, upper, atac = True): def updateMat(self, fragment): row = fragment.insert - self.lower if self.mode == "centers": - col = (fragment.insert-1)/2 + fragment.left - self.start + col = (fragment.insert-1)//2 + fragment.left - self.start if col>=0 and col=0: self.mat[row, col] += 1 else: @@ -139,20 +142,24 @@ class BiasMat2D(ChunkMat2D): def __init__(self, chrom, start, end, lower, upper): ChunkMat2D.__init__(self,chrom, start, end, lower, upper) self.mat = np.ones(self.mat.shape) + def makeBiasMat(self, bias_track): """Make 2D matrix representing sequence bias preferences""" - offset = self.upper/2 + offset = self.upper//2 bias = bias_track.get(self.start-offset,self.end+offset) if not bias_track.log: nonzero = np.where(bias !=0)[0] bias = np.log(bias + min(bias[nonzero])) + pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) - mid = self.upper/2 + mid = self.upper//2 for i in range(self.lower,self.upper): - pattern[i-self.lower,mid+(i-1)/2]=1 - pattern[i-self.lower,mid-(i/2)]=1 + pattern[i-self.lower,mid+(i-1)//2]=1 + pattern[i-self.lower,mid-(i//2)]=1 + for i in range(self.upper-self.lower): self.mat[i]=np.exp(np.convolve(bias,pattern[i,:],mode='valid')) + def normByInsertDist(self, insertsizes): inserts = insertsizes.get(self.lower,self.upper) self.mat = self.mat * np.reshape(np.tile(inserts,self.mat.shape[1]),self.mat.shape,order="F") diff --git a/pyatac/fragments.py b/pyatac/fragments.py index ebd6ad5..379db1e 100644 --- a/pyatac/fragments.py +++ b/pyatac/fragments.py @@ -28,7 +28,7 @@ def makeFragmentMat(bamfile, chrom, start, end, lower, upper, atac = 1): l_pos = read.pos ilen = abs(read.template_length) row = ilen - lower - col = (ilen-1)/2 + l_pos - start + col = (ilen-1)//2 + l_pos - start if col >= 0 and col < ncol and row < nrow and row >= 0: mat[row, col] += 1 bamHandle.close() @@ -117,7 +117,7 @@ def getFragmentSizesFromChunkList(chunks, bamfile, lower, upper, atac = 1): else: l_pos = read.pos ilen = abs(read.template_length) - center = l_pos + (ilen-1) / 2 + center = l_pos + (ilen-1) // 2 if ilen < upper and ilen >= lower and center >= chunk.start and center < chunk.end: sizes[ilen - lower] += 1 bamHandle.close() diff --git a/pyatac/get_cov.py b/pyatac/get_cov.py index 4b4d729..854e277 100644 --- a/pyatac/get_cov.py +++ b/pyatac/get_cov.py @@ -22,7 +22,7 @@ def _covHelper(arg): """Computes coverage track for a particular set of bed regions""" (chunk, args) = arg try: - offset = args.window / 2 + offset = args.window // 2 mat = FragmentMat2D(chunk.chrom,chunk.start - offset, chunk.end + offset, args.lower, args.upper, args.atac) mat.makeFragmentMat(args.bam) cov = CoverageTrack(chunk.chrom, chunk.start, chunk.end) @@ -62,7 +62,7 @@ def get_cov(args, bases = 50000, splitsize = 1000): if args.bed is None: chrs = read_chrom_sizes_from_bam(args.bam) chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items=bases//splitsize) else: chunks = ChunkList.read(args.bed) chunks.merge() diff --git a/pyatac/get_nucleotide.py b/pyatac/get_nucleotide.py index a0cfd25..112ef8c 100644 --- a/pyatac/get_nucleotide.py +++ b/pyatac/get_nucleotide.py @@ -74,7 +74,7 @@ def get_nucleotide(args): for i in tmp: result += i[0] n += i[1] - result = result / n + result = result // n if args.norm: normfreqs = seq.getNucFreqs(params.fasta, params.nucleotides) result = result / np.reshape(np.repeat(normfreqs,result.shape[1]),result.shape) diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index b185462..ab81d7a 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -62,7 +62,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): chrs = read_chrom_sizes_from_fasta(args.fasta) if args.bed is None: chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize, offset = args.flank) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank) sets = chunks.split(bases = bases) @@ -88,7 +88,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): left = result[:,0:(args.flank + 1)] right = result[:,args.flank:] rightflipped = np.fliplr(np.flipud(right)) - combined = (left + rightflipped) / 2 + combined = (left + rightflipped) // 2 result = np.hstack((combined, np.fliplr(np.flipud(combined[:,0:args.flank])))) #save pwm = PWM(result, args.flank, args.flank, params.nucleotides) diff --git a/pyatac/get_sizes.py b/pyatac/get_sizes.py index aac0905..1e040bd 100644 --- a/pyatac/get_sizes.py +++ b/pyatac/get_sizes.py @@ -33,6 +33,6 @@ def get_sizes(args): plt.plot(list(range(sizes.lower,sizes.upper)),sizes.get(sizes.lower,sizes.upper),label = args.out) plt.xlabel("Fragment Size") plt.ylabel("Frequency") - fig.savefig(args.out+'.fragmentsizes.eps') + fig.savefig(args.out+'.fragmentsizes.pdf') plt.close(fig) diff --git a/pyatac/make_bias_track.py b/pyatac/make_bias_track.py index e6204c7..ec777a5 100644 --- a/pyatac/make_bias_track.py +++ b/pyatac/make_bias_track.py @@ -61,14 +61,16 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): else: args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1]) params = _BiasParams(args.fasta, args.pwm) + if args.bed is None: chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed) chunks.checkChroms(list(params.chrs.keys())) chunks.merge() sets = chunks.split(bases = bases) + maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) pool = mp.Pool(processes = max(1,args.cores-1)) out_handle = open(args.out + '.Scores.bedgraph','w') diff --git a/pyatac/make_bias_vplot.py b/pyatac/make_bias_vplot.py index a7c70c2..6e8e8a5 100644 --- a/pyatac/make_bias_vplot.py +++ b/pyatac/make_bias_vplot.py @@ -31,10 +31,9 @@ def _biasVplotHelper(arg): for chunk in chunks: try: chunk.center() - biastrack = InsertionBiasTrack(chunk.chrom, chunk.start - params.flank - 1 - (params.upper/2), - chunk.end + params.flank + params.upper/2+1) + biastrack = InsertionBiasTrack(chunk.chrom, chunk.start-params.flank-1-(params.upper//2), chunk.end+params.flank+params.upper//2+1) if params.bg is not None: - biastrack.read_track(params.bg, empty = 0) + biastrack.read_track(params.bg, empty=0) else: biastrack.computeBias(params.fasta, params.chrs, params.pwm) biasmat = BiasMat2D(chunk.chrom, chunk.start - params.flank - 1, chunk.end + params.flank, @@ -86,13 +85,13 @@ def make_bias_vplot(args): result = sum(tmp) ##Turn matrix into VMat object vmat=VMat(result,args.lower,args.upper) - vmat.plot(filename=args.out+".Bias.Vplot.eps") + vmat.plot(filename=args.out+".Bias.Vplot.pdf") if args.plot_extra: ##get insertion profile represented by vplot vmat.converto1d() - vmat.plot_1d(filename=args.out+'.InsertionProfile.eps') + vmat.plot_1d(filename=args.out+'.InsertionProfile.pdf') #get insert size dstribution represented by vplot - vmat.plot_insertsize(filename= args.out + ".InsertSizes.eps") + vmat.plot_insertsize(filename= args.out + ".InsertSizes.pdf") ##save vmat.save(args.out+".Bias.VMat") diff --git a/pyatac/make_vplot.py b/pyatac/make_vplot.py index 300d742..bca7a11 100644 --- a/pyatac/make_vplot.py +++ b/pyatac/make_vplot.py @@ -74,13 +74,13 @@ def make_vplot(args): ##Turn matrix into VMat object vmat=V.VMat(result,args.lower,args.upper) if not args.no_plot: - vmat.plot(filename=args.out+".Vplot.eps") + vmat.plot(filename=args.out+".Vplot.pdf") if args.plot_extra: ##get insertion profile represented by vplot vmat.converto1d() - vmat.plot_1d(filename=args.out+'.InsertionProfile.eps') + vmat.plot_1d(filename=args.out+'.InsertionProfile.pdf') #get insert size dstribution represented by vplot - vmat.plot_insertsize(filename= args.out + ".InsertSizes.eps") + vmat.plot_insertsize(filename= args.out + ".InsertSizes.pdf") ##save vmat.save(args.out+".VMat") diff --git a/pyatac/seq.py b/pyatac/seq.py index 75296b4..f2a2dee 100644 --- a/pyatac/seq.py +++ b/pyatac/seq.py @@ -55,7 +55,7 @@ def getNucFreqs(fasta, nucleotides): out += [sequence.count(i) for i in nucleotides] n += len(sequence) f.close() - return out/n + return out//n def getNucFreqsFromChunkList(chunks, fasta, nucleotides): @@ -69,5 +69,5 @@ def getNucFreqsFromChunkList(chunks, fasta, nucleotides): out += [sequence.count(i) for i in nucleotides] n += len(sequence) handle.close() - return out/n + return out//n diff --git a/pyatac/signal_around_sites.py b/pyatac/signal_around_sites.py index 7877276..4ffaa9f 100644 --- a/pyatac/signal_around_sites.py +++ b/pyatac/signal_around_sites.py @@ -108,12 +108,12 @@ def get_signal(args): result = sum(tmp) if not args.no_agg: if args.norm: - result = result / len(chunks) + result = result // len(chunks) fig = plt.figure() plt.plot(list(range(-args.up,args.down+1)),result) plt.xlabel("Position relative to Site") plt.ylabel("Signal Intensity") - fig.savefig(args.out+'.agg.track.eps') + fig.savefig(args.out+'.agg.track.pdf') plt.close(fig) np.savetxt(args.out+'.agg.track.txt',result,delimiter="\t") diff --git a/pyatac/tracks.py b/pyatac/tracks.py index 47d2757..9881aae 100644 --- a/pyatac/tracks.py +++ b/pyatac/tracks.py @@ -9,7 +9,6 @@ from pyatac.bedgraph import BedGraphFile from pyatac.chunk import Chunk from pyatac.utils import smooth -#import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragments import getInsertions, getStrandedInsertions from pyatac.seq import get_sequence, seq_to_mat, complement @@ -105,8 +104,8 @@ def smooth_track(self, window_len, window='flat', sd = None, self.vals = smooth(self.vals, window_len, window = window, sd = sd, mode = mode, norm = norm) if mode == 'valid': - self.start = self.start + window_len/2 - self.end = self.end - window_len/2 + self.start = self.start + window_len//2 + self.end = self.end - window_len//2 def get(self, start = None, end = None, pos = None): """Obtain value of track at particular interval or position""" if pos: @@ -208,10 +207,9 @@ def __init__(self, chrom, start, end): Track.__init__(self, chrom, start, end, "coverage") def calculateCoverage(self, mat, lower, upper, window_len): """Compute coverage of fragment centers using flat window""" - offset=self.start-mat.start-(window_len/2) + offset=self.start-mat.start-(window_len//2) if offset<0: - raise Exception("Insufficient flanking region on \ - mat to calculate coverage with desired window") + raise Exception("Insufficient flanking region on mat to calculate coverage with desired window") lower=lower-mat.lower upper=upper-mat.lower if offset!=0: @@ -224,8 +222,7 @@ def calculateCoverageSmooth(self,mat,lower,upper,window_len,sd): """Compute coverage of fragment centers using gaussia window""" offset=self.start-mat.start-(window_len/2) if offset<0: - raise Exception("Insufficient flanking region on \ - mat to calculate coverage with desired window") + raise Exception("Insufficient flanking region on mat to calculate coverage with desired window") lower=lower-mat.lower upper=upper-mat.lower if offset!=0: diff --git a/pyatac/utils.py b/pyatac/utils.py index d972f4d..9922f54 100644 --- a/pyatac/utils.py +++ b/pyatac/utils.py @@ -90,7 +90,7 @@ def call_peaks(sigvals, min_signal = 0, sep = 120, boundary = None, order =1): replace = min(sigvals[~np.isnan(sigvals)]) sigvals[np.isnan(sigvals)]=replace if boundary is None: - boundary = sep/2 + boundary = sep//2 random = np.random.RandomState(seed = 25) l = len(sigvals) peaks = signal.argrelmax(sigvals *(1+ From e1ec144df924ab48f5f6fe65c52e4ec40d5a89f8 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Fri, 4 Dec 2020 06:44:50 +0800 Subject: [PATCH 07/21] Port to python 3 and pure python --- nucleoatac/NucleosomeCalling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index 9e32837..9959b70 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -297,7 +297,7 @@ def findAllNucs(self): #find peaks in normalized sigal cands1 = call_peaks(combined, min_signal = 0, sep = self.params.redundant_sep, - boundary = self.params.nonredundant_sep/2, order = self.params.redundant_sep/2) + boundary = self.params.nonredundant_sep//2, order = self.params.redundant_sep//2) for i in cands1: nuc = Nucleosome(i + self.start, self) if nuc.nuc_cov > self.params.min_reads: From bbb24026b7620c530410be321eac6aa3fee0ca4a Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sat, 12 Dec 2020 16:10:47 -0800 Subject: [PATCH 08/21] Update setup.py --- setup.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index e654055..f1a6a58 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,9 @@ import sys import os -if float(sys.version[:3])<2.7 or float(sys.version[:3])>=2.8: - sys.stderr.write("CRITICAL: Python version must be 2.7!\n") - sys.exit(1) - +#if float(sys.version[:3])<2.7 or float(sys.version[:3])>=2.8: +# sys.stderr.write("CRITICAL: Python version must be 2.7!\n") +# sys.exit(1) class NoTestCommand(TestCommand): def run(self): @@ -20,7 +19,7 @@ def run(self): classifiers=[ 'Development Status :: 3 - Alpha', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.8', 'Topic :: Scientific/Engineering :: Bio-Informatics'], keywords='ATAC-Seq sequencing bioinformatics', url='https://github.com/GreenleafLab/NucleoATAC', From 01d72ec0eff73ed945e507679bc71ee014780e9e Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 11:38:24 -0800 Subject: [PATCH 09/21] updates to setup --- nucleoatac/__init__.py | 2 +- pyatac/__init__.py | 2 +- setup.py | 13 +++++-------- tests/test_occupancy.py | 4 ++-- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/nucleoatac/__init__.py b/nucleoatac/__init__.py index 078458d..abeeedb 100644 --- a/nucleoatac/__init__.py +++ b/nucleoatac/__init__.py @@ -1 +1 @@ -__version__ = '0.3.4.py3' +__version__ = '0.4.0' diff --git a/pyatac/__init__.py b/pyatac/__init__.py index b28b04f..55aebd7 100644 --- a/pyatac/__init__.py +++ b/pyatac/__init__.py @@ -1,3 +1,3 @@ - +__version__ = '0.4.0' diff --git a/setup.py b/setup.py index f1a6a58..1911d41 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,6 @@ import sys import os -#if float(sys.version[:3])<2.7 or float(sys.version[:3])>=2.8: -# sys.stderr.write("CRITICAL: Python version must be 2.7!\n") -# sys.exit(1) - class NoTestCommand(TestCommand): def run(self): print("NucleoATAC does not support running tests with " @@ -14,20 +10,21 @@ def run(self): setup(name='NucleoATAC', - version='0.3.4', + version='0.4.0', description='python package for calling nucleosomes with ATAC-Seq', classifiers=[ 'Development Status :: 3 - Alpha', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3', 'Topic :: Scientific/Engineering :: Bio-Informatics'], keywords='ATAC-Seq sequencing bioinformatics', url='https://github.com/GreenleafLab/NucleoATAC', author='Alicia Schep', - author_email='aschep@stanford.edu', + author_email='aschep@gmail.com', license='MIT', packages=['pyatac','pyatac.pwm','nucleoatac','nucleoatac.vplot'], - install_requires=['cython >= 0.22','numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib'], + python_requires='>=3.8', + install_requires=['numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib'], scripts=['bin/pyatac','bin/nucleoatac'], include_package_data=True, zip_safe=False, diff --git a/tests/test_occupancy.py b/tests/test_occupancy.py index f335f87..01eb403 100644 --- a/tests/test_occupancy.py +++ b/tests/test_occupancy.py @@ -31,7 +31,7 @@ def test_occupancy_calc3(self): a = np.random.multinomial(10,self.fragment_dist.nfr_fit.get()) b = np.random.multinomial(30,self.fragment_dist.nuc_fit.get()) results[i],lower[i],upper[i] = calculateOccupancy(a+b,np.array([1,1,1]),self.params) - print np.mean(results) + self.assertTrue(abs(np.mean(results)-0.75)<0.1) self.assertTrue(sum(upper < 0.75) < 85) self.assertTrue(sum(lower > 0.75) < 85) @@ -51,7 +51,7 @@ def test_occupancy_calc4(self): a = np.random.multinomial(10,nfrprob) b = np.random.multinomial(30,nucprob) results[i],lower[i],upper[i] = calculateOccupancy(a+b,bias,self.params) - print np.mean(results) + self.assertTrue(abs(np.mean(results)-0.75)<0.1) self.assertTrue(sum(upper < 0.75) < 85) self.assertTrue(sum(lower > 0.75) < 85) From 20eac30f4e1b946a26e5d3169e9a52c8582b2576 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 13:49:05 -0800 Subject: [PATCH 10/21] remove pyx bits --- pyatac/fragments.pyx | 146 ---------------------------------------- pyatac/fragments.pyxbld | 10 --- pyatac/fragmentsizes.py | 5 -- pyatac/get_cov.py | 3 +- pyatac/get_ins.py | 1 - pyatac/get_pwm.py | 2 - pyatac/make_vplot.py | 3 - 7 files changed, 1 insertion(+), 169 deletions(-) delete mode 100644 pyatac/fragments.pyx delete mode 100644 pyatac/fragments.pyxbld diff --git a/pyatac/fragments.pyx b/pyatac/fragments.pyx deleted file mode 100644 index a463206..0000000 --- a/pyatac/fragments.pyx +++ /dev/null @@ -1,146 +0,0 @@ -#### Import needed modules ##### -import pyatac.seq as seq -import numpy as np -cimport numpy as np -cimport cython -from pysam.libcalignmentfile cimport AlignmentFile, AlignedSegment -from pysam.libcfaidx cimport FastaFile - -DTYPE = np.float -ctypedef np.float_t DTYPE_t - - -#### Import needed modules ##### -#import pysam - -@cython.boundscheck(False) -def makeFragmentMat(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): - cdef int nrow = upper - lower - cdef int ncol = end - start - cdef np.ndarray[DTYPE_t, ndim=2] mat = np.zeros( (nrow, ncol), dtype = DTYPE) - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int l_pos, ilen, row, col - for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): - if read.is_proper_pair and not read.is_reverse: - if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion - ilen = abs(read.template_length) - 8 - else: - l_pos = read.pos - ilen = abs(read.template_length) - row = ilen - lower - col = (ilen-1)/2 + l_pos - start - if col >= 0 and col < ncol and row < nrow and row >= 0: - mat[row, col] += 1 - bamHandle.close() - return mat - -@cython.boundscheck(False) -def getInsertions(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): - cdef int npos = end - start - cdef np.ndarray[DTYPE_t, ndim=1] mat = np.zeros(npos, dtype = DTYPE) - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int l_pos, ilen, r_pos - for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): - if read.is_proper_pair and not read.is_reverse: - if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion - ilen = abs(read.template_length) - 8 - else: - l_pos = read.pos - ilen = abs(read.template_length) - r_pos = l_pos + ilen - 1 - if ilen >= lower and ilen < upper: - if l_pos >= start and l_pos < end: - mat[l_pos - start] += 1 - if r_pos >= start and r_pos < end: - mat[r_pos - start] += 1 - bamHandle.close() - return mat - - -@cython.boundscheck(False) -def getStrandedInsertions(str bamfile, str chrom, int start, int end, int lower, int upper, int atac = 1): - cdef int npos = end - start - cdef np.ndarray[DTYPE_t, ndim=1] matplus = np.zeros(npos, dtype = DTYPE) - cdef np.ndarray[DTYPE_t, ndim=1] matminus = np.zeros(npos, dtype = DTYPE) - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int l_pos, ilen, r_pos - for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): - if read.is_proper_pair and not read.is_reverse: - if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion - ilen = abs(read.template_length) - 8 - else: - l_pos = read.pos - ilen = abs(read.template_length) - r_pos = l_pos + ilen - 1 - if ilen >= lower and ilen < upper: - if l_pos >= start and l_pos < end: - matplus[l_pos - start] += 1 - if r_pos >= start and r_pos < end: - matminus[r_pos - start] += 1 - bamHandle.close() - return (matplus, matminus) - - - -@cython.boundscheck(False) -def getAllFragmentSizes(str bamfile, int lower, int upper, int atac = 1): - cdef np.ndarray[DTYPE_t, ndim =1] sizes = np.zeros(upper - lower, dtype= np.float) - # loop over samfile - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int ilen - for read in bamHandle: - if read.is_proper_pair and not read.is_reverse: - if atac: - - #get insert size - #correct by 8 base pairs to be inserion to insertion - ilen = abs(read.template_length) - 8 - else: - ilen = abs(read.template_length) - if ilen < upper and ilen >= lower: - sizes[ilen - lower]+=1 - bamHandle.close() - return sizes - - -@cython.boundscheck(False) -def getFragmentSizesFromChunkList(chunks, str bamfile, int lower, int upper, int atac = 1): - cdef np.ndarray[DTYPE_t, ndim =1] sizes = np.zeros(upper - lower, dtype= np.float) - # loop over samfile - cdef AlignmentFile bamHandle = AlignmentFile(bamfile) - cdef AlignedSegment read - cdef int ilen, l_pos, center - for chunk in chunks: - for read in bamHandle.fetch(chunk.chrom, max(0, chunk.start - upper), chunk.end + upper): - if read.is_proper_pair and not read.is_reverse: - if atac: - #get left position - l_pos = read.pos + 4 - #get insert size - #correct by 8 base pairs to be inserion to insertion - ilen = abs(read.template_length) - 8 - else: - l_pos = read.pos - ilen = abs(read.template_length) - center = l_pos + (ilen-1)/2 - if ilen < upper and ilen >= lower and center >= chunk.start and center < chunk.end: - sizes[ilen - lower]+=1 - bamHandle.close() - return sizes - diff --git a/pyatac/fragments.pyxbld b/pyatac/fragments.pyxbld deleted file mode 100644 index 51d39b4..0000000 --- a/pyatac/fragments.pyxbld +++ /dev/null @@ -1,10 +0,0 @@ - -def make_ext(modname, pyxfilename): - from distutils.extension import Extension - import pysam - return Extension(name=modname, - sources=[pyxfilename], - extra_link_args=pysam.get_libraries(), - include_dirs=pysam.get_include(), - define_macros=pysam.get_defines()) - diff --git a/pyatac/fragmentsizes.py b/pyatac/fragmentsizes.py index 10af79c..10fa88e 100644 --- a/pyatac/fragmentsizes.py +++ b/pyatac/fragmentsizes.py @@ -5,13 +5,8 @@ """ import numpy as np -import pyximport -pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragments import getAllFragmentSizes, getFragmentSizesFromChunkList - - - class FragmentSizes: """Class for storing fragment size distribution""" def __init__(self, lower, upper, atac = True, vals = None): diff --git a/pyatac/get_cov.py b/pyatac/get_cov.py index 854e277..970ff80 100644 --- a/pyatac/get_cov.py +++ b/pyatac/get_cov.py @@ -12,7 +12,6 @@ import numpy as np import pysam import traceback -import pyximport; pyximport.install() from pyatac.tracks import CoverageTrack from pyatac.chunk import ChunkList from pyatac.utils import shell_command, read_chrom_sizes_from_bam @@ -23,7 +22,7 @@ def _covHelper(arg): (chunk, args) = arg try: offset = args.window // 2 - mat = FragmentMat2D(chunk.chrom,chunk.start - offset, chunk.end + offset, args.lower, args.upper, args.atac) + mat = FragmentMat2D(chunk.chrom,chunk.start - offset, chunk.end + offset, args.lower, args.upper, args.atac) mat.makeFragmentMat(args.bam) cov = CoverageTrack(chunk.chrom, chunk.start, chunk.end) cov.calculateCoverage(mat, lower = args.lower, upper = args.upper, window_len = args.window) diff --git a/pyatac/get_ins.py b/pyatac/get_ins.py index 1bf208c..f993450 100644 --- a/pyatac/get_ins.py +++ b/pyatac/get_ins.py @@ -12,7 +12,6 @@ import numpy as np import pysam import traceback -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.tracks import InsertionTrack from pyatac.chunk import ChunkList from pyatac.utils import shell_command, read_chrom_sizes_from_bam diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index ab81d7a..3d809b8 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -13,8 +13,6 @@ import pyatac.seq as seq from pyatac.chunk import ChunkList from pyatac.bias import PWM -import pyximport -pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.tracks import InsertionTrack from pyatac.utils import read_chrom_sizes_from_fasta diff --git a/pyatac/make_vplot.py b/pyatac/make_vplot.py index bca7a11..90538b3 100644 --- a/pyatac/make_vplot.py +++ b/pyatac/make_vplot.py @@ -8,9 +8,6 @@ # import necessary for python import os import numpy as np -#import matplotlib as mpl -#mpl.use('PS') -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.chunk import ChunkList from pyatac.chunkmat2d import FragmentMat2D from . import VMat as V From 58706dc3dcd61709ac62f69df329843c72ca2323 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 13:49:22 -0800 Subject: [PATCH 11/21] fix mode for gzip open --- nucleoatac/merge.py | 6 +++--- pyatac/chunk.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/nucleoatac/merge.py b/nucleoatac/merge.py index bed3e8b..0c5e61a 100644 --- a/nucleoatac/merge.py +++ b/nucleoatac/merge.py @@ -23,7 +23,7 @@ def __init__(self, chrom, start, end, occ, occ_lower, occ_upper, reads, source): self.reads = reads self.source = source def asBed(self): - out = "\t".join(map(str,[self.chrom,self.start,self.end,self.occ,self.occ_lower,self.occ_upper,self.reads, self.source])) + out = "\t".join(map(str,[self.chrom,self.start,self.end,self.occ,self.occ_lower,self.occ_upper,self.reads, self.source])) return out def write(self, handle): """write bed line for peak""" @@ -36,7 +36,7 @@ def __init__(self, *args): def read(bedfile, source, min_occ = 0): """Make a list of chunks from a tab-delimited bedfile""" if bedfile[-3:] == '.gz': - infile = gzip.open(bedfile,"r") + infile = gzip.open(bedfile,"rt") else: infile = open(bedfile,"r") out = NucList() @@ -103,7 +103,7 @@ def run_merge(args): pysam.tabix_compress(args.out + '.nucmap_combined.bed', args.out + '.nucmap_combined.bed.gz',force = True) shell_command('rm ' + args.out + '.nucmap_combined.bed') pysam.tabix_index(args.out + '.nucmap_combined.bed.gz', preset = "bed", force = True) - + diff --git a/pyatac/chunk.py b/pyatac/chunk.py index fa78646..bfe7f34 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -97,7 +97,7 @@ def insert(self, *args): def sort(self): """sort regions""" - list.sort(self, key=functools.cmp_to_key(_chunkCompare)) # Just hack it; + list.sort(self, key=functools.cmp_to_key(_chunkCompare)) # Just hack it; # Likely to be very slow; def isSorted(self): @@ -140,7 +140,7 @@ def read(bedfile, weight_col=None, strand_col = None, name_col = None, chromDict min_offset = None, min_length = 1, chrom_source = "FASTA file"): """Make a list of chunks from a tab-delimited bedfile""" if bedfile[-3:] == '.gz': - infile = gzip.open(bedfile,"r") + infile = gzip.open(bedfile,"rt") else: infile = open(bedfile,"r") out = ChunkList() @@ -175,7 +175,7 @@ def read(bedfile, weight_col=None, strand_col = None, name_col = None, chromDict if chromDict is not None and len(bad_chroms)>0: bad_chroms = set(bad_chroms) warn_message = (str(len(bad_chroms)) + " chromosome names in bed file not included in " + - chrom_source + ":\n" + "\n".join(bad_chroms) + "\n " + + chrom_source + ":\n" + "\n".join(bad_chroms) + "\n " + "These regions will be ignored in subsequent analysis") warnings.warn(warn_message) return out From dc391efffea0c552b316221e43faf326cac41b5f Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 14:08:20 -0800 Subject: [PATCH 12/21] update try numba --- nucleoatac/NucleosomeCalling.py | 1 + nucleoatac/multinomial_cov.pyx | 5 +---- nucleoatac/multionomial_cov2.py | 14 ++++++++++++++ pyatac/chunkmat2d.py | 7 +++---- setup.py | 2 +- 5 files changed, 20 insertions(+), 9 deletions(-) create mode 100644 nucleoatac/multionomial_cov2.py diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index 9959b70..eccc052 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -10,6 +10,7 @@ from bisect import bisect_left import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from nucleoatac.multinomial_cov import calculateCov +#from nucleoatac.multionomial_cov2 import calculateCov from nucleoatac.Occupancy import OccupancyTrack from pyatac.tracks import Track, CoverageTrack from pyatac.chunk import Chunk diff --git a/nucleoatac/multinomial_cov.pyx b/nucleoatac/multinomial_cov.pyx index 20b434e..fe3ac26 100644 --- a/nucleoatac/multinomial_cov.pyx +++ b/nucleoatac/multinomial_cov.pyx @@ -10,12 +10,9 @@ import numpy as np cimport numpy as np cimport cython - -DTYPE = np.float +#DTYPE = np.float ctypedef np.float_t DTYPE_t - - @cython.boundscheck(False) def calculateCov(np.ndarray[DTYPE_t, ndim=1] p, np.ndarray[DTYPE_t, ndim=1] v, int r): if p.shape[0]!= v.shape[0]: diff --git a/nucleoatac/multionomial_cov2.py b/nucleoatac/multionomial_cov2.py new file mode 100644 index 0000000..fd7449f --- /dev/null +++ b/nucleoatac/multionomial_cov2.py @@ -0,0 +1,14 @@ +from numba import jit + +@jit("f8(f8[:], f8[:], f8)", nopython=True) +def calculateCov(p, v, r): + if p.shape[0]!= v.shape[0]: + raise ValueError("p and v must be same shape") + value = 0.0 + for i in range(p.shape[0]): + for j in range(i,p.shape[0]): + if i==j: + value += p[i]* (1-p[i]) * v[i]**2 + else: + value += p[i] * p[j] * -2 * v[i] * v[j] + return(value * r) \ No newline at end of file diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 4f78a99..7f5b2a8 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -3,7 +3,6 @@ import matplotlib.pyplot as plt import matplotlib.cm as cm from pyatac.tracks import InsertionTrack -#import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragments import makeFragmentMat class ChunkMat2D: @@ -59,12 +58,12 @@ def assign(self, mat): if mat.shape != self.mat.shape: raise Exception("Dimensions of input mat are wrong. Uh oh!") self.mat = mat - + def save(self, filename): """Save object in a text file""" head = ",".join(map(str,[self.chrom,self.start,self.end,self.lower,self.upper])) np.savetxt(filename,self.mat,delimiter="\t", header = head) - + @staticmethod def open(filename): f = open(filename,'r') @@ -150,7 +149,7 @@ def makeBiasMat(self, bias_track): if not bias_track.log: nonzero = np.where(bias !=0)[0] bias = np.log(bias + min(bias[nonzero])) - + pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) mid = self.upper//2 for i in range(self.lower,self.upper): diff --git a/setup.py b/setup.py index 1911d41..5c69e1e 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def run(self): license='MIT', packages=['pyatac','pyatac.pwm','nucleoatac','nucleoatac.vplot'], python_requires='>=3.8', - install_requires=['numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib'], + install_requires=['numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib','numba >= 0.51.2'], scripts=['bin/pyatac','bin/nucleoatac'], include_package_data=True, zip_safe=False, From 8c716bc0d2dc7a5af087d8b7ae39d12097305b98 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 14:10:23 -0800 Subject: [PATCH 13/21] back to cython --- nucleoatac/NucleosomeCalling.py | 1 - nucleoatac/multionomial_cov2.py | 14 -------------- setup.py | 2 +- 3 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 nucleoatac/multionomial_cov2.py diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index eccc052..9959b70 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -10,7 +10,6 @@ from bisect import bisect_left import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from nucleoatac.multinomial_cov import calculateCov -#from nucleoatac.multionomial_cov2 import calculateCov from nucleoatac.Occupancy import OccupancyTrack from pyatac.tracks import Track, CoverageTrack from pyatac.chunk import Chunk diff --git a/nucleoatac/multionomial_cov2.py b/nucleoatac/multionomial_cov2.py deleted file mode 100644 index fd7449f..0000000 --- a/nucleoatac/multionomial_cov2.py +++ /dev/null @@ -1,14 +0,0 @@ -from numba import jit - -@jit("f8(f8[:], f8[:], f8)", nopython=True) -def calculateCov(p, v, r): - if p.shape[0]!= v.shape[0]: - raise ValueError("p and v must be same shape") - value = 0.0 - for i in range(p.shape[0]): - for j in range(i,p.shape[0]): - if i==j: - value += p[i]* (1-p[i]) * v[i]**2 - else: - value += p[i] * p[j] * -2 * v[i] * v[j] - return(value * r) \ No newline at end of file diff --git a/setup.py b/setup.py index 5c69e1e..623ceda 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def run(self): license='MIT', packages=['pyatac','pyatac.pwm','nucleoatac','nucleoatac.vplot'], python_requires='>=3.8', - install_requires=['numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib','numba >= 0.51.2'], + install_requires=['cython > 0.22', 'numpy >= 1.9.1', 'scipy >= 0.16.0','pysam >= 0.10.0','matplotlib'], scripts=['bin/pyatac','bin/nucleoatac'], include_package_data=True, zip_safe=False, From cb245f429ede15944c471ea49a7f837ff5ce079d Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 14:55:43 -0800 Subject: [PATCH 14/21] remove ps --- bin/nucleoatac | 3 ++- bin/pyatac | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/nucleoatac b/bin/nucleoatac index 6d6c15a..9d52284 100755 --- a/bin/nucleoatac +++ b/bin/nucleoatac @@ -18,12 +18,13 @@ import sys from nucleoatac import __version__ import time import matplotlib as mpl -mpl.use('PS') +#mpl.use('PS') from nucleoatac.cli import nucleoatac_parser, nucleoatac_main if __name__ == '__main__': + freeze_support() print("Command run: "+ ' '.join(map(str,sys.argv))) print("nucleoatac version " + __version__) print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) diff --git a/bin/pyatac b/bin/pyatac index 8906250..e9b5370 100644 --- a/bin/pyatac +++ b/bin/pyatac @@ -16,13 +16,14 @@ of the MIT License (see the file COPYING included with the distribution). import sys import matplotlib as mpl -mpl.use('PS') +#mpl.use('PS') from pyatac import __version__ import time from pyatac.cli import pyatac_parser, pyatac_main if __name__ == '__main__': + freeze_support() print("Command run: "+ ' '.join(map(str,sys.argv))) print("pyatac version "+__version__) print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) From a95f48a3609bfa6fa5dc49f898cba99af69fa4e1 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 14:58:38 -0800 Subject: [PATCH 15/21] some test updates for filenames --- .gitignore | 3 +-- tests/test_cli.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index e8503d2..e3d4617 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,4 @@ /site #results of example run -example/test_example* -example/test_results/* +example/_results/* diff --git a/tests/test_cli.py b/tests/test_cli.py index 60b44d0..9854a04 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,30 +10,30 @@ class NucleoATACTestCase(TestCase): def setUp(self): self.parser = nucleoatac_parser() def test_run(self): - cmd = "nucleoatac run --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/test_results/test --cores 2" + cmd = "nucleoatac run --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --cores 2" args = self.parser.parse_args(cmd.split()[1:]) nucleoatac_main(args) def test_occ(self): - cmd = "nucleoatac occ --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/test_results/test --cores 2" + cmd = "nucleoatac occ --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --cores 2" args = self.parser.parse_args(cmd.split()[1:]) nucleoatac_main(args) def test_vprocess(self): - cmd = "nucleoatac vprocess --out example/test_results/test --sizes example/example_results/example.nuc_dist.txt" + cmd = "nucleoatac vprocess --out example/_results/example --sizes example/example_results/example.nuc_dist.txt" args = self.parser.parse_args(cmd.split()[1:]) nucleoatac_main(args) def test_nuc(self): - cmd = "nucleoatac nuc --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/test_results/test --vmat example/example_results/example.VMat --cores 2" + cmd = "nucleoatac nuc --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --vmat example/example_results/example.VMat --cores 2" args = self.parser.parse_args(cmd.split()[1:]) nucleoatac_main(args) def test_merge(self): - cmd = "nucleoatac merge --occpeaks example/example_results/example.occpeaks.bed.gz --nucpos example/example_results/example.nucpos.bed.gz --out example/test_results/test" + cmd = "nucleoatac merge --occpeaks example/example_results/example.occpeaks.bed.gz --nucpos example/example_results/example.nucpos.bed.gz --out example/_results/example" args = self.parser.parse_args(cmd.split()[1:]) nucleoatac_main(args) def test_nfr(self): - cmd = "nucleoatac nfr --bed example/example.bed --occ_track example/example_results/example.occ.bedgraph.gz --calls example/example_results/example.nucmap_combined.bed.gz --out example/test_results/test --bam example/example.bam --fasta example/sacCer3.fa" + cmd = "nucleoatac nfr --bed example/example.bed --occ_track example/example_results/example.occ.bedgraph.gz --calls example/example_results/example.nucmap_combined.bed.gz --out example/_results/example --bam example/example.bam --fasta example/sacCer3.fa" args = self.parser.parse_args(cmd.split()[1:]) nucleoatac_main(args) - + class PyATACTestCase(TestCase): """ Base TestCase class for pyatac commands @@ -41,10 +41,10 @@ class PyATACTestCase(TestCase): def setUp(self): self.parser = pyatac_parser() def test_sizes(self): - cmd = "pyatac sizes --bam example/example.bam --out example/test_results/test_sizes" + cmd = "pyatac sizes --bam example/example.bam --out example/_results/example_sizes" args = self.parser.parse_args(cmd.split()[1:]) pyatac_main(args) - + From 4efc671aac8675b97bd0463b50bbf8d98ec2aeb3 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 15:06:15 -0800 Subject: [PATCH 16/21] updates to test config --- .gitignore | 2 +- example/{test_results => _results}/README.txt | 0 setup.py | 3 +-- tests.py | 7 ------- 4 files changed, 2 insertions(+), 10 deletions(-) rename example/{test_results => _results}/README.txt (100%) delete mode 100644 tests.py diff --git a/.gitignore b/.gitignore index e3d4617..20e0d1d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,4 @@ /site #results of example run -example/_results/* +example/_results/example* diff --git a/example/test_results/README.txt b/example/_results/README.txt similarity index 100% rename from example/test_results/README.txt rename to example/_results/README.txt diff --git a/setup.py b/setup.py index 623ceda..f1703fa 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ class NoTestCommand(TestCommand): def run(self): print("NucleoATAC does not support running tests with " - "'python setup.py test'. Please run 'python tests.py'") + "'python setup.py test'. The test suite can be run via 'pytest'") setup(name='NucleoATAC', @@ -28,5 +28,4 @@ def run(self): scripts=['bin/pyatac','bin/nucleoatac'], include_package_data=True, zip_safe=False, - tests_require=['nose'], cmdclass = {'test': NoTestCommand}) diff --git a/tests.py b/tests.py deleted file mode 100644 index b020255..0000000 --- a/tests.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python - -import matplotlib -import nose -matplotlib.use('agg') - -nose.main() From 6bd4fd95ce90d496e0fcb4de8c0068e6fed9bd78 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 16:49:26 -0800 Subject: [PATCH 17/21] try --- nucleoatac/NucleosomeCalling.py | 6 ++---- pyatac/VMat.py | 8 ++++---- pyatac/chunk.py | 4 +--- pyatac/get_nucleotide.py | 2 +- pyatac/get_pwm.py | 2 +- pyatac/seq.py | 4 ++-- pyatac/signal_around_sites.py | 2 +- 7 files changed, 12 insertions(+), 16 deletions(-) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index 9959b70..1919fc2 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -61,9 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov): self.bias_mat.start + offset, self.bias_mat.end - offset), vmat.mat,mode = 'valid')[0] - self.vals = self.vals * self.nuc_cov // self.cov.vals - - + self.vals = self.vals * self.nuc_cov / self.cov.vals class SignalDistribution: """Class for determining distribution of signal""" @@ -145,7 +143,7 @@ def addNorms(x,params): def err_func(pars,y): """error function for normal fit; to be used for fitNorm""" x = np.linspace(0,len(y)-1,len(y)) - return sum((addNorms(x, pars) - y)**2) + return np.sum((addNorms(x, pars) - y)**2) def fitNorm(guess, bound, sig): """Fit a normal to the signal with lower and upperbounds to sd""" a = (sig,) diff --git a/pyatac/VMat.py b/pyatac/VMat.py index f0af169..4cbb270 100644 --- a/pyatac/VMat.py +++ b/pyatac/VMat.py @@ -122,7 +122,7 @@ def converto1d(self): self.one_d[center-(self.w-i)+(ilen//2)]+= val * 0.5 self.one_d[center-(self.w-i)-(ilen//2+1)]+= val * 0.5 self.one_d[center-(self.w-i)+(ilen//2+1)]+= val * 0.5 - self.one_d = self.one_d // sum(self.one_d) + self.one_d = self.one_d / np.sum(self.one_d) def plot(self, mat=None, title=None, filename=None): """Plot current main matrix or specified matrix (of same dimensions)""" if mat is None: @@ -149,8 +149,8 @@ def plot_1d(self,filename=None): fig = plt.figure() xlim = len(self.one_d)//2 plt.plot(list(range(-xlim,xlim+1)),self.one_d) - plt.vlines(-73,0,max(self.one_d)*1.1,linestyles='dashed') - plt.vlines(73,0,max(self.one_d)*1.1,linestyles='dashed') + plt.vlines(-73,0,np.max(self.one_d)*1.1,linestyles='dashed') + plt.vlines(73,0,np.max(self.one_d)*1.1,linestyles='dashed') plt.xlabel("Position relative to dyad") plt.ylabel("Insertion Frequency") if filename: @@ -166,7 +166,7 @@ def plot_insertsize(self,filename=None): """plot the insert size disribution in the main matrix""" fig = plt.figure() ins = np.sum(self.mat,axis=1) - ins = ins // sum(ins) + ins = ins / np.sum(ins) plt.plot(list(range(self.lower,self.upper)),ins) plt.xlabel("Insert Size") plt.ylabel("Frequency") diff --git a/pyatac/chunk.py b/pyatac/chunk.py index bfe7f34..eb1ebfe 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -7,7 +7,6 @@ import gzip import warnings -import functools class Chunk(): """Class that stores reads for a particular chunk of the genome""" @@ -97,8 +96,7 @@ def insert(self, *args): def sort(self): """sort regions""" - list.sort(self, key=functools.cmp_to_key(_chunkCompare)) # Just hack it; - # Likely to be very slow; + list.sort(self, key=lambda chunk: [chunk.chrom, chunk.start]) def isSorted(self): """check that regions are sorted""" diff --git a/pyatac/get_nucleotide.py b/pyatac/get_nucleotide.py index 112ef8c..a0cfd25 100644 --- a/pyatac/get_nucleotide.py +++ b/pyatac/get_nucleotide.py @@ -74,7 +74,7 @@ def get_nucleotide(args): for i in tmp: result += i[0] n += i[1] - result = result // n + result = result / n if args.norm: normfreqs = seq.getNucFreqs(params.fasta, params.nucleotides) result = result / np.reshape(np.repeat(normfreqs,result.shape[1]),result.shape) diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index 3d809b8..af580d8 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -86,7 +86,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): left = result[:,0:(args.flank + 1)] right = result[:,args.flank:] rightflipped = np.fliplr(np.flipud(right)) - combined = (left + rightflipped) // 2 + combined = (left + rightflipped) / 2 result = np.hstack((combined, np.fliplr(np.flipud(combined[:,0:args.flank])))) #save pwm = PWM(result, args.flank, args.flank, params.nucleotides) diff --git a/pyatac/seq.py b/pyatac/seq.py index f2a2dee..75296b4 100644 --- a/pyatac/seq.py +++ b/pyatac/seq.py @@ -55,7 +55,7 @@ def getNucFreqs(fasta, nucleotides): out += [sequence.count(i) for i in nucleotides] n += len(sequence) f.close() - return out//n + return out/n def getNucFreqsFromChunkList(chunks, fasta, nucleotides): @@ -69,5 +69,5 @@ def getNucFreqsFromChunkList(chunks, fasta, nucleotides): out += [sequence.count(i) for i in nucleotides] n += len(sequence) handle.close() - return out//n + return out/n diff --git a/pyatac/signal_around_sites.py b/pyatac/signal_around_sites.py index 4ffaa9f..1b5e8f2 100644 --- a/pyatac/signal_around_sites.py +++ b/pyatac/signal_around_sites.py @@ -108,7 +108,7 @@ def get_signal(args): result = sum(tmp) if not args.no_agg: if args.norm: - result = result // len(chunks) + result = result / len(chunks) fig = plt.figure() plt.plot(list(range(-args.up,args.down+1)),result) plt.xlabel("Position relative to Site") From dfae9fb6cde4effc3e8f3b3bede00661d76bb581 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 18:41:48 -0800 Subject: [PATCH 18/21] add test --- tests/test_nucleosome_calling.py | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/test_nucleosome_calling.py diff --git a/tests/test_nucleosome_calling.py b/tests/test_nucleosome_calling.py new file mode 100644 index 0000000..cc0c198 --- /dev/null +++ b/tests/test_nucleosome_calling.py @@ -0,0 +1,42 @@ +from unittest import TestCase +import numpy as np +import matplotlib +matplotlib.use('agg') + +import nucleoatac.NucleosomeCalling as Nuc +import pyatac.VMat as V +from pyatac.chunkmat2d import FragmentMat2D +from pyatac.chunk import ChunkList +from pyatac.fragmentsizes import FragmentSizes + + +class Test_NuclesomeCalling(TestCase): + """class to test nucleosome calling""" + def setUp(self): + """setup Test_occupancy class by establishing parameters""" + bed_list = ChunkList.read('example/example.bed') + self.chunk = bed_list[0] + self.vmat = V.VMat.open('example/example.VMat') + fragment_dist = FragmentSizes.open('example/example_results/example.fragmentsizes.txt') + self.params = Nuc.NucParameters(vmat = self.vmat, fragmentsizes = fragment_dist, + bam = 'example/example.bam', fasta = 'example/sacCer3.fa', pwm = 'Human', + occ_track = 'example/example_results/example.occ.bedgraph.gz', + sd = 25, nonredundant_sep = 120, redundant_sep = 25, + min_z = 3, min_lr = 0 , atac = True) + nuc = Nuc.NucChunk(self.chunk) + nuc.process(self.params) + self.out = { + 'nucpos' : [nuc.nuc_collection[i] for i in sorted(nuc.nonredundant)], + 'nucpos.redundant' : [nuc.nuc_collection[i] for i in sorted(nuc.redundant)], + 'nucleoatac_signal' : nuc.norm_signal, + 'nucleoatac_raw' : nuc.nuc_signal, + 'nucleoatac_background' : nuc.bias, + 'nucleoatac_signal.smooth' : nuc.smoothed + } + + def test_calling(self): + """test nucleosome positions""" + self.assertTrue(len(self.out['nucpos']) == 3) + def test_signal(self): + """test signal output""" + self.assertTrue(len(self.out['nucleoatac_signal'].vals) == 1093) From 32b388113a2e8bd659e9ec8e5d95a7e8aaad1988 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 20:32:47 -0800 Subject: [PATCH 19/21] fix bug --- nucleoatac/multinomial_cov.pyx | 3 +-- tests/test_nucleosome_calling.py | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/nucleoatac/multinomial_cov.pyx b/nucleoatac/multinomial_cov.pyx index fe3ac26..1bfc6c2 100644 --- a/nucleoatac/multinomial_cov.pyx +++ b/nucleoatac/multinomial_cov.pyx @@ -5,12 +5,10 @@ Created on Mon May 12 14:40:53 2014 @author: alicia """ -from __future__ import division import numpy as np cimport numpy as np cimport cython -#DTYPE = np.float ctypedef np.float_t DTYPE_t @cython.boundscheck(False) @@ -19,6 +17,7 @@ def calculateCov(np.ndarray[DTYPE_t, ndim=1] p, np.ndarray[DTYPE_t, ndim=1] v, i raise ValueError("p and v must be same shape") cdef DTYPE_t value cdef unsigned int i, j + value = 0 for i in range(p.shape[0]): for j in range(i,p.shape[0]): if i==j: diff --git a/tests/test_nucleosome_calling.py b/tests/test_nucleosome_calling.py index cc0c198..398d77b 100644 --- a/tests/test_nucleosome_calling.py +++ b/tests/test_nucleosome_calling.py @@ -37,6 +37,12 @@ def setUp(self): def test_calling(self): """test nucleosome positions""" self.assertTrue(len(self.out['nucpos']) == 3) + self.assertTrue(self.out['nucpos'][0].start == 706773) + self.assertTrue(self.out['nucpos'][0].nfr_cov == 8.0) + self.assertTrue(self.out['nucpos'][0].nuc_cov == 51.0) + self.assertTrue(abs(self.out['nucpos'][0].nuc_signal - 16.7928) < 0.001) + self.assertTrue(abs(self.out['nucpos'][0].norm_signal - 6.5573) < 0.001) + self.assertTrue(abs(self.out['nucpos'][0].smoothed - 3.2558) < 0.001) def test_signal(self): """test signal output""" self.assertTrue(len(self.out['nucleoatac_signal'].vals) == 1093) From 36e3f58388a527d466c78d09e6b8992b768c39b8 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sun, 13 Dec 2020 20:54:52 -0800 Subject: [PATCH 20/21] update tests --- tests/test_cli.py | 8 ++++---- tests/test_nucleosome_calling.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9854a04..e1b2a47 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,10 +9,6 @@ class NucleoATACTestCase(TestCase): """ def setUp(self): self.parser = nucleoatac_parser() - def test_run(self): - cmd = "nucleoatac run --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --cores 2" - args = self.parser.parse_args(cmd.split()[1:]) - nucleoatac_main(args) def test_occ(self): cmd = "nucleoatac occ --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --cores 2" args = self.parser.parse_args(cmd.split()[1:]) @@ -33,6 +29,10 @@ def test_nfr(self): cmd = "nucleoatac nfr --bed example/example.bed --occ_track example/example_results/example.occ.bedgraph.gz --calls example/example_results/example.nucmap_combined.bed.gz --out example/_results/example --bam example/example.bam --fasta example/sacCer3.fa" args = self.parser.parse_args(cmd.split()[1:]) nucleoatac_main(args) + def test_run(self): + cmd = "nucleoatac run --bam example/example.bam --bed example/example.bed --fasta example/sacCer3.fa --out example/_results/example --cores 2" + args = self.parser.parse_args(cmd.split()[1:]) + nucleoatac_main(args) class PyATACTestCase(TestCase): """ diff --git a/tests/test_nucleosome_calling.py b/tests/test_nucleosome_calling.py index 398d77b..b6362b9 100644 --- a/tests/test_nucleosome_calling.py +++ b/tests/test_nucleosome_calling.py @@ -38,6 +38,7 @@ def test_calling(self): """test nucleosome positions""" self.assertTrue(len(self.out['nucpos']) == 3) self.assertTrue(self.out['nucpos'][0].start == 706773) + self.assertTrue(abs(self.out['nucpos'][0].z - 7.7175) < 0.001) self.assertTrue(self.out['nucpos'][0].nfr_cov == 8.0) self.assertTrue(self.out['nucpos'][0].nuc_cov == 51.0) self.assertTrue(abs(self.out['nucpos'][0].nuc_signal - 16.7928) < 0.001) From 362a4a15e795d0e0eb187866168dd5fde5be3c02 Mon Sep 17 00:00:00 2001 From: Alicia Schep Date: Sat, 19 Dec 2020 19:24:54 -0800 Subject: [PATCH 21/21] update test to be consistent with other in terms of input --- tests/test_nucleosome_calling.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/test_nucleosome_calling.py b/tests/test_nucleosome_calling.py index b6362b9..a248424 100644 --- a/tests/test_nucleosome_calling.py +++ b/tests/test_nucleosome_calling.py @@ -3,11 +3,13 @@ import matplotlib matplotlib.use('agg') +from pyatac.utils import read_chrom_sizes_from_fasta import nucleoatac.NucleosomeCalling as Nuc import pyatac.VMat as V from pyatac.chunkmat2d import FragmentMat2D from pyatac.chunk import ChunkList from pyatac.fragmentsizes import FragmentSizes +from pyatac.bias import PWM class Test_NuclesomeCalling(TestCase): @@ -16,7 +18,18 @@ def setUp(self): """setup Test_occupancy class by establishing parameters""" bed_list = ChunkList.read('example/example.bed') self.chunk = bed_list[0] - self.vmat = V.VMat.open('example/example.VMat') + self.vmat = V.VMat.open('example/example_results/example.VMat') + #chrs = read_chrom_sizes_from_fasta('example/sacCer3.fa') + #pwm = PWM.open('Human') + #chunks = ChunkList.read( + # 'example/example.bed', + # chromDict = chrs, + # min_offset = self.vmat.mat.shape[1] + self.vmat.upper//2 + max(pwm.up,pwm.down) + 120//2, + # min_length = 120 * 2 + #) + #chunks.slop(chrs, up = 120//2, down = 120//2) + #chunks.merge() + #self.chunk = chunks[0] fragment_dist = FragmentSizes.open('example/example_results/example.fragmentsizes.txt') self.params = Nuc.NucParameters(vmat = self.vmat, fragmentsizes = fragment_dist, bam = 'example/example.bam', fasta = 'example/sacCer3.fa', pwm = 'Human', @@ -33,17 +46,17 @@ def setUp(self): 'nucleoatac_background' : nuc.bias, 'nucleoatac_signal.smooth' : nuc.smoothed } - def test_calling(self): """test nucleosome positions""" self.assertTrue(len(self.out['nucpos']) == 3) self.assertTrue(self.out['nucpos'][0].start == 706773) - self.assertTrue(abs(self.out['nucpos'][0].z - 7.7175) < 0.001) - self.assertTrue(self.out['nucpos'][0].nfr_cov == 8.0) - self.assertTrue(self.out['nucpos'][0].nuc_cov == 51.0) - self.assertTrue(abs(self.out['nucpos'][0].nuc_signal - 16.7928) < 0.001) - self.assertTrue(abs(self.out['nucpos'][0].norm_signal - 6.5573) < 0.001) - self.assertTrue(abs(self.out['nucpos'][0].smoothed - 3.2558) < 0.001) + self.assertTrue(abs(self.out['nucpos'][0].z - 8.2365) < 0.001) + self.assertTrue(self.out['nucpos'][0].nfr_cov == 6.0) + self.assertTrue(self.out['nucpos'][0].nuc_cov == 54.0) + self.assertTrue(abs(self.out['nucpos'][0].nuc_signal - 15.3150) < 0.001) + self.assertTrue(abs(self.out['nucpos'][0].norm_signal - 6.4621) < 0.001) + self.assertTrue(abs(self.out['nucpos'][0].smoothed - 3.4974) < 0.001) + self.assertTrue(abs(self.out['nucpos'][0].lr - 22.3557) < 0.001) def test_signal(self): """test signal output""" self.assertTrue(len(self.out['nucleoatac_signal'].vals) == 1093)