From 184413387ec8bb542bd815b288405bf7de419cf9 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 13:32:40 +0800 Subject: [PATCH 1/7] 2to3 --- pyatac/VMat.py | 6 +++--- pyatac/__init__.py | 5 ++--- pyatac/bias.py | 2 +- pyatac/chunk.py | 8 ++++---- pyatac/fragmentsizes.py | 2 +- pyatac/get_cov.py | 6 +++--- pyatac/get_ins.py | 10 +++++----- pyatac/get_nucleotide.py | 4 ++-- pyatac/get_pwm.py | 4 ++-- pyatac/get_sizes.py | 2 +- pyatac/make_bias_track.py | 8 ++++---- pyatac/make_bias_vplot.py | 4 ++-- pyatac/make_vplot.py | 6 +++--- pyatac/seq.py | 4 ++-- pyatac/signal_around_sites.py | 6 +++--- pyatac/tracks.py | 10 +++++----- 16 files changed, 43 insertions(+), 44 deletions(-) diff --git a/pyatac/VMat.py b/pyatac/VMat.py index 2c6be8a..683826f 100644 --- a/pyatac/VMat.py +++ b/pyatac/VMat.py @@ -146,7 +146,7 @@ def plot_1d(self,filename=None): """plot the 1d insertion representation of the matrix""" fig = plt.figure() xlim = len(self.one_d)/2 - plt.plot(range(-xlim,xlim+1),self.one_d) + plt.plot(list(range(-xlim,xlim+1)),self.one_d) plt.vlines(-73,0,max(self.one_d)*1.1,linestyles='dashed') plt.vlines(73,0,max(self.one_d)*1.1,linestyles='dashed') plt.xlabel("Position relative to dyad") @@ -164,7 +164,7 @@ def plot_insertsize(self,filename=None): fig = plt.figure() ins = np.sum(self.mat,axis=1) ins = ins/sum(ins) - plt.plot(range(self.lower,self.upper),ins) + plt.plot(list(range(self.lower,self.upper)),ins) plt.xlabel("Insert Size") plt.ylabel("Frequency") if filename: @@ -208,7 +208,7 @@ def open(filename): elif state == 'upper': upper = int(line.strip('\n')) elif state == 'mat': - mat.append(map(float,line.strip('\n').split('\t'))) + mat.append(list(map(float,line.strip('\n').split('\t')))) try: new = VMat(np.array(mat), lower, upper) except NameError: diff --git a/pyatac/__init__.py b/pyatac/__init__.py index def3758..b28b04f 100644 --- a/pyatac/__init__.py +++ b/pyatac/__init__.py @@ -1,4 +1,3 @@ -#Define version based on setup script -import pkg_resources -__version__ = pkg_resources.require("NucleoATAC")[0].version + + diff --git a/pyatac/bias.py b/pyatac/bias.py index df2bcf0..6b36827 100644 --- a/pyatac/bias.py +++ b/pyatac/bias.py @@ -66,7 +66,7 @@ def open(name): elif state == 'nucleotides': nucleotides = line.strip('\n').split() elif state == 'mat': - mat.append(map(float,line.strip('\n').split('\t'))) + mat.append(list(map(float,line.strip('\n').split('\t')))) infile.close() try: new = PWM(np.array(mat), up, down, nucleotides) diff --git a/pyatac/chunk.py b/pyatac/chunk.py index c36fe8b..08d3e26 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -97,7 +97,7 @@ def sort(self): list.sort(self, cmp = _chunkCompare) def isSorted(self): """check that regions are sorted""" - return all([_chunkCompare(self[i],self[i+1])==-1 for i in xrange(len(self)-1)]) + return all([_chunkCompare(self[i],self[i+1])==-1 for i in range(len(self)-1)]) def slop(self, chromDict, up = 0, down = 0, new = False): out = ChunkList() for i in self: @@ -154,7 +154,7 @@ def read(bedfile, weight_col=None, strand_col = None, name_col = None, chromDict start = int(in_line[1]) end = int(in_line[2]) chrom = in_line[0] - if chromDict is not None and chrom not in chromDict.keys(): + if chromDict is not None and chrom not in list(chromDict.keys()): bad_chroms.append(chrom) continue if min_offset: @@ -183,7 +183,7 @@ def convertChromSizes(chromDict, splitsize = None, offset = 0): out = ChunkList() for chrom in chrs: out.extend(ChunkList(*(Chunk(chrom, i, min(i + splitsize, chromDict[chrom] - offset)) - for i in xrange(offset, chromDict[chrom] - offset, splitsize)))) + for i in range(offset, chromDict[chrom] - offset, splitsize)))) return out def split(self, bases = None, items = None): """splits list of chunks into set of sublists""" @@ -202,7 +202,7 @@ def split(self, bases = None, items = None): out.append(self[i:(k+1)]) return out elif items is not None: - out = [ self[i:i+items] for i in xrange(0,len(self),items)] + out = [ self[i:i+items] for i in range(0,len(self),items)] return out else: raise Exception("Need to provide items or bases argument!") diff --git a/pyatac/fragmentsizes.py b/pyatac/fragmentsizes.py index 4fd6ba0..10af79c 100644 --- a/pyatac/fragmentsizes.py +++ b/pyatac/fragmentsizes.py @@ -71,7 +71,7 @@ def open(filename): elif state == 'upper': upper = int(line.strip('\n')) elif state == 'sizes': - fragmentsizes = np.array(map(float,line.rstrip("\n").split("\t"))) + fragmentsizes = np.array(list(map(float,line.rstrip("\n").split("\t")))) try: new = FragmentSizes(lower, upper, vals = fragmentsizes) except NameError: diff --git a/pyatac/get_cov.py b/pyatac/get_cov.py index 29a6fe2..4b4d729 100644 --- a/pyatac/get_cov.py +++ b/pyatac/get_cov.py @@ -29,7 +29,7 @@ def _covHelper(arg): cov.calculateCoverage(mat, lower = args.lower, upper = args.upper, window_len = args.window) cov.vals *= args.scale / float(args.window) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -42,7 +42,7 @@ def _writeCov(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing coverage track\n') traceback.print_exc() print() @@ -75,7 +75,7 @@ def get_cov(args, bases = 50000, splitsize = 1000): write_process = mp.Process(target = _writeCov, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool1.map(_covHelper, zip(j,itertools.repeat(args))) + tmp = pool1.map(_covHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_ins.py b/pyatac/get_ins.py index 8d28e96..1bf208c 100644 --- a/pyatac/get_ins.py +++ b/pyatac/get_ins.py @@ -26,7 +26,7 @@ def _insHelperSmooth(arg): ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) ins.smooth_track(args.smooth, window = "gaussian", mode= 'valid') except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -39,7 +39,7 @@ def _insHelper(arg): ins = InsertionTrack(chunk.chrom, chunk.start, chunk.end) ins.calculateInsertions( args.bam, lower = args.lower, upper = args.upper, atac = args.atac) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -51,7 +51,7 @@ def _writeIns(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -85,9 +85,9 @@ def get_ins(args, bases = 50000, splitsize = 1000): write_process.start() for j in sets: if args.smooth: - tmp = pool1.map(_insHelperSmooth, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelperSmooth, list(zip(j,itertools.repeat(args)))) else: - tmp = pool1.map(_insHelper, zip(j,itertools.repeat(args))) + tmp = pool1.map(_insHelper, list(zip(j,itertools.repeat(args)))) for track in tmp: write_queue.put(track) pool1.close() diff --git a/pyatac/get_nucleotide.py b/pyatac/get_nucleotide.py index 2419523..a0cfd25 100644 --- a/pyatac/get_nucleotide.py +++ b/pyatac/get_nucleotide.py @@ -31,7 +31,7 @@ def _nucleotideHelper(arg): mat += submat n += 1 except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+'\n') + print(('Caught exception when processing:\n'+ chunk.asBed()+'\n')) traceback.print_exc() print() raise e @@ -66,7 +66,7 @@ def get_nucleotide(args): params = _NucleotideParameters(args.up, args.down, args.fasta, args.dinucleotide) sets = chunks.split(bases = 10000) pool = Pool(processes = args.cores) - tmp = pool.map(_nucleotideHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_nucleotideHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = np.zeros(params.matsize) diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index bdfc534..b185462 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -34,7 +34,7 @@ def _pwmHelper(arg): mat += ins.getStrandedInsertionSequences(params.fasta, params.nucleotides, up = params.up, down = params.down) n += sum(ins.vals) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -69,7 +69,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): params = _PWMParameters(bam = args.bam, up = args.flank, down = args.flank, fasta = args.fasta, lower = args.lower, upper = args.upper, atac = args.atac, sym = args.sym) pool = Pool(processes = args.cores) - tmp = pool.map(_pwmHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_pwmHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() n = 0.0 diff --git a/pyatac/get_sizes.py b/pyatac/get_sizes.py index cf66f54..aac0905 100644 --- a/pyatac/get_sizes.py +++ b/pyatac/get_sizes.py @@ -30,7 +30,7 @@ def get_sizes(args): if not args.no_plot: #make figure fig = plt.figure() - plt.plot(range(sizes.lower,sizes.upper),sizes.get(sizes.lower,sizes.upper),label = args.out) + plt.plot(list(range(sizes.lower,sizes.upper)),sizes.get(sizes.lower,sizes.upper),label = args.out) plt.xlabel("Fragment Size") plt.ylabel("Frequency") fig.savefig(args.out+'.fragmentsizes.eps') diff --git a/pyatac/make_bias_track.py b/pyatac/make_bias_track.py index 94e6640..e6204c7 100644 --- a/pyatac/make_bias_track.py +++ b/pyatac/make_bias_track.py @@ -23,7 +23,7 @@ def _biasHelper(arg): bias = InsertionBiasTrack(chunk.chrom, chunk.start, chunk.end) bias.computeBias(params.fasta, params.chrs, params.pwm) except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -36,7 +36,7 @@ def _writeBias(track_queue, out): for track in iter(track_queue.get, 'STOP'): track.write_track(out_handle) track_queue.task_done() - except Exception, e: + except Exception as e: print('Caught exception when writing insertion track\n') traceback.print_exc() print() @@ -66,7 +66,7 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): sets = chunks.split(items = bases/splitsize) else: chunks = ChunkList.read(args.bed) - chunks.checkChroms(params.chrs.keys()) + chunks.checkChroms(list(params.chrs.keys())) chunks.merge() sets = chunks.split(bases = bases) maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) @@ -77,7 +77,7 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): write_process = mp.Process(target = _writeBias, args=(write_queue, args.out)) write_process.start() for j in sets: - tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params))) + tmp = pool.map(_biasHelper, list(zip(j,itertools.repeat(params)))) for track in tmp: write_queue.put(track) pool.close() diff --git a/pyatac/make_bias_vplot.py b/pyatac/make_bias_vplot.py index ce80274..a7c70c2 100644 --- a/pyatac/make_bias_vplot.py +++ b/pyatac/make_bias_vplot.py @@ -48,7 +48,7 @@ def _biasVplotHelper(arg): else: mat += add except Exception as e: - print('Caught exception when processing:\n' + chunk.asBed() + "\n") + print(('Caught exception when processing:\n' + chunk.asBed() + "\n")) traceback.print_exc() print() raise e @@ -80,7 +80,7 @@ def make_bias_vplot(args): sizes = args.sizes, scale = args.scale, pwm = args.pwm, fasta = args.fasta) pool = Pool(processes = args.cores) - tmp = pool.map(_biasVplotHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_biasVplotHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = sum(tmp) diff --git a/pyatac/make_vplot.py b/pyatac/make_vplot.py index edb57a2..300d742 100644 --- a/pyatac/make_vplot.py +++ b/pyatac/make_vplot.py @@ -13,7 +13,7 @@ import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.chunk import ChunkList from pyatac.chunkmat2d import FragmentMat2D -import VMat as V +from . import VMat as V from multiprocessing import Pool import itertools import traceback @@ -35,7 +35,7 @@ def _vplotHelper(arg): add = add/np.sum(add) result += add except Exception as e: - print('Caught exception when processing:\n'+ chunk.asBed()+"\n") + print(('Caught exception when processing:\n'+ chunk.asBed()+"\n")) traceback.print_exc() print() raise e @@ -67,7 +67,7 @@ def make_vplot(args): params = _VplotParams(flank = args.flank, lower = args.lower, upper = args.upper, bam = args.bam, atac = args.atac, scale = args.scale) pool = Pool(processes = args.cores) - tmp = pool.map(_vplotHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_vplotHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() result = sum(tmp) diff --git a/pyatac/seq.py b/pyatac/seq.py index df825ed..75296b4 100644 --- a/pyatac/seq.py +++ b/pyatac/seq.py @@ -22,7 +22,7 @@ def get_sequence(chunk, fastafile): return sequence.upper() -DNA_Translation = string.maketrans('ACGT', 'TGCA') +DNA_Translation = ''.maketrans('ACGT', 'TGCA') def complement(sequence): """Get complement of DNA sequenceuence""" @@ -41,7 +41,7 @@ def seq_to_mat(sequence, nucleotides): raise Exception("Usage Error! Nucleotides must all be of same length! No mixing single nucleotides with dinucleotides, etc") mat = np.zeros((len(nucleotides),len(sequence)-l+1)) for i in range(len(nucleotides)): - mat[i] = np.array(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)])) + mat[i] = np.array(list(map(int,[sequence[j:j+l] ==nucleotides[i] for j in range(len(sequence)-l+1)]))) return(mat) def getNucFreqs(fasta, nucleotides): diff --git a/pyatac/signal_around_sites.py b/pyatac/signal_around_sites.py index 67a03bf..7877276 100644 --- a/pyatac/signal_around_sites.py +++ b/pyatac/signal_around_sites.py @@ -62,7 +62,7 @@ def _signalHelper(arg): sig[np.isnan(sig)]=0 agg += sig except Exception as e: - print('Caught exception when processing:\n' + chunk.asBed() + "\n") + print(('Caught exception when processing:\n' + chunk.asBed() + "\n")) traceback.print_exc() print() bg.close() @@ -96,7 +96,7 @@ def get_signal(args): args.scale, args.positive, args.all) sets = chunks.split(items = min(args.cores*20,len(chunks))) pool = Pool(processes = args.cores) - tmp = pool.map(_signalHelper, zip(sets,itertools.repeat(params))) + tmp = pool.map(_signalHelper, list(zip(sets,itertools.repeat(params)))) pool.close() pool.join() if args.all: @@ -110,7 +110,7 @@ def get_signal(args): if args.norm: result = result / len(chunks) fig = plt.figure() - plt.plot(range(-args.up,args.down+1),result) + plt.plot(list(range(-args.up,args.down+1)),result) plt.xlabel("Position relative to Site") plt.ylabel("Signal Intensity") fig.savefig(args.out+'.agg.track.eps') diff --git a/pyatac/tracks.py b/pyatac/tracks.py index d1ac322..f1de3c6 100644 --- a/pyatac/tracks.py +++ b/pyatac/tracks.py @@ -47,7 +47,7 @@ def write_track(self, handle, start = None, end = None, vals = None, write_zero if vals is None: vals=self.vals if len(vals)!=self.end-self.start: - print len(vals),self.end-self.start + print(len(vals),self.end-self.start) raise Exception("Error! Inconsistency between length of \ values and start/end values") prev_value = None @@ -88,14 +88,14 @@ def read_track(self, bedgraph, start = None, end = None, empty = np.nan, flank = def log(self, pseudo = 1): """Log values. Add psuedo count so values don't equal 0 before logging""" if self.log: - print "Logging a track that is already log..." + print("Logging a track that is already log...") adjusted = self.vals + pseudo self.vals = np.log(adjusted) self.log = True def exp(self): """Take exponent of values""" if not self.log: - print "taking exponent of a non-logged track..." + print("taking exponent of a non-logged track...") self.vals = np.exp(self.vals) self.log = False def smooth_track(self, window_len, window='flat', sd = None, @@ -135,7 +135,7 @@ def plot(self, name = None, start = None, end = None, filename=None): name = self.name values = self.get(start,end) fig = plt.figure() - plt.plot(range(start,end),values) + plt.plot(list(range(start,end)),values) plt.xlabel(self.chrom) plt.ylabel(name) if filename: @@ -143,7 +143,7 @@ def plot(self, name = None, start = None, end = None, filename=None): plt.close(fig) #Also save text output! filename2 = ".".join(filename.split(".")[:-1]+['txt']) - out = np.vstack((range(start,end),values)) + out = np.vstack((list(range(start,end)),values)) np.savetxt(filename2,out,delimiter="\t") else: fig.show() From 8b4c6aa799326b67695174bef7234a4ff5ed5a24 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 13:33:14 +0800 Subject: [PATCH 2/7] 2to3 --- bin/nucleoatac | 8 ++++---- bin/pyatac | 8 ++++---- nucleoatac/NFRCalling.py | 4 ++-- nucleoatac/NucleosomeCalling.py | 6 +++--- nucleoatac/Occupancy.py | 20 ++++++++++---------- nucleoatac/__init__.py | 4 +--- nucleoatac/cli.py | 4 ++-- nucleoatac/diff_occ.py | 6 +++--- nucleoatac/run_nfr.py | 8 ++++---- nucleoatac/run_nuc.py | 16 ++++++++-------- nucleoatac/run_occ.py | 12 ++++++------ 11 files changed, 47 insertions(+), 49 deletions(-) mode change 100644 => 100755 bin/nucleoatac diff --git a/bin/nucleoatac b/bin/nucleoatac old mode 100644 new mode 100755 index b0e08df..6d6c15a --- a/bin/nucleoatac +++ b/bin/nucleoatac @@ -24,14 +24,14 @@ from nucleoatac.cli import nucleoatac_parser, nucleoatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "nucleoatac version " + __version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("nucleoatac version " + __version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = nucleoatac_parser() args = parser.parse_args() nucleoatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted nucleoatac.") sys.exit(0) diff --git a/bin/pyatac b/bin/pyatac index cc3dbb4..8906250 100644 --- a/bin/pyatac +++ b/bin/pyatac @@ -23,14 +23,14 @@ from pyatac.cli import pyatac_parser, pyatac_main if __name__ == '__main__': - print "Command run: "+ ' '.join(map(str,sys.argv)) - print "pyatac version "+__version__ - print "start run at: " + time.strftime("%Y-%m-%d %H:%M") + print("Command run: "+ ' '.join(map(str,sys.argv))) + print("pyatac version "+__version__) + print("start run at: " + time.strftime("%Y-%m-%d %H:%M")) try: parser = pyatac_parser() args = parser.parse_args() pyatac_main(args) - print "end run at: " + time.strftime("%Y-%m-%d %H:%M") + print("end run at: " + time.strftime("%Y-%m-%d %H:%M")) except KeyboardInterrupt: sys.stderr.write("User interrupted pyatac.") sys.exit(0) diff --git a/nucleoatac/NFRCalling.py b/nucleoatac/NFRCalling.py index d41fd4c..a0be397 100644 --- a/nucleoatac/NFRCalling.py +++ b/nucleoatac/NFRCalling.py @@ -89,7 +89,7 @@ def findNFRs(self): if self.chrom in tbx.contigs: for row in tbx.fetch(self.chrom, self.start, self.end, parser = pysam.asTuple()): nucs.append(int(row[1])) - for j in xrange(1,len(nucs)): + for j in range(1,len(nucs)): left = nucs[j-1] + 73 right = nucs[j] - 72 if right <= left: @@ -106,7 +106,7 @@ def process(self, params): self.findNFRs() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index d146d73..c4df1ba 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -79,7 +79,7 @@ def simulateReads(self): sim_mat = np.reshape(sim_vect, self.vmat.mat.shape) return sim_mat def simulateDist(self, numiters = 1000): - self.scores = map(lambda x: np.sum(self.simulateReads() * self.vmat.mat),range(numiters)) + self.scores = [np.sum(self.simulateReads() * self.vmat.mat) for x in range(numiters)] def analStd(self): flatv = np.ravel(self.vmat.mat) var = calculateCov(self.probs, flatv, self.reads) @@ -310,7 +310,7 @@ def findAllNucs(self): self.nuc_collection[i] = nuc self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys())) self.nonredundant = reduce_peaks( self.sorted_nuc_keys, - map(lambda x: self.nuc_collection[x].z, self.sorted_nuc_keys), + [self.nuc_collection[x].z for x in self.sorted_nuc_keys], self.params.nonredundant_sep) self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant) def fit(self): @@ -340,7 +340,7 @@ def process(self, params): self.makeInsertionTrack() def removeData(self): """remove data from chunk-- deletes all attributes""" - names = self.__dict__.keys() + names = list(self.__dict__.keys()) for name in names: delattr(self,name) diff --git a/nucleoatac/Occupancy.py b/nucleoatac/Occupancy.py index 9e9cd59..6b3ac1d 100644 --- a/nucleoatac/Occupancy.py +++ b/nucleoatac/Occupancy.py @@ -67,11 +67,11 @@ def gamma_fit(X,o,p): def plotFits(self,filename=None): """plot the Fits""" fig = plt.figure() - plt.plot(range(self.lower,self.upper),self.fragmentsizes.get(), + plt.plot(list(range(self.lower,self.upper)),self.fragmentsizes.get(), label = "Observed") - plt.plot(range(self.lower,self.upper),self.nfr_fit0.get(), label = "NFR Fit") - plt.plot(range(self.lower,self.upper),self.nuc_fit.get(), label = "Nucleosome Model") - plt.plot(range(self.lower,self.upper),self.nfr_fit.get(), label = "NFR Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit0.get(), label = "NFR Fit") + plt.plot(list(range(self.lower,self.upper)),self.nuc_fit.get(), label = "Nucleosome Model") + plt.plot(list(range(self.lower,self.upper)),self.nfr_fit.get(), label = "NFR Model") plt.legend() plt.xlabel("Fragment size") plt.ylabel("Relative Frequency") @@ -109,8 +109,8 @@ def calculateOccupancy(inserts, bias, params): nuc_probs = nuc_probs / np.sum(nuc_probs) nfr_probs = params.nfr_probs * bias nfr_probs = nfr_probs / np.sum(nfr_probs) - x = map(lambda alpha: np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs), params.alphas) - logliks = np.array(map(lambda j: np.sum(x[j]*inserts),range(params.l))) + x = [np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs) for alpha in params.alphas] + logliks = np.array([np.sum(x[j]*inserts) for j in range(params.l)]) logliks[np.isnan(logliks)] = -float('inf') occ = params.alphas[np.argmax(logliks)] #Compute upper and lower bounds for 95% confidence interval @@ -129,11 +129,11 @@ def calculateOccupancyMLE(self, mat, bias_mat, params): """Calculate Occupancy track""" offset=self.start - mat.start if offset Date: Sat, 7 Nov 2020 13:41:41 +0800 Subject: [PATCH 3/7] pure python fragments --- pyatac/chunkmat2d.py | 4 +- pyatac/fragments.py | 125 +++++++++++++++++++++++++++++++++++++++++++ pyatac/tracks.py | 4 +- 3 files changed, 129 insertions(+), 4 deletions(-) create mode 100644 pyatac/fragments.py diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 0ffdc7c..3fb36ce 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -3,8 +3,8 @@ import matplotlib.pyplot as plt import matplotlib.cm as cm from pyatac.tracks import InsertionTrack -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) -from fragments import makeFragmentMat +#import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) +from pyatac.fragments import makeFragmentMat class ChunkMat2D: """Class that stores fragments in 2D matrix according to diff --git a/pyatac/fragments.py b/pyatac/fragments.py new file mode 100644 index 0000000..ebd6ad5 --- /dev/null +++ b/pyatac/fragments.py @@ -0,0 +1,125 @@ +#### Import needed modules ##### +import pyatac.seq as seq +import numpy as np +from pysam import AlignmentFile, AlignedSegment +from pysam import FastaFile + +DTYPE = np.float + +#### Import needed modules ##### +#import pysam + +def makeFragmentMat(bamfile, chrom, start, end, lower, upper, atac = 1): + nrow = upper - lower + ncol = end - start + mat = np.zeros( (nrow, ncol), dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) + + + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + row = ilen - lower + col = (ilen-1)/2 + l_pos - start + if col >= 0 and col < ncol and row < nrow and row >= 0: + mat[row, col] += 1 + bamHandle.close() + return mat + +def getInsertions(bamfile, chrom, start, end, lower, upper, atac = 1): + npos = end - start + mat = np.zeros(npos, dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + r_pos = l_pos + ilen - 1 + if ilen >= lower and ilen < upper: + if l_pos >= start and l_pos < end: + mat[l_pos - start] += 1 + if r_pos >= start and r_pos < end: + mat[r_pos - start] += 1 + bamHandle.close() + return mat + +def getStrandedInsertions(bamfile, chrom, start, end, lower, upper, atac = 1): + npos = end - start + matplus = np.zeros(npos, dtype = DTYPE) + matminus = np.zeros(npos, dtype = DTYPE) + bamHandle = AlignmentFile(bamfile) + for read in bamHandle.fetch(chrom, max(0, start - upper), end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + r_pos = l_pos + ilen - 1 + if ilen >= lower and ilen < upper: + if l_pos >= start and l_pos < end: + matplus[l_pos - start] += 1 + if r_pos >= start and r_pos < end: + matminus[r_pos - start] += 1 + bamHandle.close() + return (matplus, matminus) + +def getAllFragmentSizes(bamfile, lower, upper, atac = 1): + sizes = np.zeros(upper - lower, dtype= np.float) + # loop over samfile + bamHandle = AlignmentFile(bamfile) + for read in bamHandle: + if read.is_proper_pair and not read.is_reverse: + if atac: + + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + ilen = abs(read.template_length) + if ilen < upper and ilen >= lower: + sizes[ilen - lower]+=1 + bamHandle.close() + return sizes + +def getFragmentSizesFromChunkList(chunks, bamfile, lower, upper, atac = 1): + sizes = np.zeros(upper - lower, dtype= np.float) + # loop over samfile + bamHandle = AlignmentFile(bamfile) + for chunk in chunks: + for read in bamHandle.fetch(chunk.chrom, max(0, chunk.start - upper), chunk.end + upper): + if read.is_proper_pair and not read.is_reverse: + if atac: + #get left position + l_pos = read.pos + 4 + #get insert size + #correct by 8 base pairs to be inserion to insertion + ilen = abs(read.template_length) - 8 + else: + l_pos = read.pos + ilen = abs(read.template_length) + center = l_pos + (ilen-1) / 2 + if ilen < upper and ilen >= lower and center >= chunk.start and center < chunk.end: + sizes[ilen - lower] += 1 + bamHandle.close() + return sizes + diff --git a/pyatac/tracks.py b/pyatac/tracks.py index f1de3c6..47d2757 100644 --- a/pyatac/tracks.py +++ b/pyatac/tracks.py @@ -9,8 +9,8 @@ from pyatac.bedgraph import BedGraphFile from pyatac.chunk import Chunk from pyatac.utils import smooth -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) -from fragments import getInsertions, getStrandedInsertions +#import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) +from pyatac.fragments import getInsertions, getStrandedInsertions from pyatac.seq import get_sequence, seq_to_mat, complement class Track(Chunk): From aa4ea83a085456a5725382fd37df55d75a8f7811 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 13:54:42 +0800 Subject: [PATCH 4/7] fixed up the list.sort() --- pyatac/chunk.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyatac/chunk.py b/pyatac/chunk.py index 08d3e26..0d68461 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -7,6 +7,7 @@ import gzip import warnings +import functools class Chunk(): """Class that stores reads for a particular chunk of the genome""" @@ -92,12 +93,16 @@ def insert(self, *args): list.insert(self, args[0], args[1]) else: raise ValueError("Expecting Chunk") + def sort(self): """sort regions""" - list.sort(self, cmp = _chunkCompare) + list.sort(self, key=functools.cmp_to_key(_chunkCompare)) # Just hack it; + # Likely to be very slow; + def isSorted(self): """check that regions are sorted""" return all([_chunkCompare(self[i],self[i+1])==-1 for i in range(len(self)-1)]) + def slop(self, chromDict, up = 0, down = 0, new = False): out = ChunkList() for i in self: From 2b533b6cfe9a219e45648023aaccfd21558cdb2e Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 14:11:03 +0800 Subject: [PATCH 5/7] py3 float div fix --- pyatac/chunkmat2d.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 3fb36ce..9011396 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -57,10 +57,12 @@ def assign(self, mat): if mat.shape != self.mat.shape: raise Exception("Dimensions of input mat are wrong. Uh oh!") self.mat = mat + def save(self, filename): """Save object in a text file""" head = ",".join(map(str,[self.chrom,self.start,self.end,self.lower,self.upper])) np.savetxt(filename,self.mat,delimiter="\t", header = head) + @staticmethod def open(filename): f = open(filename,'r') From f068c3084cfd1dd7255b3e999827b60d157950b2 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Sat, 7 Nov 2020 14:53:59 +0800 Subject: [PATCH 6/7] changed to pdf --- nucleoatac/NucleosomeCalling.py | 27 +++++++++++++-------------- nucleoatac/Occupancy.py | 18 ++++++++++++++---- nucleoatac/diff_occ.py | 2 +- nucleoatac/run_nuc.py | 4 ++-- nucleoatac/run_occ.py | 10 ++++++---- nucleoatac/run_vprocess.py | 8 ++++---- pyatac/VMat.py | 25 ++++++++++++++----------- pyatac/chunk.py | 5 +++-- pyatac/chunkmat2d.py | 25 ++++++++++++++++--------- pyatac/fragments.py | 4 ++-- pyatac/get_cov.py | 4 ++-- pyatac/get_nucleotide.py | 2 +- pyatac/get_pwm.py | 4 ++-- pyatac/get_sizes.py | 2 +- pyatac/make_bias_track.py | 4 +++- pyatac/make_bias_vplot.py | 11 +++++------ pyatac/make_vplot.py | 6 +++--- pyatac/seq.py | 4 ++-- pyatac/signal_around_sites.py | 4 ++-- pyatac/tracks.py | 13 +++++-------- pyatac/utils.py | 2 +- 21 files changed, 102 insertions(+), 82 deletions(-) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index c4df1ba..9e32837 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -61,7 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov): self.bias_mat.start + offset, self.bias_mat.end - offset), vmat.mat,mode = 'valid')[0] - self.vals = self.vals * self.nuc_cov/ self.cov.vals + self.vals = self.vals * self.nuc_cov // self.cov.vals @@ -91,8 +91,7 @@ def analMean(self): def norm(x, v, w, mean): """compute values of normal pdf with given mean and sd at values in x""" - norm = (1.0/(np.sqrt(2*np.pi*v)) * - np.exp(-(x - mean)**2/(2*v))) + norm = (1.0/(np.sqrt(2*np.pi*v)) * np.exp(-(x - mean)**2/(2*v))) norm = norm * (w/max(norm)) return norm @@ -139,7 +138,7 @@ def addNorms(x,params): """Add several normal distributions together""" l = len(x) fit = np.zeros(l) - i = len(params)/3 + i = len(params)//3 for j in range(i): fit += norm(x,params[j*3],params[3*j+1],params[3*j+2]) return fit @@ -156,21 +155,21 @@ def fitNorm(guess, bound, sig): allnucs = nuctrack.sorted_nuc_keys x = bisect_left(allnucs,index) if x == 0: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) elif index - allnucs[x-1] < nuctrack.params.nonredundant_sep: left = allnucs[x-1] means = (index - allnucs[x-1],0) else: - left = index - nuctrack.params.nonredundant_sep/3 - means = (nuctrack.params.nonredundant_sep/3,) + left = index - nuctrack.params.nonredundant_sep//3 + means = (nuctrack.params.nonredundant_sep//3,) if x == len(allnucs)-1: - right = index + nuctrack.params.nonredundant_sep/3 + 1 + right = index + nuctrack.params.nonredundant_sep//3 + 1 elif allnucs[x+1] - index < nuctrack.params.nonredundant_sep: right = allnucs[x+1] means += (allnucs[x+1] - left,) else: - right = index + nuctrack.params.nonredundant_sep/3 +1 + right = index + nuctrack.params.nonredundant_sep//3 +1 sig = nuctrack.smoothed.vals[left:right] sig[sig<0] = 0 if len(means)==1: @@ -237,14 +236,14 @@ def __init__(self, chunk): def initialize(self, parameters): self.params = parameters def getFragmentMat(self): - self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper/2+1), - self.end + max(self.params.window,self.params.upper/2+1), 0, self.params.upper, atac = self.params.atac) + self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper//2+1), + self.end + max(self.params.window,self.params.upper//2+1), 0, self.params.upper, atac = self.params.atac) self.mat.makeFragmentMat(self.params.bam) def makeBiasMat(self): self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window, self.end + self.params.window, 0, self.params.upper) - bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2, - self.end + self.params.window + self.params.upper/2 + 1, log = True) + bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2, + self.end + self.params.window + self.params.upper//2 + 1, log = True) if self.params.fasta is not None: bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm) self.bias_mat.makeBiasMat(bias_track) diff --git a/nucleoatac/Occupancy.py b/nucleoatac/Occupancy.py index 6b3ac1d..c1d3707 100644 --- a/nucleoatac/Occupancy.py +++ b/nucleoatac/Occupancy.py @@ -8,7 +8,6 @@ from scipy import signal, optimize, stats import numpy as np import matplotlib.pyplot as plt -import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragmentsizes import FragmentSizes from pyatac.tracks import Track, CoverageTrack from pyatac.chunk import Chunk @@ -23,9 +22,11 @@ class FragmentMixDistribution: def __init__(self, lower = 0, upper =2000): self.lower = lower self.upper = upper + def getFragmentSizes(self, bamfile, chunklist = None): self.fragmentsizes = FragmentSizes(self.lower, self.upper) self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist) + def modelNFR(self, boundaries = (35,115)): """Model NFR distribution with gamma distribution""" b = np.where(self.fragmentsizes.get(self.lower,boundaries[1]) == max(self.fragmentsizes.get(self.lower,boundaries[1])))[0][0] + self.lower @@ -64,6 +65,7 @@ def gamma_fit(X,o,p): self.nfr_fit.get(boundaries[1],self.upper))) nuc[nuc<=0]=min(min(nfr)*0.1,min(nuc[nuc>0])*0.001) self.nuc_fit = FragmentSizes(self.lower, self.upper, vals = nuc) + def plotFits(self,filename=None): """plot the Fits""" fig = plt.figure() @@ -190,7 +192,7 @@ def __init__(self, insert_dist, upper, fasta, pwm, sep = 120, min_occ = 0.1, fla if step%2 == 0: step = step - 1 self.step = step - self.halfstep = (self.step-1) / 2 + self.halfstep = (self.step-1) // 2 class OccChunk(Chunk): """Class for calculating occupancy and occupancy peaks @@ -201,27 +203,32 @@ def __init__(self, chunk): self.chrom = chunk.chrom self.peaks = {} self.nfrs = [] + def getFragmentMat(self): self.mat = FragmentMat2D(self.chrom, self.start - self.params.flank, self.end + self.params.flank, 0, self.params.upper) self.mat.makeFragmentMat(self.params.bam) + def makeBiasMat(self): self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.flank, self.end + self.params.flank, 0, self.params.upper) if self.params.fasta is not None: - bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2, - self.end + self.params.window + self.params.upper/2 + 1, log = True) + bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2, + self.end + self.params.window + self.params.upper//2 + 1, log = True) bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm) self.bias_mat.makeBiasMat(bias_track) + def calculateOcc(self): """calculate occupancy for chunk""" self.occ = OccupancyTrack(self.chrom,self.start,self.end) self.occ.calculateOccupancyMLE(self.mat, self.bias_mat, self.params) self.occ.makeSmoothed(window_len = self.params.window, sd = self.params.flank/3.0) + def getCov(self): """Get read coverage for regions""" self.cov = CoverageTrack(self.chrom, self.start, self.end) self.cov.calculateCoverage(self.mat, 0, self.params.upper, self.params.window) + def callPeaks(self): """Call peaks of occupancy profile""" peaks = call_peaks(self.occ.smoothed_vals, sep = self.params.sep, min_signal = self.params.min_occ) @@ -229,6 +236,7 @@ def callPeaks(self): tmp = OccPeak(peak + self.start, self) if tmp.occ_lower > self.params.min_occ and tmp.reads > 0: self.peaks[peak] = tmp + def getNucDist(self): """Get nucleosomal insert distribution""" nuc_dist = np.zeros(self.params.upper) @@ -238,6 +246,7 @@ def getNucDist(self): sub_sum = sub_sum / float(sum(sub_sum)) nuc_dist += sub_sum return(nuc_dist) + def process(self, params): """proces chunk -- calculat occupancy, get coverage, call peaks""" self.params = params @@ -246,6 +255,7 @@ def process(self, params): self.calculateOcc() self.getCov() self.callPeaks() + def removeData(self): """remove data from chunk-- deletes all attributes""" names = list(self.__dict__.keys()) diff --git a/nucleoatac/diff_occ.py b/nucleoatac/diff_occ.py index ed64e36..9033ea7 100644 --- a/nucleoatac/diff_occ.py +++ b/nucleoatac/diff_occ.py @@ -57,7 +57,7 @@ def run_diff(args, bases = 500000): """ chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down)) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down)) chunks.merge() maxQueueSize = max(2,int(100 * bases / np.mean([chunk.length() for chunk in chunks]))) #get fragmentsizes diff --git a/nucleoatac/run_nuc.py b/nucleoatac/run_nuc.py index f51ef33..e19c49b 100644 --- a/nucleoatac/run_nuc.py +++ b/nucleoatac/run_nuc.py @@ -148,8 +148,8 @@ def run_nuc(args): else: chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = vmat.mat.shape[1] + vmat.upper/2 + max(pwm.up,pwm.down) + args.nuc_sep/2, min_length = args.nuc_sep * 2) - chunks.slop(chrs, up = args.nuc_sep/2, down = args.nuc_sep/2) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = vmat.mat.shape[1] + vmat.upper//2 + max(pwm.up,pwm.down) + args.nuc_sep//2, min_length = args.nuc_sep * 2) + chunks.slop(chrs, up = args.nuc_sep//2, down = args.nuc_sep//2) chunks.merge() maxQueueSize = args.cores*10 if args.sizes is not None: diff --git a/nucleoatac/run_occ.py b/nucleoatac/run_occ.py index d709938..78b51de 100644 --- a/nucleoatac/run_occ.py +++ b/nucleoatac/run_occ.py @@ -83,8 +83,8 @@ def run_occ(args): else: chrs = read_chrom_sizes_from_bam(args.bam) pwm = PWM.open(args.pwm) - chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down) + args.nuc_sep/2) - chunks.slop(chrs, up = args.nuc_sep/2, down = args.nuc_sep/2) + chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down) + args.nuc_sep//2) + chunks.slop(chrs, up = args.nuc_sep//2, down = args.nuc_sep//2) chunks.merge() maxQueueSize = args.cores*10 fragment_dist = FragmentMixDistribution(0, upper = args.upper) @@ -94,7 +94,7 @@ def run_occ(args): else: fragment_dist.getFragmentSizes(args.bam, chunks) fragment_dist.modelNFR() - fragment_dist.plotFits(args.out + '.occ_fit.eps') + fragment_dist.plotFits(args.out + '.occ_fit.pdf') fragment_dist.fragmentsizes.save(args.out + '.fragmentsizes.txt') params = OccupancyParameters(fragment_dist, args.upper, args.fasta, args.pwm, sep = args.nuc_sep, min_occ = args.min_occ, flank = args.flank, bam = args.bam, ci = args.confidence_interval, step = args.step) @@ -115,6 +115,7 @@ def run_occ(args): peaks_process = mp.Process(target = _writePeaks, args=(peaks_queue, args.out)) peaks_process.start() nuc_dist = np.zeros(args.upper) + for j in sets: tmp = pool1.map(_occHelper, list(zip(j,itertools.repeat(params)))) for result in tmp: @@ -130,6 +131,7 @@ def run_occ(args): pysam.tabix_compress(args.out + '.occpeaks.bed', args.out + '.occpeaks.bed.gz',force = True) shell_command('rm ' + args.out + '.occpeaks.bed') pysam.tabix_index(args.out + '.occpeaks.bed.gz', preset = "bed", force = True) + for i in ('occ','occ.lower_bound','occ.upper_bound'): pysam.tabix_compress(args.out + '.' + i + '.bedgraph', args.out + '.'+i+'.bedgraph.gz',force = True) shell_command('rm ' + args.out + '.' + i + '.bedgraph') @@ -144,7 +146,7 @@ def run_occ(args): plt.plot(list(range(0,args.upper)),dist_out.get(0,args.upper),label = "Nucleosome Distribution") plt.xlabel("Fragment Size") plt.ylabel("Frequency") - fig.savefig(args.out+'.nuc_dist.eps') + fig.savefig(args.out+'.nuc_dist.pdf') plt.close(fig) diff --git a/nucleoatac/run_vprocess.py b/nucleoatac/run_vprocess.py index 401f12e..7f81642 100644 --- a/nucleoatac/run_vprocess.py +++ b/nucleoatac/run_vprocess.py @@ -33,13 +33,13 @@ def run_vprocess(args): #Make extra plots if requeted if args.plot_extra: vmat.autoCorr() - vmat.plot_auto(args.out+'.vplot.Autocorr.eps') + vmat.plot_auto(args.out+'.vplot.Autocorr.pdf') vmat.converto1d() - vmat.plot_1d(args.out+'.vplot.InsertionProfile.eps') - vmat.plot_insertsize(args.out+'.vplot.InsertSizes.eps') + vmat.plot_1d(args.out+'.vplot.InsertionProfile.pdf') + vmat.plot_insertsize(args.out+'.vplot.InsertSizes.pdf') #make plot and save vmat.save(args.out+".VMat") - vmat.plot(filename = args.out+".VMat.eps") + vmat.plot(filename = args.out+".VMat.pdf") diff --git a/pyatac/VMat.py b/pyatac/VMat.py index 683826f..f0af169 100644 --- a/pyatac/VMat.py +++ b/pyatac/VMat.py @@ -34,7 +34,8 @@ def __init__(self, mat, lower, upper): self.mat = mat self.upper = upper self.lower = lower - self.w = mat.shape[1]/2 + self.w = mat.shape[1]//2 + def trim(self,lower,upper,w): """reduce the size of the vplot @@ -108,20 +109,20 @@ def norm_y(self,dist): def converto1d(self): """convert the 2d matrix to a 1d representation of insertions""" self.one_d = np.zeros(self.upper + self.upper%2 +2*self.w+1) - center = self.upper/2 + self.w + center = self.upper//2 + self.w for j in range(self.mat.shape[0]): for i in range(self.mat.shape[1]): ilen=j+self.lower val = copy(self.mat[j,i]) if ilen%2==0: - self.one_d[center-(self.w-i)-(ilen/2)]+= val - self.one_d[center-(self.w-i)+(ilen/2)]+= val + self.one_d[center-(self.w-i)-(ilen//2)]+= val + self.one_d[center-(self.w-i)+(ilen//2)]+= val else: - self.one_d[center-(self.w-i)-(ilen/2)]+= val * 0.5 - self.one_d[center-(self.w-i)+(ilen/2)]+= val * 0.5 - self.one_d[center-(self.w-i)-(ilen/2+1)]+= val * 0.5 - self.one_d[center-(self.w-i)+(ilen/2+1)]+= val * 0.5 - self.one_d = self.one_d / sum(self.one_d) + self.one_d[center-(self.w-i)-(ilen//2)]+= val * 0.5 + self.one_d[center-(self.w-i)+(ilen//2)]+= val * 0.5 + self.one_d[center-(self.w-i)-(ilen//2+1)]+= val * 0.5 + self.one_d[center-(self.w-i)+(ilen//2+1)]+= val * 0.5 + self.one_d = self.one_d // sum(self.one_d) def plot(self, mat=None, title=None, filename=None): """Plot current main matrix or specified matrix (of same dimensions)""" if mat is None: @@ -142,10 +143,11 @@ def plot(self, mat=None, title=None, filename=None): plt.close(fig) else: fig.show() + def plot_1d(self,filename=None): """plot the 1d insertion representation of the matrix""" fig = plt.figure() - xlim = len(self.one_d)/2 + xlim = len(self.one_d)//2 plt.plot(list(range(-xlim,xlim+1)),self.one_d) plt.vlines(-73,0,max(self.one_d)*1.1,linestyles='dashed') plt.vlines(73,0,max(self.one_d)*1.1,linestyles='dashed') @@ -159,11 +161,12 @@ def plot_1d(self,filename=None): np.savetxt(filename2,self.one_d,delimiter="\t") else: fig.show() + def plot_insertsize(self,filename=None): """plot the insert size disribution in the main matrix""" fig = plt.figure() ins = np.sum(self.mat,axis=1) - ins = ins/sum(ins) + ins = ins // sum(ins) plt.plot(list(range(self.lower,self.upper)),ins) plt.xlabel("Insert Size") plt.ylabel("Frequency") diff --git a/pyatac/chunk.py b/pyatac/chunk.py index 0d68461..fa78646 100644 --- a/pyatac/chunk.py +++ b/pyatac/chunk.py @@ -39,12 +39,13 @@ def slop(self, chromDict, up = 0, down = 0, new = False): else: self.start = newStart self.end = newEnd + def center(self, new = False): if self.strand == "-": - newEnd = self.end - (self.length()/2) + newEnd = self.end - (self.length()//2) newStart = newEnd - 1 else: - newStart = self.start + (self.length()/2) + newStart = self.start + (self.length()//2) newEnd = newStart +1 if new: out = Chunk(self.chrom, newStart, newEnd, diff --git a/pyatac/chunkmat2d.py b/pyatac/chunkmat2d.py index 9011396..4f78a99 100644 --- a/pyatac/chunkmat2d.py +++ b/pyatac/chunkmat2d.py @@ -18,6 +18,7 @@ def __init__(self, chrom, start, end, lower, upper): self.ncol = end - start self.nrow = upper - lower self.mat = np.zeros((self.nrow, self.ncol)) + def get(self, lower = None, upper = None, start = None, end = None, flip = False): """get a subset (or all) of the matrix stored by ChunkMat2D object based on chromosal position and/or insert size""" @@ -52,6 +53,7 @@ def get(self, lower = None, upper = None, start = None, end = None, flip = False else: new[j,:] = self.mat[j,(x1-1):(x2)][::-1][1:] return new + def assign(self, mat): """assign a matrix to object""" if mat.shape != self.mat.shape: @@ -73,15 +75,16 @@ def open(filename): new= ChunkMat2D(elements[0],elements[1],elements[2],elements[3]) new.assign(mat) return new + def getIns(self): """Collape matrix into insertions. Will reduce span on chromosome""" pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) - mid = self.upper/2 + mid = self.upper//2 for i in range(self.lower,self.upper): - pattern[i-self.lower,mid+(i-1)/2]=1 - pattern[i-self.lower,mid-(i/2)]=1 + pattern[i-self.lower, mid+(i-1)//2]=1 + pattern[i-self.lower, mid-(i//2)]=1 ins = signal.correlate2d(self.mat,pattern,mode="valid")[0] - insertion_track = InsertionTrack(self.chrom,self.start + pattern.shape[1]/2, self.end - (pattern.shape[1]/2)) + insertion_track = InsertionTrack(self.chrom,self.start + pattern.shape[1]//2, self.end - (pattern.shape[1]//2)) insertion_track.assign_track(ins) return insertion_track def plot(self, filename = None, title = None, lower = None, @@ -119,7 +122,7 @@ def __init__(self, chrom, start, end, lower, upper, atac = True): def updateMat(self, fragment): row = fragment.insert - self.lower if self.mode == "centers": - col = (fragment.insert-1)/2 + fragment.left - self.start + col = (fragment.insert-1)//2 + fragment.left - self.start if col>=0 and col=0: self.mat[row, col] += 1 else: @@ -139,20 +142,24 @@ class BiasMat2D(ChunkMat2D): def __init__(self, chrom, start, end, lower, upper): ChunkMat2D.__init__(self,chrom, start, end, lower, upper) self.mat = np.ones(self.mat.shape) + def makeBiasMat(self, bias_track): """Make 2D matrix representing sequence bias preferences""" - offset = self.upper/2 + offset = self.upper//2 bias = bias_track.get(self.start-offset,self.end+offset) if not bias_track.log: nonzero = np.where(bias !=0)[0] bias = np.log(bias + min(bias[nonzero])) + pattern = np.zeros((self.upper-self.lower,self.upper + (self.upper-1)%2)) - mid = self.upper/2 + mid = self.upper//2 for i in range(self.lower,self.upper): - pattern[i-self.lower,mid+(i-1)/2]=1 - pattern[i-self.lower,mid-(i/2)]=1 + pattern[i-self.lower,mid+(i-1)//2]=1 + pattern[i-self.lower,mid-(i//2)]=1 + for i in range(self.upper-self.lower): self.mat[i]=np.exp(np.convolve(bias,pattern[i,:],mode='valid')) + def normByInsertDist(self, insertsizes): inserts = insertsizes.get(self.lower,self.upper) self.mat = self.mat * np.reshape(np.tile(inserts,self.mat.shape[1]),self.mat.shape,order="F") diff --git a/pyatac/fragments.py b/pyatac/fragments.py index ebd6ad5..379db1e 100644 --- a/pyatac/fragments.py +++ b/pyatac/fragments.py @@ -28,7 +28,7 @@ def makeFragmentMat(bamfile, chrom, start, end, lower, upper, atac = 1): l_pos = read.pos ilen = abs(read.template_length) row = ilen - lower - col = (ilen-1)/2 + l_pos - start + col = (ilen-1)//2 + l_pos - start if col >= 0 and col < ncol and row < nrow and row >= 0: mat[row, col] += 1 bamHandle.close() @@ -117,7 +117,7 @@ def getFragmentSizesFromChunkList(chunks, bamfile, lower, upper, atac = 1): else: l_pos = read.pos ilen = abs(read.template_length) - center = l_pos + (ilen-1) / 2 + center = l_pos + (ilen-1) // 2 if ilen < upper and ilen >= lower and center >= chunk.start and center < chunk.end: sizes[ilen - lower] += 1 bamHandle.close() diff --git a/pyatac/get_cov.py b/pyatac/get_cov.py index 4b4d729..854e277 100644 --- a/pyatac/get_cov.py +++ b/pyatac/get_cov.py @@ -22,7 +22,7 @@ def _covHelper(arg): """Computes coverage track for a particular set of bed regions""" (chunk, args) = arg try: - offset = args.window / 2 + offset = args.window // 2 mat = FragmentMat2D(chunk.chrom,chunk.start - offset, chunk.end + offset, args.lower, args.upper, args.atac) mat.makeFragmentMat(args.bam) cov = CoverageTrack(chunk.chrom, chunk.start, chunk.end) @@ -62,7 +62,7 @@ def get_cov(args, bases = 50000, splitsize = 1000): if args.bed is None: chrs = read_chrom_sizes_from_bam(args.bam) chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items=bases//splitsize) else: chunks = ChunkList.read(args.bed) chunks.merge() diff --git a/pyatac/get_nucleotide.py b/pyatac/get_nucleotide.py index a0cfd25..112ef8c 100644 --- a/pyatac/get_nucleotide.py +++ b/pyatac/get_nucleotide.py @@ -74,7 +74,7 @@ def get_nucleotide(args): for i in tmp: result += i[0] n += i[1] - result = result / n + result = result // n if args.norm: normfreqs = seq.getNucFreqs(params.fasta, params.nucleotides) result = result / np.reshape(np.repeat(normfreqs,result.shape[1]),result.shape) diff --git a/pyatac/get_pwm.py b/pyatac/get_pwm.py index b185462..ab81d7a 100644 --- a/pyatac/get_pwm.py +++ b/pyatac/get_pwm.py @@ -62,7 +62,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): chrs = read_chrom_sizes_from_fasta(args.fasta) if args.bed is None: chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize, offset = args.flank) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank) sets = chunks.split(bases = bases) @@ -88,7 +88,7 @@ def get_pwm(args, bases = 50000, splitsize = 1000): left = result[:,0:(args.flank + 1)] right = result[:,args.flank:] rightflipped = np.fliplr(np.flipud(right)) - combined = (left + rightflipped) / 2 + combined = (left + rightflipped) // 2 result = np.hstack((combined, np.fliplr(np.flipud(combined[:,0:args.flank])))) #save pwm = PWM(result, args.flank, args.flank, params.nucleotides) diff --git a/pyatac/get_sizes.py b/pyatac/get_sizes.py index aac0905..1e040bd 100644 --- a/pyatac/get_sizes.py +++ b/pyatac/get_sizes.py @@ -33,6 +33,6 @@ def get_sizes(args): plt.plot(list(range(sizes.lower,sizes.upper)),sizes.get(sizes.lower,sizes.upper),label = args.out) plt.xlabel("Fragment Size") plt.ylabel("Frequency") - fig.savefig(args.out+'.fragmentsizes.eps') + fig.savefig(args.out+'.fragmentsizes.pdf') plt.close(fig) diff --git a/pyatac/make_bias_track.py b/pyatac/make_bias_track.py index e6204c7..ec777a5 100644 --- a/pyatac/make_bias_track.py +++ b/pyatac/make_bias_track.py @@ -61,14 +61,16 @@ def make_bias_track(args, bases = 500000, splitsize = 1000): else: args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1]) params = _BiasParams(args.fasta, args.pwm) + if args.bed is None: chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize) - sets = chunks.split(items = bases/splitsize) + sets = chunks.split(items = bases//splitsize) else: chunks = ChunkList.read(args.bed) chunks.checkChroms(list(params.chrs.keys())) chunks.merge() sets = chunks.split(bases = bases) + maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks]))) pool = mp.Pool(processes = max(1,args.cores-1)) out_handle = open(args.out + '.Scores.bedgraph','w') diff --git a/pyatac/make_bias_vplot.py b/pyatac/make_bias_vplot.py index a7c70c2..6e8e8a5 100644 --- a/pyatac/make_bias_vplot.py +++ b/pyatac/make_bias_vplot.py @@ -31,10 +31,9 @@ def _biasVplotHelper(arg): for chunk in chunks: try: chunk.center() - biastrack = InsertionBiasTrack(chunk.chrom, chunk.start - params.flank - 1 - (params.upper/2), - chunk.end + params.flank + params.upper/2+1) + biastrack = InsertionBiasTrack(chunk.chrom, chunk.start-params.flank-1-(params.upper//2), chunk.end+params.flank+params.upper//2+1) if params.bg is not None: - biastrack.read_track(params.bg, empty = 0) + biastrack.read_track(params.bg, empty=0) else: biastrack.computeBias(params.fasta, params.chrs, params.pwm) biasmat = BiasMat2D(chunk.chrom, chunk.start - params.flank - 1, chunk.end + params.flank, @@ -86,13 +85,13 @@ def make_bias_vplot(args): result = sum(tmp) ##Turn matrix into VMat object vmat=VMat(result,args.lower,args.upper) - vmat.plot(filename=args.out+".Bias.Vplot.eps") + vmat.plot(filename=args.out+".Bias.Vplot.pdf") if args.plot_extra: ##get insertion profile represented by vplot vmat.converto1d() - vmat.plot_1d(filename=args.out+'.InsertionProfile.eps') + vmat.plot_1d(filename=args.out+'.InsertionProfile.pdf') #get insert size dstribution represented by vplot - vmat.plot_insertsize(filename= args.out + ".InsertSizes.eps") + vmat.plot_insertsize(filename= args.out + ".InsertSizes.pdf") ##save vmat.save(args.out+".Bias.VMat") diff --git a/pyatac/make_vplot.py b/pyatac/make_vplot.py index 300d742..bca7a11 100644 --- a/pyatac/make_vplot.py +++ b/pyatac/make_vplot.py @@ -74,13 +74,13 @@ def make_vplot(args): ##Turn matrix into VMat object vmat=V.VMat(result,args.lower,args.upper) if not args.no_plot: - vmat.plot(filename=args.out+".Vplot.eps") + vmat.plot(filename=args.out+".Vplot.pdf") if args.plot_extra: ##get insertion profile represented by vplot vmat.converto1d() - vmat.plot_1d(filename=args.out+'.InsertionProfile.eps') + vmat.plot_1d(filename=args.out+'.InsertionProfile.pdf') #get insert size dstribution represented by vplot - vmat.plot_insertsize(filename= args.out + ".InsertSizes.eps") + vmat.plot_insertsize(filename= args.out + ".InsertSizes.pdf") ##save vmat.save(args.out+".VMat") diff --git a/pyatac/seq.py b/pyatac/seq.py index 75296b4..f2a2dee 100644 --- a/pyatac/seq.py +++ b/pyatac/seq.py @@ -55,7 +55,7 @@ def getNucFreqs(fasta, nucleotides): out += [sequence.count(i) for i in nucleotides] n += len(sequence) f.close() - return out/n + return out//n def getNucFreqsFromChunkList(chunks, fasta, nucleotides): @@ -69,5 +69,5 @@ def getNucFreqsFromChunkList(chunks, fasta, nucleotides): out += [sequence.count(i) for i in nucleotides] n += len(sequence) handle.close() - return out/n + return out//n diff --git a/pyatac/signal_around_sites.py b/pyatac/signal_around_sites.py index 7877276..4ffaa9f 100644 --- a/pyatac/signal_around_sites.py +++ b/pyatac/signal_around_sites.py @@ -108,12 +108,12 @@ def get_signal(args): result = sum(tmp) if not args.no_agg: if args.norm: - result = result / len(chunks) + result = result // len(chunks) fig = plt.figure() plt.plot(list(range(-args.up,args.down+1)),result) plt.xlabel("Position relative to Site") plt.ylabel("Signal Intensity") - fig.savefig(args.out+'.agg.track.eps') + fig.savefig(args.out+'.agg.track.pdf') plt.close(fig) np.savetxt(args.out+'.agg.track.txt',result,delimiter="\t") diff --git a/pyatac/tracks.py b/pyatac/tracks.py index 47d2757..9881aae 100644 --- a/pyatac/tracks.py +++ b/pyatac/tracks.py @@ -9,7 +9,6 @@ from pyatac.bedgraph import BedGraphFile from pyatac.chunk import Chunk from pyatac.utils import smooth -#import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()}) from pyatac.fragments import getInsertions, getStrandedInsertions from pyatac.seq import get_sequence, seq_to_mat, complement @@ -105,8 +104,8 @@ def smooth_track(self, window_len, window='flat', sd = None, self.vals = smooth(self.vals, window_len, window = window, sd = sd, mode = mode, norm = norm) if mode == 'valid': - self.start = self.start + window_len/2 - self.end = self.end - window_len/2 + self.start = self.start + window_len//2 + self.end = self.end - window_len//2 def get(self, start = None, end = None, pos = None): """Obtain value of track at particular interval or position""" if pos: @@ -208,10 +207,9 @@ def __init__(self, chrom, start, end): Track.__init__(self, chrom, start, end, "coverage") def calculateCoverage(self, mat, lower, upper, window_len): """Compute coverage of fragment centers using flat window""" - offset=self.start-mat.start-(window_len/2) + offset=self.start-mat.start-(window_len//2) if offset<0: - raise Exception("Insufficient flanking region on \ - mat to calculate coverage with desired window") + raise Exception("Insufficient flanking region on mat to calculate coverage with desired window") lower=lower-mat.lower upper=upper-mat.lower if offset!=0: @@ -224,8 +222,7 @@ def calculateCoverageSmooth(self,mat,lower,upper,window_len,sd): """Compute coverage of fragment centers using gaussia window""" offset=self.start-mat.start-(window_len/2) if offset<0: - raise Exception("Insufficient flanking region on \ - mat to calculate coverage with desired window") + raise Exception("Insufficient flanking region on mat to calculate coverage with desired window") lower=lower-mat.lower upper=upper-mat.lower if offset!=0: diff --git a/pyatac/utils.py b/pyatac/utils.py index d972f4d..9922f54 100644 --- a/pyatac/utils.py +++ b/pyatac/utils.py @@ -90,7 +90,7 @@ def call_peaks(sigvals, min_signal = 0, sep = 120, boundary = None, order =1): replace = min(sigvals[~np.isnan(sigvals)]) sigvals[np.isnan(sigvals)]=replace if boundary is None: - boundary = sep/2 + boundary = sep//2 random = np.random.RandomState(seed = 25) l = len(sigvals) peaks = signal.argrelmax(sigvals *(1+ From e1ec144df924ab48f5f6fe65c52e4ec40d5a89f8 Mon Sep 17 00:00:00 2001 From: oaxiom Date: Fri, 4 Dec 2020 06:44:50 +0800 Subject: [PATCH 7/7] Port to python 3 and pure python --- nucleoatac/NucleosomeCalling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nucleoatac/NucleosomeCalling.py b/nucleoatac/NucleosomeCalling.py index 9e32837..9959b70 100644 --- a/nucleoatac/NucleosomeCalling.py +++ b/nucleoatac/NucleosomeCalling.py @@ -297,7 +297,7 @@ def findAllNucs(self): #find peaks in normalized sigal cands1 = call_peaks(combined, min_signal = 0, sep = self.params.redundant_sep, - boundary = self.params.nonredundant_sep/2, order = self.params.redundant_sep/2) + boundary = self.params.nonredundant_sep//2, order = self.params.redundant_sep//2) for i in cands1: nuc = Nucleosome(i + self.start, self) if nuc.nuc_cov > self.params.min_reads: