8 changes: 4 additions & 4 deletions bin/nucleoatac
100644 → 100755
@@ -24,14 +24,14 @@ from nucleoatac.cli import nucleoatac_parser, nucleoatac_main


if __name__ == '__main__':
print "Command run: "+ ' '.join(map(str,sys.argv))
print "nucleoatac version " + __version__
print "start run at: " + time.strftime("%Y-%m-%d %H:%M")
print("Command run: "+ ' '.join(map(str,sys.argv)))
print("nucleoatac version " + __version__)
print("start run at: " + time.strftime("%Y-%m-%d %H:%M"))
try:
parser = nucleoatac_parser()
args = parser.parse_args()
nucleoatac_main(args)
print "end run at: " + time.strftime("%Y-%m-%d %H:%M")
print("end run at: " + time.strftime("%Y-%m-%d %H:%M"))
except KeyboardInterrupt:
sys.stderr.write("User interrupted nucleoatac.")
sys.exit(0)
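
Note on the change above: Python 3 removes the print statement, so the bare print lines would be a SyntaxError; print() must be called as a function. A minimal standalone sketch of the pattern (illustrative, not code from this repository):

import sys
import time

# Python 3: print is a function; string concatenation still works, and
# map(str, ...) is kept for parity, though sys.argv items are already strings.
print("Command run: " + ' '.join(map(str, sys.argv)))
print("start run at: " + time.strftime("%Y-%m-%d %H:%M"))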
8 changes: 4 additions & 4 deletions bin/pyatac
@@ -23,14 +23,14 @@ from pyatac.cli import pyatac_parser, pyatac_main


if __name__ == '__main__':
print "Command run: "+ ' '.join(map(str,sys.argv))
print "pyatac version "+__version__
print "start run at: " + time.strftime("%Y-%m-%d %H:%M")
print("Command run: "+ ' '.join(map(str,sys.argv)))
print("pyatac version "+__version__)
print("start run at: " + time.strftime("%Y-%m-%d %H:%M"))
try:
parser = pyatac_parser()
args = parser.parse_args()
pyatac_main(args)
print "end run at: " + time.strftime("%Y-%m-%d %H:%M")
print("end run at: " + time.strftime("%Y-%m-%d %H:%M"))
except KeyboardInterrupt:
sys.stderr.write("User interrupted pyatac.")
sys.exit(0)
4 changes: 2 additions & 2 deletions nucleoatac/NFRCalling.py
@@ -89,7 +89,7 @@ def findNFRs(self):
if self.chrom in tbx.contigs:
for row in tbx.fetch(self.chrom, self.start, self.end, parser = pysam.asTuple()):
nucs.append(int(row[1]))
for j in xrange(1,len(nucs)):
for j in range(1,len(nucs)):
left = nucs[j-1] + 73
right = nucs[j] - 72
if right <= left:
@@ -106,7 +106,7 @@ def process(self, params):
self.findNFRs()
def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self,name)
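
Two Python 3 idioms appear in this file: xrange is gone (range is a lazy sequence object in Python 3), and dict.keys() returns a live view, so the keys are copied with list() before attributes are deleted. A small self-contained sketch of why the copy matters (illustrative only, not code from this PR):

class Dummy:
    def __init__(self):
        self.a, self.b = 1, 2
    def removeData(self):
        # Copy the key view first: delattr mutates __dict__, and mutating a
        # dict while iterating over its view raises RuntimeError in Python 3.
        for name in list(self.__dict__.keys()):
            delattr(self, name)

d = Dummy()
d.removeData()
assert d.__dict__ == {}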

35 changes: 17 additions & 18 deletions nucleoatac/NucleosomeCalling.py
@@ -61,7 +61,7 @@ def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
self.bias_mat.start + offset,
self.bias_mat.end - offset),
vmat.mat,mode = 'valid')[0]
self.vals = self.vals * self.nuc_cov/ self.cov.vals
self.vals = self.vals * self.nuc_cov // self.cov.vals



@@ -79,7 +79,7 @@ def simulateReads(self):
sim_mat = np.reshape(sim_vect, self.vmat.mat.shape)
return sim_mat
def simulateDist(self, numiters = 1000):
self.scores = map(lambda x: np.sum(self.simulateReads() * self.vmat.mat),range(numiters))
self.scores = [np.sum(self.simulateReads() * self.vmat.mat) for x in range(numiters)]
def analStd(self):
flatv = np.ravel(self.vmat.mat)
var = calculateCov(self.probs, flatv, self.reads)
@@ -91,8 +91,7 @@ def analMean(self):

def norm(x, v, w, mean):
"""compute values of normal pdf with given mean and sd at values in x"""
norm = (1.0/(np.sqrt(2*np.pi*v)) *
np.exp(-(x - mean)**2/(2*v)))
norm = (1.0/(np.sqrt(2*np.pi*v)) * np.exp(-(x - mean)**2/(2*v)))
norm = norm * (w/max(norm))
return norm

@@ -139,7 +138,7 @@ def addNorms(x,params):
"""Add several normal distributions together"""
l = len(x)
fit = np.zeros(l)
i = len(params)/3
i = len(params)//3
for j in range(i):
fit += norm(x,params[j*3],params[3*j+1],params[3*j+2])
return fit
@@ -156,21 +155,21 @@ def fitNorm(guess, bound, sig):
allnucs = nuctrack.sorted_nuc_keys
x = bisect_left(allnucs,index)
if x == 0:
left = index - nuctrack.params.nonredundant_sep/3
means = (nuctrack.params.nonredundant_sep/3,)
left = index - nuctrack.params.nonredundant_sep//3
means = (nuctrack.params.nonredundant_sep//3,)
elif index - allnucs[x-1] < nuctrack.params.nonredundant_sep:
left = allnucs[x-1]
means = (index - allnucs[x-1],0)
else:
left = index - nuctrack.params.nonredundant_sep/3
means = (nuctrack.params.nonredundant_sep/3,)
left = index - nuctrack.params.nonredundant_sep//3
means = (nuctrack.params.nonredundant_sep//3,)
if x == len(allnucs)-1:
right = index + nuctrack.params.nonredundant_sep/3 + 1
right = index + nuctrack.params.nonredundant_sep//3 + 1
elif allnucs[x+1] - index < nuctrack.params.nonredundant_sep:
right = allnucs[x+1]
means += (allnucs[x+1] - left,)
else:
right = index + nuctrack.params.nonredundant_sep/3 +1
right = index + nuctrack.params.nonredundant_sep//3 +1
sig = nuctrack.smoothed.vals[left:right]
sig[sig<0] = 0
if len(means)==1:
@@ -237,14 +236,14 @@ def __init__(self, chunk):
def initialize(self, parameters):
self.params = parameters
def getFragmentMat(self):
self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper/2+1),
self.end + max(self.params.window,self.params.upper/2+1), 0, self.params.upper, atac = self.params.atac)
self.mat = FragmentMat2D(self.chrom, self.start - max(self.params.window,self.params.upper//2+1),
self.end + max(self.params.window,self.params.upper//2+1), 0, self.params.upper, atac = self.params.atac)
self.mat.makeFragmentMat(self.params.bam)
def makeBiasMat(self):
self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window,
self.end + self.params.window, 0, self.params.upper)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2,
self.end + self.params.window + self.params.upper/2 + 1, log = True)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2,
self.end + self.params.window + self.params.upper//2 + 1, log = True)
if self.params.fasta is not None:
bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm)
self.bias_mat.makeBiasMat(bias_track)
@@ -298,7 +297,7 @@ def findAllNucs(self):
#find peaks in normalized signal
cands1 = call_peaks(combined, min_signal = 0,
sep = self.params.redundant_sep,
boundary = self.params.nonredundant_sep/2, order = self.params.redundant_sep/2)
boundary = self.params.nonredundant_sep//2, order = self.params.redundant_sep//2)
for i in cands1:
nuc = Nucleosome(i + self.start, self)
if nuc.nuc_cov > self.params.min_reads:
@@ -310,7 +309,7 @@ def findAllNucs(self):
self.nuc_collection[i] = nuc
self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys()))
self.nonredundant = reduce_peaks( self.sorted_nuc_keys,
map(lambda x: self.nuc_collection[x].z, self.sorted_nuc_keys),
[self.nuc_collection[x].z for x in self.sorted_nuc_keys],
self.params.nonredundant_sep)
self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant)
def fit(self):
@@ -340,7 +339,7 @@ def process(self, params):
self.makeInsertionTrack()
def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self,name)
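
The repeated `/` → `//` edits in this file preserve Python 2 behaviour only for integer operands: in Python 3, `/` is always true division and `//` floors. For float or NumPy float values the two operators give different results, so `//` is a drop-in replacement only where both sides are known to be integers. A short sketch of the distinction (assumed values, not repository code):

import numpy as np

upper = 251
print(upper // 2)      # 125   -- matches Python 2 integer division
print(upper / 2)       # 125.5 -- Python 3 true division

vals = np.array([3.0, 7.5])
print(vals / 2)        # [1.5  3.75]
print(vals // 2)       # [1. 3.]  -- floored; not equivalent for float arrays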

38 changes: 24 additions & 14 deletions nucleoatac/Occupancy.py
@@ -8,7 +8,6 @@
from scipy import signal, optimize, stats
import numpy as np
import matplotlib.pyplot as plt
import pyximport; pyximport.install(setup_args={"include_dirs":np.get_include()})
from pyatac.fragmentsizes import FragmentSizes
from pyatac.tracks import Track, CoverageTrack
from pyatac.chunk import Chunk
@@ -23,9 +22,11 @@ class FragmentMixDistribution:
def __init__(self, lower = 0, upper =2000):
self.lower = lower
self.upper = upper

def getFragmentSizes(self, bamfile, chunklist = None):
self.fragmentsizes = FragmentSizes(self.lower, self.upper)
self.fragmentsizes.calculateSizes(bamfile, chunks = chunklist)

def modelNFR(self, boundaries = (35,115)):
"""Model NFR distribution with gamma distribution"""
b = np.where(self.fragmentsizes.get(self.lower,boundaries[1]) == max(self.fragmentsizes.get(self.lower,boundaries[1])))[0][0] + self.lower
@@ -64,14 +65,15 @@ def gamma_fit(X,o,p):
self.nfr_fit.get(boundaries[1],self.upper)))
nuc[nuc<=0]=min(min(nfr)*0.1,min(nuc[nuc>0])*0.001)
self.nuc_fit = FragmentSizes(self.lower, self.upper, vals = nuc)

def plotFits(self,filename=None):
"""plot the Fits"""
fig = plt.figure()
plt.plot(range(self.lower,self.upper),self.fragmentsizes.get(),
plt.plot(list(range(self.lower,self.upper)),self.fragmentsizes.get(),
label = "Observed")
plt.plot(range(self.lower,self.upper),self.nfr_fit0.get(), label = "NFR Fit")
plt.plot(range(self.lower,self.upper),self.nuc_fit.get(), label = "Nucleosome Model")
plt.plot(range(self.lower,self.upper),self.nfr_fit.get(), label = "NFR Model")
plt.plot(list(range(self.lower,self.upper)),self.nfr_fit0.get(), label = "NFR Fit")
plt.plot(list(range(self.lower,self.upper)),self.nuc_fit.get(), label = "Nucleosome Model")
plt.plot(list(range(self.lower,self.upper)),self.nfr_fit.get(), label = "NFR Model")
plt.legend()
plt.xlabel("Fragment size")
plt.ylabel("Relative Frequency")
@@ -109,8 +111,8 @@ def calculateOccupancy(inserts, bias, params):
nuc_probs = nuc_probs / np.sum(nuc_probs)
nfr_probs = params.nfr_probs * bias
nfr_probs = nfr_probs / np.sum(nfr_probs)
x = map(lambda alpha: np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs), params.alphas)
logliks = np.array(map(lambda j: np.sum(x[j]*inserts),range(params.l)))
x = [np.log(alpha * nuc_probs + (1 - alpha) * nfr_probs) for alpha in params.alphas]
logliks = np.array([np.sum(x[j]*inserts) for j in range(params.l)])
logliks[np.isnan(logliks)] = -float('inf')
occ = params.alphas[np.argmax(logliks)]
#Compute upper and lower bounds for 95% confidence interval
@@ -129,11 +131,11 @@ def calculateOccupancyMLE(self, mat, bias_mat, params):
"""Calculate Occupancy track"""
offset=self.start - mat.start
if offset<params.flank:
raise Exception("For calculateOccupancyMLE, mat does not have sufficient flanking regions"),offset
raise Exception("For calculateOccupancyMLE, mat does not have sufficient flanking regions")(offset)
self.vals=np.ones(self.end - self.start)*float('nan')
self.lower_bound = np.ones(self.end - self.start)*float('nan')
self.upper_bound =np.ones(self.end - self.start)*float('nan')
for i in xrange(params.halfstep,len(self.vals),params.step):
for i in range(params.halfstep,len(self.vals),params.step):
new_inserts = np.sum(mat.get(lower = 0, upper = params.upper,
start = self.start+i-params.flank, end = self.start+i+params.flank+1),
axis = 1)
@@ -190,7 +192,7 @@ def __init__(self, insert_dist, upper, fasta, pwm, sep = 120, min_occ = 0.1, fla
if step%2 == 0:
step = step - 1
self.step = step
self.halfstep = (self.step-1) / 2
self.halfstep = (self.step-1) // 2

class OccChunk(Chunk):
"""Class for calculating occupancy and occupancy peaks
@@ -201,43 +203,50 @@ def __init__(self, chunk):
self.chrom = chunk.chrom
self.peaks = {}
self.nfrs = []

def getFragmentMat(self):
self.mat = FragmentMat2D(self.chrom, self.start - self.params.flank,
self.end + self.params.flank, 0, self.params.upper)
self.mat.makeFragmentMat(self.params.bam)

def makeBiasMat(self):
self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.flank,
self.end + self.params.flank, 0, self.params.upper)
if self.params.fasta is not None:
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper/2,
self.end + self.params.window + self.params.upper/2 + 1, log = True)
bias_track = InsertionBiasTrack(self.chrom, self.start - self.params.window - self.params.upper//2,
self.end + self.params.window + self.params.upper//2 + 1, log = True)
bias_track.computeBias(self.params.fasta, self.params.chrs, self.params.pwm)
self.bias_mat.makeBiasMat(bias_track)

def calculateOcc(self):
"""calculate occupancy for chunk"""
self.occ = OccupancyTrack(self.chrom,self.start,self.end)
self.occ.calculateOccupancyMLE(self.mat, self.bias_mat, self.params)
self.occ.makeSmoothed(window_len = self.params.window, sd = self.params.flank/3.0)

def getCov(self):
"""Get read coverage for regions"""
self.cov = CoverageTrack(self.chrom, self.start, self.end)
self.cov.calculateCoverage(self.mat, 0, self.params.upper, self.params.window)

def callPeaks(self):
"""Call peaks of occupancy profile"""
peaks = call_peaks(self.occ.smoothed_vals, sep = self.params.sep, min_signal = self.params.min_occ)
for peak in peaks:
tmp = OccPeak(peak + self.start, self)
if tmp.occ_lower > self.params.min_occ and tmp.reads > 0:
self.peaks[peak] = tmp

def getNucDist(self):
"""Get nucleosomal insert distribution"""
nuc_dist = np.zeros(self.params.upper)
for peak in self.peaks.keys():
for peak in list(self.peaks.keys()):
sub = self.mat.get(start = self.peaks[peak].start-self.params.flank, end = self.peaks[peak].start+1+self.params.flank)
sub_sum = np.sum(sub,axis=1)
sub_sum = sub_sum / float(sum(sub_sum))
nuc_dist += sub_sum
return(nuc_dist)

def process(self, params):
"""proces chunk -- calculat occupancy, get coverage, call peaks"""
self.params = params
@@ -246,9 +255,10 @@ def process(self, params):
self.calculateOcc()
self.getCov()
self.callPeaks()

def removeData(self):
"""remove data from chunk-- deletes all attributes"""
names = self.__dict__.keys()
names = list(self.__dict__.keys())
for name in names:
delattr(self, name)
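
In Python 3, map() returns a lazy iterator, so the `x = map(...)` line replaced earlier in this file, followed by `x[j]` indexing, would raise a TypeError; the list comprehensions above materialize the results first. A minimal sketch of the point with made-up values (not taken from this module):

import numpy as np

alphas = np.linspace(0, 1, 5)
nuc_probs = np.array([0.2, 0.3, 0.5])
nfr_probs = np.array([0.5, 0.3, 0.2])

# A list is subscriptable; a Python 3 map object is not.
x = [np.log(a * nuc_probs + (1 - a) * nfr_probs) for a in alphas]
logliks = np.array([np.sum(x[j]) for j in range(len(alphas))])
print(logliks)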

4 changes: 1 addition & 3 deletions nucleoatac/__init__.py
@@ -1,3 +1 @@
#Define version based on setup script
import pkg_resources
__version__ = pkg_resources.require("NucleoATAC")[0].version
__version__ = '0.3.4.py3'
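
The version string is now a literal rather than a pkg_resources lookup; the pkg_resources API is deprecated upstream in setuptools. If reading the installed version is still wanted on Python 3.8+, importlib.metadata offers an equivalent lookup -- a hedged sketch, assuming the distribution is installed under the name "NucleoATAC", not part of this PR:

try:
    from importlib.metadata import version
    __version__ = version("NucleoATAC")
except Exception:
    # Fall back to the literal used above if package metadata is unavailable.
    __version__ = '0.3.4.py3'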
4 changes: 2 additions & 2 deletions nucleoatac/cli.py
@@ -32,9 +32,9 @@ def nucleoatac_main(args):
print('---------Calling NFR positions--------------------------------------------------')
run_nfr(args)
elif call == "run":
occ_args = parser.parse_args(map(str,['occ','--bed',args.bed,'--bam',args.bam,
occ_args = parser.parse_args(list(map(str,['occ','--bed',args.bed,'--bam',args.bam,
'--fasta', args.fasta, '--pwm', args.pwm,
'--out',args.out,'--cores',args.cores]))
'--out',args.out,'--cores',args.cores])))
vprocess_args = parser.parse_args(['vprocess','--sizes',args.out+'.nuc_dist.txt','--out',args.out])
nuc_args_list = ['nuc','--bed',args.bed,'--bam',args.bam,'--out',args.out,'--cores', str(args.cores),
'--occ_track', args.out + '.occ.bedgraph.gz','--vmat', args.out + '.VMat',
8 changes: 4 additions & 4 deletions nucleoatac/diff_occ.py
@@ -29,7 +29,7 @@ def _diffHelper(arg):
occ.occ, [occ.peaks[i] for i in sorted(occ.peaks.keys())])
occ.removeData()
except Exception as e:
print('Caught exception when processing:\n'+ chunk.asBed()+"\n")
print(('Caught exception when processing:\n'+ chunk.asBed()+"\n"))
traceback.print_exc()
print()
raise e
@@ -43,7 +43,7 @@ def _writeDiff(pos_queue, out):
for pos in poslist:
pos.write(out_handle)
pos_queue.task_done()
except Exception, e:
except Exception as e:
print('Caught exception when writing occupancy track\n')
traceback.print_exc()
print()
@@ -57,7 +57,7 @@ def run_diff(args, bases = 500000):
"""
chrs = read_chrom_sizes_from_bam(args.bam)
pwm = PWM.open(args.pwm)
chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper/2 + max(pwm.up,pwm.down))
chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank + args.upper//2 + max(pwm.up,pwm.down))
chunks.merge()
maxQueueSize = max(2,int(100 * bases / np.mean([chunk.length() for chunk in chunks])))
#get fragmentsizes
@@ -78,7 +78,7 @@ def run_diff(args, bases = 500000):
diff_process.start()
nuc_dist = np.zeros(args.upper)
for j in sets:
tmp = pool1.map(_occHelper, zip(j,itertools.repeat(params)))
tmp = pool1.map(_occHelper, list(zip(j,itertools.repeat(params))))
for result in tmp:
diff_queue.put(result[1])
pool1.close()
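
`except Exception, e:` is Python 2-only syntax and a SyntaxError on Python 3; `except Exception as e:` works on both 2.6+ and 3. Likewise, zip() now returns an iterator, hence the list(...) wrapper before the work is handed to the multiprocessing pool. A tiny illustrative sketch of the exception pattern (hypothetical helper, not the code in this module):

import traceback

def write_items(items, out_path):
    try:
        with open(out_path, 'w') as handle:
            for item in items:
                handle.write(str(item) + "\n")
    except Exception as e:   # Python 3 syntax (also valid on Python 2.6+)
        traceback.print_exc()
        raise e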
8 changes: 4 additions & 4 deletions nucleoatac/run_nfr.py
@@ -31,7 +31,7 @@ def _nfrHelper(arg):
out = nfr.nfrs
nfr.removeData()
except Exception as e:
print('Caught exception when processing:\n'+ chunk.asBed()+"\n")
print(('Caught exception when processing:\n'+ chunk.asBed()+"\n"))
traceback.print_exc()
print()
raise e
@@ -45,7 +45,7 @@ def _writeNFR(pos_queue, out):
for pos in poslist:
pos.write(out_handle)
pos_queue.task_done()
except Exception, e:
except Exception as e:
print('Caught exception when writing occupancy track\n')
traceback.print_exc()
print()
@@ -59,7 +59,7 @@ def _writeIns(track_queue, out):
for track in iter(track_queue.get, 'STOP'):
track.write_track(out_handle)
track_queue.task_done()
except Exception, e:
except Exception as e:
print('Caught exception when writing insertion track\n')
traceback.print_exc()
print()
@@ -104,7 +104,7 @@ def run_nfr(args):
ins_process = mp.Process(target = _writeIns, args=(ins_queue, args.out))
ins_process.start()
for j in sets:
tmp = pool1.map(_nfrHelper, zip(j,itertools.repeat(params)))
tmp = pool1.map(_nfrHelper, list(zip(j,itertools.repeat(params))))
for result in tmp:
if params.ins_track is None:
nfr_queue.put(result[0])
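
As in diff_occ.py, zip() is lazy in Python 3. Pool.map accepts any iterable (it is materialized internally), so the list(...) wrapper is mainly for clarity and parity with Python 2 rather than strictly required. A minimal sketch with a stand-in worker (hypothetical, not the _nfrHelper above):

import itertools
import multiprocessing as mp

def _helper(arg):
    chunk, params = arg
    return chunk * params      # stand-in for per-chunk processing

if __name__ == '__main__':
    chunks = [1, 2, 3]
    params = 10
    with mp.Pool(2) as pool:
        results = pool.map(_helper, list(zip(chunks, itertools.repeat(params))))
    print(results)             # [10, 20, 30]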