Skip to content

Commit 212c59c

Browse files
Merge pull request #5 from MustoeLab/distribution_for_review
merge cleaned up code with master
2 parents 64e118d + 2080b83 commit 212c59c

202 files changed

Lines changed: 89 additions & 88727 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DanceMapper.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,7 @@ def _sample_RINGs(self, window=1, corrtype='apc', bgfile=None, assignprob=0.9,
916916
if verbal:
917917
print('Using MC for sample RING read assignment')
918918

919+
raise AttributeError('Monte Carlo option has been removed')
919920
read, comut, inotj = aFunc.fillRINGMatrix_montecarlo(self.reads, self.mutations, activestatus,
920921
self.BMsolution.mu, self.BMsolution.p,
921922
window, self.reads.shape[0], subtractwindow)
@@ -1003,7 +1004,8 @@ def _null_RINGs(self, window=1, corrtype='g', assignprob=0.9,
10031004
if montecarlo:
10041005
if verbal:
10051006
print('Using MC for null RING read assignment')
1006-
1007+
1008+
raise AttributeError('Monte Carlo option has been removed')
10071009
read, comut, inotj = aFunc.fillRINGMatrix_montecarlo(nullEM.reads, nullEM.mutations, activestatus,
10081010
mu, self.BMsolution.p,
10091011
window, self.reads.shape[0], subtractwindow)

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2020 Mustoe and Weeks Labs
3+
Copyright (c) 2022 Mustoe and Weeks Labs
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,13 @@ For PAIR and RING analysis of deconvoluted reads, we recommend having at least 1
6565
>1,000,000 reads per state.
6666
6767
The current script is serial (single cpu). Run times vary based on RNA size, number of reads, and number
68-
of final clusters. When performing primary clustering (*--fit*), anticipate 4-24 hours. When running
68+
of final clusters. When performing primary clustering (*--fit*), anticipate between 1 and 24 hours. When running
6969
PAIR or RING analysis (*--pairmap* or *--ring*) anticipate 12-48 hours each.
7070

71+
Note that DanceMapper is very memory intensive. As a rough guideline, you will need 50 x N x R bytes, where
72+
N is the RNA length and R is the number of reads. So for a 400 nt long RNA with 1M reads, this would be 20 GB.
73+
We plan to release a memory calculator tool with future releases.
74+
7175

7276
Input:
7377

@@ -122,21 +126,30 @@ the MFE structure, the DMS reactivity profile, and PAIR data (if the --bp flag i
122126

123127
Run foldClusters.py --help for additional options and usage information
124128

129+
Note that the pairing probability option is currently not supported in standard distributions of RNAstructure.
130+
We are working on making this option available. Please contact us for more information in the meantime.
131+
125132

126133

127134

128135
plotClusters.py
129136
----------------
130137
Script for visualizing and comparing reactivities of DanceMaP identified clusters.
131138
(Makes step plots, also known as skyline plots).
132-
Run plotClusters.py --help for usage information
133139

140+
Run plotClusters.py --help for usage information
134141

135142

136143

137144
Example
138145
========
139146

147+
Some example data and commands are provided in the *example* directory.
148+
149+
150+
Some generic example commands are below:
151+
152+
140153
*Preprocess data*
141154

142155
shapemapper --target add.fa --name example --amplicon --output-parsed \
@@ -148,15 +161,10 @@ Example
148161
python DanceMapper.py --mod example_Modified_add_parsed.mut --unt example_Untreated_add_parsed.mut --prof example_add_profile.txt --out example --fit --pair --ring
149162

150163

151-
*Fold and plot structure states (using PAIR restraints and computing pairing probabilities)*
164+
*Fold each ensemble state (MFE) using PAIR restraints and generate an arcPlot visualization that includes the PAIRs*
152165

153-
python foldClusters.py --bp example --prob example-reactivities.txt example
154-
155-
166+
python foldClusters.py --bp example example-reactivities.txt example
156167

157-
Complete class description
158-
--------------------------
159-
Forthcoming
160168

161169

162170

accessoryFunctions.pyx

Lines changed: 0 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import numpy as np
88
cimport numpy as np
99

1010
from readMutStrings cimport READ, parseLine, fillReadMut, incrementArrays
11-
from dSFMT cimport dsfmt_t, dsfmt_init_gen_rand, dsfmt_genrand_close_open
1211

1312

1413
###########################################################################
@@ -447,151 +446,6 @@ def fillRINGMatrix(char[:,::1] reads, char[:,::1] mutations, char[:] activestatu
447446
return read_arr, comut_arr, inotj_arr
448447

449448

450-
451-
def fillRINGMatrix_montecarlo(char[:,::1] reads, char[:,::1] mutations, char[:] activestatus,
452-
double[:,::1] mu, double[:] p, int window, int samplenumber, int subtractwindow):
453-
"""active status is array containing 0/1 whether or not column is to be included
454-
posterior prob calculations"""
455-
456-
457-
# initialize RING matrices
458-
cdef int[:,:,::1] read_arr = np.zeros((p.shape[0], mu.shape[1], mu.shape[1]), dtype=np.int32)
459-
cdef int[:,:,::1] comut_arr = np.zeros((p.shape[0], mu.shape[1], mu.shape[1]), dtype=np.int32)
460-
cdef int[:,:,::1] inotj_arr = np.zeros((p.shape[0], mu.shape[1], mu.shape[1]), dtype=np.int32)
461-
462-
463-
# declare counters
464-
cdef int n,i,j,m
465-
466-
cdef int pdim = p.shape[0]
467-
cdef int maxreadidx = reads.shape[0]-1
468-
469-
# setup the random number generator
470-
cdef dsfmt_t dsfmt
471-
dsfmt_init_gen_rand(&dsfmt, np.uint32(time.time()))
472-
473-
# compute logp
474-
cdef double[:] logp = np.log(p)
475-
476-
# compute logmu and clogmu
477-
cdef double[:,::1] logmu = np.zeros((mu.shape[0], mu.shape[1]))
478-
cdef double[:,::1] clogmu = np.zeros((mu.shape[0], mu.shape[1]))
479-
for i in xrange(pdim):
480-
for j in xrange(mu.shape[1]):
481-
if mu[i,j] > 0:
482-
logmu[i,j] = log( mu[i,j] )
483-
clogmu[i,j] = log( 1-mu[i,j] )
484-
485-
486-
# declare other needed containers
487-
cdef double[:] loglike = np.empty(pdim) # container for read loglike of each model
488-
cdef double[:] ll_i = np.empty(pdim) # container for loglike subtracting i
489-
cdef double[:] ll_ij = np.empty(pdim) # container for loglike subtracting i & j
490-
cdef double[:] weights = np.empty(pdim) # container for normalized probabilties
491-
cdef int[:] occupancy = np.empty(pdim, dtype=np.int32)
492-
493-
# codes for contingency table
494-
cdef int icode
495-
cdef int jcode
496-
497-
cdef int step = 0
498-
499-
for step in xrange(samplenumber):
500-
501-
if step%10000==0:
502-
printf("\r%d", step)
503-
fflush(stdout)
504-
505-
# select read (sample w/ replacement)
506-
n = lrint(dsfmt_genrand_close_open(&dsfmt)*maxreadidx)
507-
508-
# compute overall loglike of the read
509-
readloglike(loglike, activestatus, reads[n,:], mutations[n,:], logp, logmu, clogmu)
510-
511-
_loglike2prob(loglike, weights)
512-
513-
# compute occupancy based on whole read loglike
514-
for m in xrange(pdim):
515-
occupancy[m] = 0
516-
if weights[m] >= dsfmt_genrand_close_open(&dsfmt):
517-
occupancy[m] = 1
518-
519-
520-
# now iterate through all i/j pairs
521-
for i in xrange(read_arr.shape[1]-window+1):
522-
523-
# compute mut code, and skip if not read at all
524-
icode = _computeMutCode(reads[n,:], mutations[n,:], i, window)
525-
if icode < 0: continue
526-
527-
if subtractwindow:
528-
# reset ll_i
529-
for m in xrange(pdim):
530-
ll_i[m] = loglike[m]
531-
532-
# subtract window i
533-
_subtractloglike(ll_i, i, window, reads[n,:], mutations[n,:], activestatus, logmu, clogmu)
534-
535-
# compute weight of read ignoring i
536-
_loglike2prob(ll_i, weights)
537-
538-
539-
# increment the diagonal for keeping track of overall mutation rate
540-
for m in xrange(pdim):
541-
if occupancy[m]:
542-
read_arr[m,i,i] += 1
543-
if icode==1:
544-
comut_arr[m,i,i] += 1
545-
546-
547-
for j in xrange(i+1, read_arr.shape[1]-window+1):
548-
549-
jcode = _computeMutCode(reads[n,:], mutations[n,:], j, window)
550-
if jcode < 0: continue
551-
552-
553-
if subtractwindow:
554-
# reset ll_ij
555-
for m in xrange(pdim):
556-
ll_ij[m] = ll_i[m]
557-
558-
# subtract j
559-
_subtractloglike(ll_ij, j, window, reads[n,:], mutations[n,:], activestatus, logmu, clogmu)
560-
561-
# compute weight of read ignoring i & j
562-
_loglike2prob(ll_ij, weights)
563-
564-
# compute occupancy
565-
for m in xrange(pdim):
566-
occupancy[m] = 0
567-
if weights[m] >= dsfmt_genrand_close_open(&dsfmt):
568-
occupancy[m] = 1
569-
570-
571-
# now iterate through models and increment RING matrices
572-
for m in xrange(pdim):
573-
# add the read
574-
if occupancy[m]:
575-
read_arr[m,i,j] += 1
576-
577-
if icode == 1 and jcode == 1:
578-
comut_arr[m,i,j] += 1
579-
elif icode == 1 and jcode == 0:
580-
inotj_arr[m,i,j] += 1
581-
elif icode == 0 and jcode == 1:
582-
inotj_arr[m,j,i] += 1
583-
584-
585-
586-
# reset cursor to new line
587-
printf("\n\n")
588-
fflush(stdout)
589-
590-
return read_arr, comut_arr, inotj_arr
591-
592-
593-
594-
595449
cdef void _subtractloglike(double[:] loglike, int i_index, int window,
596450
char[:] read, char[:] mutation, char[:] activestatus,
597451
double[:,::1] logmu, double[:,::1] clogmu):

dSFMT/.gitattributes

Lines changed: 0 additions & 1 deletion
This file was deleted.

dSFMT/CHANGE-LOG.txt

Lines changed: 0 additions & 93 deletions
This file was deleted.

dSFMT/FILES.txt

Lines changed: 0 additions & 27 deletions
This file was deleted.

0 commit comments

Comments
 (0)