MustoeLab
diff --git a/‎DanceMapper.py‎
Lines changed: 3 additions & 1 deletion b/‎DanceMapper.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎LICENSE‎
Lines changed: 1 addition & 1 deletion b/‎LICENSE‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 17 additions & 9 deletions b/‎README.md‎
Lines changed: 17 additions & 9 deletions
diff --git a/‎accessoryFunctions.pyx‎
Lines changed: 0 additions & 146 deletions b/‎accessoryFunctions.pyx‎
Lines changed: 0 additions & 146 deletions
diff --git a/‎dSFMT/.gitattributes‎
Lines changed: 0 additions & 1 deletion b/‎dSFMT/.gitattributes‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎dSFMT/CHANGE-LOG.txt‎
Lines changed: 0 additions & 93 deletions b/‎dSFMT/CHANGE-LOG.txt‎
Lines changed: 0 additions & 93 deletions
diff --git a/‎dSFMT/FILES.txt‎
Lines changed: 0 additions & 27 deletions b/‎dSFMT/FILES.txt‎
Lines changed: 0 additions & 27 deletions
@@ -916,6 +916,7 @@ def _sample_RINGs(self, window=1, corrtype='apc', bgfile=None, assignprob=0.9,
             if verbal:
                 print('Using MC for sample RING read assignment')
 
+            raise AttributeError('Monte Carlo option has been removed')
             read, comut, inotj = aFunc.fillRINGMatrix_montecarlo(self.reads, self.mutations, activestatus,
                                                                  self.BMsolution.mu, self.BMsolution.p, 
                                                                  window, self.reads.shape[0], subtractwindow)
@@ -1003,7 +1004,8 @@ def _null_RINGs(self, window=1, corrtype='g', assignprob=0.9,
         if montecarlo:
             if verbal:
                 print('Using MC for null RING read assignment')
-
+            
+            raise AttributeError('Monte Carlo option has been removed')
             read, comut, inotj = aFunc.fillRINGMatrix_montecarlo(nullEM.reads, nullEM.mutations, activestatus,
                                                                  mu, self.BMsolution.p, 
                                                                  window, self.reads.shape[0], subtractwindow)
 
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020 Mustoe and Weeks Labs
+Copyright (c) 2022 Mustoe and Weeks Labs
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 
@@ -65,9 +65,13 @@ For PAIR and RING analysis of deconvoluted reads, we recommend having at least 1
 >1,000,000 reads per state.
 
 The current script is serial (single cpu). Run times vary based on RNA size, number of reads, and number
-of final clusters. When performing primary clustering (*--fit*), anticipate 4-24 hours. When running
+of final clusters. When performing primary clustering (*--fit*), anticipate between 1 and 24 hours. When running
 PAIR or RING analysis (*--pairmap* or *--ring*) anticipate 12-48 hours each. 
 
+Note that DanceMapper is very memory intensive. As a rough guideline, you will need about 50 x N x R bytes of
+memory, where N is the RNA length and R is the number of reads. For example, a 400 nt RNA with 1M reads
+requires roughly 20 GB. We plan to release a memory calculator tool in a future release.
+
 
 Input:
 
@@ -122,21 +126,30 @@ the MFE structure, the DMS reactivity profile, and PAIR data (if the --bp flag i
 
 Run foldClusters.py --help for additional options and usage information
 
+Note that the pairing probability option is currently not supported in standard distributions of RNAstructure.
+We are working on making this option available. Please contact us for more information in the meantime.
+
 
 
 
 plotClusters.py
 ----------------
 Script for visualizing and comparing reactivities of DanceMaP identified clusters.
 (Makes step plots, also known as skyline plots).
-Run plotClusters.py --help for usage information
 
+Run plotClusters.py --help for usage information
 
 
 
 Example
 ========
 
+Some example data and commands are provided in the *example* directory. 
+
+
+Some generic example commands are below:
+
+
 *Preprocess data*
 
     shapemapper --target add.fa --name example --amplicon --output-parsed \
@@ -148,15 +161,10 @@ Example
     python DanceMapper.py --mod example_Modified_add_parsed.mut --unt example_Untreated_add_parsed.mut --prof example_add_profile.txt --out example --fit --pair --ring
 
 
-*Fold and plot structure states (using PAIR restraints and computing pairing probabilities)*
+*Fold each ensemble state (MFE) using PAIR restraints and generate an arcPlot visualization that includes the PAIRs*
 
-    python foldClusters.py --bp example --prob example-reactivities.txt example
-
-
+    python foldClusters.py --bp example example-reactivities.txt example
 
-Complete class description 
---------------------------
-Forthcoming
 
 
 
 
@@ -8,7 +8,6 @@ import numpy as np
 cimport numpy as np
 
 from readMutStrings cimport READ, parseLine, fillReadMut, incrementArrays
-from dSFMT cimport dsfmt_t, dsfmt_init_gen_rand, dsfmt_genrand_close_open
 
 
 ###########################################################################
@@ -447,151 +446,6 @@ def fillRINGMatrix(char[:,::1] reads, char[:,::1] mutations, char[:] activestatu
     return read_arr, comut_arr, inotj_arr                                    
 
 
-
-def fillRINGMatrix_montecarlo(char[:,::1] reads, char[:,::1] mutations, char[:] activestatus,
-                              double[:,::1] mu, double[:] p, int window, int samplenumber, int subtractwindow): 
-    """active status is array containing 0/1 whether or not column is to be included 
-    posterior prob calculations"""
-    
-
-    # initialize RING matrices
-    cdef int[:,:,::1] read_arr = np.zeros((p.shape[0], mu.shape[1], mu.shape[1]), dtype=np.int32)
-    cdef int[:,:,::1] comut_arr = np.zeros((p.shape[0], mu.shape[1], mu.shape[1]), dtype=np.int32)
-    cdef int[:,:,::1] inotj_arr = np.zeros((p.shape[0], mu.shape[1], mu.shape[1]), dtype=np.int32)
-
-
-    # declare counters
-    cdef int n,i,j,m
-
-    cdef int pdim = p.shape[0]
-    cdef int maxreadidx = reads.shape[0]-1
-
-    # setup the random number generator
-    cdef dsfmt_t dsfmt
-    dsfmt_init_gen_rand(&dsfmt, np.uint32(time.time()))
-
-    # compute logp
-    cdef double[:] logp = np.log(p)
-    
-    # compute logmu and clogmu
-    cdef double[:,::1] logmu = np.zeros((mu.shape[0], mu.shape[1]))
-    cdef double[:,::1] clogmu = np.zeros((mu.shape[0], mu.shape[1]))
-    for i in xrange(pdim):
-        for j in xrange(mu.shape[1]):
-            if mu[i,j] > 0:
-                logmu[i,j] = log( mu[i,j] )
-                clogmu[i,j] = log( 1-mu[i,j] )
-    
-
-    # declare other needed containers
-    cdef double[:] loglike = np.empty(pdim) # container for read loglike of each model
-    cdef double[:] ll_i = np.empty(pdim) # container for loglike subtracting i
-    cdef double[:] ll_ij = np.empty(pdim) # container for loglike subtracting i & j
-    cdef double[:] weights = np.empty(pdim) # container for normalized probabilties
-    cdef int[:] occupancy = np.empty(pdim, dtype=np.int32)
-    
-    # codes for contigency table
-    cdef int icode    
-    cdef int jcode 
-    
-    cdef int step = 0
-    
-    for step in xrange(samplenumber):
-        
-        if step%10000==0:
-            printf("\r%d", step)
-            fflush(stdout)
-
-        # select read (sample w/ replacement)
-        n = lrint(dsfmt_genrand_close_open(&dsfmt)*maxreadidx)
-
-        # compute overall loglike of the read
-        readloglike(loglike, activestatus, reads[n,:], mutations[n,:], logp, logmu, clogmu)
-        
-        _loglike2prob(loglike, weights)
-        
-        # compute occupancy based on whole read loglike
-        for m in xrange(pdim):
-            occupancy[m] = 0
-            if weights[m] >= dsfmt_genrand_close_open(&dsfmt):
-                occupancy[m] = 1
-
-
-        # now iterate through all i/j pairs
-        for i in xrange(read_arr.shape[1]-window+1):
-            
-            # compute mut code, and skip if not read at all
-            icode = _computeMutCode(reads[n,:], mutations[n,:], i, window)
-            if icode < 0: continue
-            
-            if subtractwindow:
-                # reset ll_i
-                for m in xrange(pdim):
-                    ll_i[m] = loglike[m]
-            
-                # subtract window i
-                _subtractloglike(ll_i, i, window, reads[n,:], mutations[n,:], activestatus, logmu, clogmu)
-            
-                # compute weight of read ignoring i
-                _loglike2prob(ll_i, weights)
-            
-
-            # increment the diagonal for keeping track of overall mutation rate
-            for m in xrange(pdim):
-                if occupancy[m]:
-                    read_arr[m,i,i] += 1
-                    if icode==1:
-                        comut_arr[m,i,i] += 1
-
-
-            for j in xrange(i+1, read_arr.shape[1]-window+1):
-
-                jcode = _computeMutCode(reads[n,:], mutations[n,:], j, window)
-                if jcode < 0: continue
-      
-
-                if subtractwindow:
-                    # reset ll_ij
-                    for m in xrange(pdim):
-                        ll_ij[m] = ll_i[m]
-                
-                    # subtract j
-                    _subtractloglike(ll_ij, j, window, reads[n,:], mutations[n,:], activestatus, logmu, clogmu)
-                
-                    # compute weight of read ignoring i & j
-                    _loglike2prob(ll_ij, weights) 
-                
-                    # compute occupancy
-                    for m in xrange(pdim):
-                        occupancy[m] = 0
-                        if weights[m] >= dsfmt_genrand_close_open(&dsfmt):
-                            occupancy[m] = 1
-         
-
-                # now iterate through models and increment RING matrices
-                for m in xrange(pdim):
-                    # add the read
-                    if occupancy[m]:
-                        read_arr[m,i,j] += 1
-            
-                        if icode == 1 and jcode == 1:
-                            comut_arr[m,i,j] += 1
-                        elif icode == 1 and jcode == 0:
-                            inotj_arr[m,i,j] += 1
-                        elif icode == 0 and jcode == 1:
-                            inotj_arr[m,j,i] += 1
-    
-
-
-    # reset cursor to new line
-    printf("\n\n")
-    fflush(stdout)
-
-    return read_arr, comut_arr, inotj_arr                                    
-
-
-
-
 cdef void _subtractloglike(double[:] loglike, int i_index, int window, 
                            char[:] read, char[:] mutation, char[:] activestatus,
                            double[:,::1] logmu, double[:,::1] clogmu):