diff --git a/README.md b/README.md index f5b58dc..63d92c3 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,14 @@ Output base pairing probability matrix to a file with user specified name (overw ``` Outputs base pairing probability matrices to files with user specified prefix. (default False) ``` +--unpaired FILE_NAME +``` +Output unpaired probability for each nuc and average unpaired probability (AUP) to a file with user specified name. (default False) +``` +--access_u or -U +``` +Print the accessible U%. (default False) +``` --part or -p ``` Partition function calculation only. (default False) diff --git a/linearpartition b/linearpartition index 775708a..97dd095 100755 --- a/linearpartition +++ b/linearpartition @@ -29,6 +29,8 @@ def setgflags(): flags.DEFINE_boolean('fasta', False, "input is in fasta format") # FASTA format flags.DEFINE_integer('dangles', 2, "the way to treat `dangling end' energies for bases adjacent to helices in free ends and multi-loops (only supporting `0' or `2', default=`2')", short_name="d") flags.DEFINE_string('evaly', "", "batch eval all sequences against structure for p(y|x)", short_name='y') # p(y|x) + flags.DEFINE_string('unpaired', '', "output unpaired probability for each nuc and average unpaired probability (AUP)") # AUP + flags.DEFINE_boolean('access_u', False, "Output the percentage of unpaired U", short_name='U') # accessible U% argv = FLAGS(sys.argv) def main(): @@ -52,8 +54,11 @@ def main(): is_fasta = '1' if FLAGS.fasta else '0' dangles = str(FLAGS.dangles) evaly = FLAGS.evaly + unpaired_file = str(FLAGS.unpaired) + access_u = '1' if FLAGS.U else '0' - if FLAGS.p and (FLAGS.o or FLAGS.prefix): + + if FLAGS.p and (FLAGS.o or FLAGS.prefix or FLAGS.unpaired): print("\nWARNING: -p mode has no output for base pairing probability matrix!\n"); if FLAGS.o and FLAGS.r: @@ -85,8 +90,14 @@ def main(): if FLAGS.evaly: use_vienna = True # eval p(y|x) only in Vienna mode + if FLAGS.unpaired: + if os.path.exists(unpaired_file): + print("WARNING: unpaired output file already exists. Choose another name.\n") + print("Exit!\n") + exit() + path = os.path.dirname(os.path.abspath(__file__)) - cmd = ["%s/%s" % (path, ('bin/linearpartition_v' if use_vienna else 'bin/linearpartition_c')), beamsize, is_sharpturn, is_verbose, bpp_file, bpp_prefix, pf_only, bpp_cutoff, forest_file, mea, gamma, TK, threshold, ThreshKnot_prefix, MEA_prefix, MEA_bpseq, shape_file_path, is_fasta, dangles, evaly] + cmd = ["%s/%s" % (path, ('bin/linearpartition_v' if use_vienna else 'bin/linearpartition_c')), beamsize, is_sharpturn, is_verbose, bpp_file, bpp_prefix, pf_only, bpp_cutoff, forest_file, mea, gamma, TK, threshold, ThreshKnot_prefix, MEA_prefix, MEA_bpseq, shape_file_path, is_fasta, dangles, evaly, unpaired_file, access_u] subprocess.call(cmd, stdin=sys.stdin) if __name__ == '__main__': diff --git a/src/LinearPartition.cpp b/src/LinearPartition.cpp index 74fc3ac..f425c5b 100755 --- a/src/LinearPartition.cpp +++ b/src/LinearPartition.cpp @@ -1,7 +1,7 @@ /* *LinearPartition.cpp* - The main code for LinearPartition: Linear-Time Approximation of - RNA Folding Partition Function + The main code for LinearPartition: Linear-Time Approximation of + RNA Folding Partition Function and Base Pairing Probabilities author: He Zhang @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "LinearPartition.h" #include "Utils/utility.h" @@ -31,47 +31,63 @@ using namespace std; -unsigned long quickselect_partition(vector>& scores, unsigned long lower, unsigned long upper) { +unsigned long quickselect_partition(vector> &scores, unsigned long lower, unsigned long upper) +{ pf_type pivot = scores[upper].first; - while (lower < upper) { - while (scores[lower].first < pivot) ++lower; - while (scores[upper].first > pivot) --upper; - if (scores[lower].first == scores[upper].first) ++lower; - else if (lower < upper) swap(scores[lower], scores[upper]); + while (lower < upper) + { + while (scores[lower].first < pivot) + ++lower; + while (scores[upper].first > pivot) + --upper; + if (scores[lower].first == scores[upper].first) + ++lower; + else if (lower < upper) + swap(scores[lower], scores[upper]); } return upper; } // in-place quick-select -pf_type quickselect(vector>& scores, unsigned long lower, unsigned long upper, unsigned long k) { - if ( lower == upper ) return scores[lower].first; +pf_type quickselect(vector> &scores, unsigned long lower, unsigned long upper, unsigned long k) +{ + if (lower == upper) + return scores[lower].first; unsigned long split = quickselect_partition(scores, lower, upper); unsigned long length = split - lower + 1; - if (length == k) return scores[split].first; - else if (k < length) return quickselect(scores, lower, split-1, k); - else return quickselect(scores, split+1, upper, k - length); + if (length == k) + return scores[split].first; + else if (k < length) + return quickselect(scores, lower, split - 1, k); + else + return quickselect(scores, split + 1, upper, k - length); } - -pf_type BeamCKYParser::beam_prune(std::unordered_map &beamstep) { +pf_type BeamCKYParser::beam_prune(std::unordered_map &beamstep) +{ scores.clear(); - for (auto &item : beamstep) { + for (auto &item : beamstep) + { int i = item.first; State &cand = item.second; int k = i - 1; pf_type newalpha = (k >= 0 ? bestC[k].alpha : pf_type(0.0)) + cand.alpha; scores.push_back(make_pair(newalpha, i)); } - if (scores.size() <= beam) return VALUE_MIN; + if (scores.size() <= beam) + return VALUE_MIN; pf_type threshold = quickselect(scores, 0, scores.size() - 1, scores.size() - beam); - for (auto &p : scores) { - if (p.first < threshold) beamstep.erase(p.second); + for (auto &p : scores) + { + if (p.first < threshold) + beamstep.erase(p.second); } return threshold; } -void BeamCKYParser::prepare(unsigned len) { +void BeamCKYParser::prepare(unsigned len) +{ seq_length = len; nucs = new int[seq_length]; @@ -81,24 +97,29 @@ void BeamCKYParser::prepare(unsigned len) { bestM = new unordered_map[seq_length]; bestM2 = new unordered_map[seq_length]; bestMulti = new unordered_map[seq_length]; - + scores.reserve(seq_length); + + unpaired_prob.resize(seq_length + 1, 1.0); + AUP = 0.0; } -void BeamCKYParser::postprocess() { +void BeamCKYParser::postprocess() +{ - delete[] bestC; - delete[] bestH; - delete[] bestP; - delete[] bestM; - delete[] bestM2; - delete[] bestMulti; + delete[] bestC; + delete[] bestH; + delete[] bestP; + delete[] bestM; + delete[] bestM2; + delete[] bestMulti; - delete[] nucs; + delete[] nucs; } -double BeamCKYParser::parse(string& seq) { - +double BeamCKYParser::parse(string &seq) +{ + struct timeval parse_starttime, parse_endtime; gettimeofday(&parse_starttime, NULL); @@ -110,13 +131,16 @@ double BeamCKYParser::parse(string& seq) { vector next_pair[NOTON]; { - for (int nuci = 0; nuci < NOTON; ++nuci) { + for (int nuci = 0; nuci < NOTON; ++nuci) + { // next_pair next_pair[nuci].resize(seq_length, -1); int next = -1; - for (int j = seq_length-1; j >=0; --j) { + for (int j = seq_length - 1; j >= 0; --j) + { next_pair[nuci][j] = next; - if (_allowed_pairs[nuci][nucs[j]]) next = j; + if (_allowed_pairs[nuci][nucs[j]]) + next = j; } } } @@ -128,51 +152,60 @@ double BeamCKYParser::parse(string& seq) { #endif #ifdef lpv - if(seq_length > 0) bestC[0].alpha = 0.0; - if(seq_length > 1) bestC[1].alpha = 0.0; + if (seq_length > 0) + bestC[0].alpha = 0.0; + if (seq_length > 1) + bestC[1].alpha = 0.0; #else - if(seq_length > 0) Fast_LogPlusEquals(bestC[0].alpha, score_external_unpaired(0, 0)); - if(seq_length > 1) Fast_LogPlusEquals(bestC[1].alpha, score_external_unpaired(0, 1)); + if (seq_length > 0) + Fast_LogPlusEquals(bestC[0].alpha, score_external_unpaired(0, 0)); + if (seq_length > 1) + Fast_LogPlusEquals(bestC[1].alpha, score_external_unpaired(0, 1)); #endif value_type newscore; - for(int j = 0; j < seq_length; ++j) { + for (int j = 0; j < seq_length; ++j) + { int nucj = nucs[j]; - int nucj1 = (j+1) < seq_length ? nucs[j+1] : -1; + int nucj1 = (j + 1) < seq_length ? nucs[j + 1] : -1; - unordered_map& beamstepH = bestH[j]; - unordered_map& beamstepMulti = bestMulti[j]; - unordered_map& beamstepP = bestP[j]; - unordered_map& beamstepM2 = bestM2[j]; - unordered_map& beamstepM = bestM[j]; - State& beamstepC = bestC[j]; + unordered_map &beamstepH = bestH[j]; + unordered_map &beamstepMulti = bestMulti[j]; + unordered_map &beamstepP = bestP[j]; + unordered_map &beamstepM2 = bestM2[j]; + unordered_map &beamstepM = bestM[j]; + State &beamstepC = bestC[j]; // beam of H { - if (beam > 0 && beamstepH.size() > beam) beam_prune(beamstepH); + if (beam > 0 && beamstepH.size() > beam) + beam_prune(beamstepH); { // for nucj put H(j, j_next) into H[j_next] int jnext = next_pair[nucj][j]; - if (no_sharp_turn) while (jnext - j < 4 && jnext != -1) jnext = next_pair[nucj][jnext]; - if (jnext != -1) { + if (no_sharp_turn) + while (jnext - j < 4 && jnext != -1) + jnext = next_pair[nucj][jnext]; + if (jnext != -1) + { int nucjnext = nucs[jnext]; int nucjnext_1 = (jnext - 1) > -1 ? nucs[jnext - 1] : -1; #ifdef lpv - int tetra_hex_tri = -1; + int tetra_hex_tri = -1; #ifdef SPECIAL_HP - if (jnext-j-1 == 4) // 6:tetra - tetra_hex_tri = if_tetraloops[j]; - else if (jnext-j-1 == 6) // 8:hexa - tetra_hex_tri = if_hexaloops[j]; - else if (jnext-j-1 == 3) // 5:tri - tetra_hex_tri = if_triloops[j]; + if (jnext - j - 1 == 4) // 6:tetra + tetra_hex_tri = if_tetraloops[j]; + else if (jnext - j - 1 == 6) // 8:hexa + tetra_hex_tri = if_hexaloops[j]; + else if (jnext - j - 1 == 3) // 5:tri + tetra_hex_tri = if_triloops[j]; #endif - newscore = - v_score_hairpin(j, jnext, nucj, nucj1, nucjnext_1, nucjnext, tetra_hex_tri); - Fast_LogPlusEquals(bestH[jnext][j].alpha, newscore/kT); + newscore = -v_score_hairpin(j, jnext, nucj, nucj1, nucjnext_1, nucjnext, tetra_hex_tri); + Fast_LogPlusEquals(bestH[jnext][j].alpha, newscore / kT); #else - newscore = score_hairpin(j, jnext, nucj, nucj1, nucjnext_1, nucjnext); - Fast_LogPlusEquals(bestH[jnext][j].alpha, newscore); + newscore = score_hairpin(j, jnext, nucj, nucj1, nucjnext_1, nucjnext); + Fast_LogPlusEquals(bestH[jnext][j].alpha, newscore); #endif } } @@ -181,13 +214,15 @@ double BeamCKYParser::parse(string& seq) { // for every state h in H[j] // 1. extend h(i, j) to h(i, jnext) // 2. generate p(i, j) - for (auto &item : beamstepH) { + for (auto &item : beamstepH) + { int i = item.first; State &state = item.second; int nuci = nucs[i]; int jnext = next_pair[nuci][j]; - if (jnext != -1) { + if (jnext != -1) + { int nuci1 = (i + 1) < seq_length ? nucs[i + 1] : -1; int nucjnext = nucs[jnext]; int nucjnext_1 = (jnext - 1) > -1 ? nucs[jnext - 1] : -1; @@ -196,15 +231,15 @@ double BeamCKYParser::parse(string& seq) { #ifdef lpv int tetra_hex_tri = -1; #ifdef SPECIAL_HP - if (jnext-i-1 == 4) // 6:tetra + if (jnext - i - 1 == 4) // 6:tetra tetra_hex_tri = if_tetraloops[i]; - else if (jnext-i-1 == 6) // 8:hexa + else if (jnext - i - 1 == 6) // 8:hexa tetra_hex_tri = if_hexaloops[i]; - else if (jnext-i-1 == 3) // 5:tri + else if (jnext - i - 1 == 3) // 5:tri tetra_hex_tri = if_triloops[i]; #endif - newscore = - v_score_hairpin(i, jnext, nuci, nuci1, nucjnext_1, nucjnext, tetra_hex_tri); - Fast_LogPlusEquals(bestH[jnext][i].alpha, newscore/kT); + newscore = -v_score_hairpin(i, jnext, nuci, nuci1, nucjnext_1, nucjnext, tetra_hex_tri); + Fast_LogPlusEquals(bestH[jnext][i].alpha, newscore / kT); #else newscore = score_hairpin(i, jnext, nuci, nuci1, nucjnext_1, nucjnext); Fast_LogPlusEquals(bestH[jnext][i].alpha, newscore); @@ -216,23 +251,27 @@ double BeamCKYParser::parse(string& seq) { } } } - if (j == 0) continue; + if (j == 0) + continue; // beam of Multi { - if (beam > 0 && beamstepMulti.size() > beam) beam_prune(beamstepMulti); + if (beam > 0 && beamstepMulti.size() > beam) + beam_prune(beamstepMulti); - for(auto& item : beamstepMulti) { + for (auto &item : beamstepMulti) + { int i = item.first; - State& state = item.second; + State &state = item.second; int nuci = nucs[i]; - int nuci1 = nucs[i+1]; + int nuci1 = nucs[i + 1]; int jnext = next_pair[nuci][j]; // 1. extend (i, j) to (i, jnext) { - if (jnext != -1) { + if (jnext != -1) + { #ifdef lpv Fast_LogPlusEquals(bestMulti[jnext][i].alpha, state.alpha); #else @@ -245,10 +284,10 @@ double BeamCKYParser::parse(string& seq) { // 2. generate P (i, j) { #ifdef lpv - newscore = - v_score_multi(i, j, nuci, nuci1, nucs[j-1], nucj, seq_length, dangle_mode); - Fast_LogPlusEquals(beamstepP[i].alpha, state.alpha + newscore/kT); + newscore = -v_score_multi(i, j, nuci, nuci1, nucs[j - 1], nucj, seq_length, dangle_mode); + Fast_LogPlusEquals(beamstepP[i].alpha, state.alpha + newscore / kT); #else - newscore = score_multi(i, j, nuci, nuci1, nucs[j-1], nucj, seq_length); + newscore = score_multi(i, j, nuci, nuci1, nucs[j - 1], nucj, seq_length); Fast_LogPlusEquals(beamstepP[i].alpha, state.alpha + newscore); #endif } @@ -256,59 +295,67 @@ double BeamCKYParser::parse(string& seq) { } // beam of P - { - if (beam > 0 && beamstepP.size() > beam) beam_prune(beamstepP); + { + if (beam > 0 && beamstepP.size() > beam) + beam_prune(beamstepP); // for every state in P[j] // 1. generate new helix/bulge // 2. M = P // 3. M2 = M + P // 4. C = C + P - for(auto& item : beamstepP) { + for (auto &item : beamstepP) + { int i = item.first; - State& state = item.second; + State &state = item.second; int nuci = nucs[i]; - int nuci_1 = (i-1>-1) ? nucs[i-1] : -1; + int nuci_1 = (i - 1 > -1) ? nucs[i - 1] : -1; // 1. generate new helix / single_branch // new state is of shape p..i..j..q - if (i >0 && j 0 && j < seq_length - 1) + { #ifndef lpv value_type precomputed = score_junction_B(j, i, nucj, nucj1, nuci_1, nuci); #endif - for (int p = i - 1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) { + for (int p = i - 1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) + { int nucp = nucs[p]; - int nucp1 = nucs[p + 1]; + int nucp1 = nucs[p + 1]; int q = next_pair[nucp][j]; - while (q != -1 && ((i - p) + (q - j) - 2 <= SINGLE_MAX_LEN)) { + while (q != -1 && ((i - p) + (q - j) - 2 <= SINGLE_MAX_LEN)) + { int nucq = nucs[q]; int nucq_1 = nucs[q - 1]; - if (p == i - 1 && q == j + 1) { + if (p == i - 1 && q == j + 1) + { // helix #ifdef lpv - newscore = -v_score_single(p,q,i,j, nucp, nucp1, nucq_1, nucq, - nuci_1, nuci, nucj, nucj1); + newscore = -v_score_single(p, q, i, j, nucp, nucp1, nucq_1, nucq, + nuci_1, nuci, nucj, nucj1); // SHAPE for Vienna only if (use_shape) newscore += -(pseudo_energy_stack[p] + pseudo_energy_stack[i] + pseudo_energy_stack[j] + pseudo_energy_stack[q]); - Fast_LogPlusEquals(bestP[q][p].alpha, state.alpha + newscore/kT); + Fast_LogPlusEquals(bestP[q][p].alpha, state.alpha + newscore / kT); #else newscore = score_helix(nucp, nucp1, nucq_1, nucq); Fast_LogPlusEquals(bestP[q][p].alpha, state.alpha + newscore); #endif - } else { + } + else + { // single branch #ifdef lpv - newscore = - v_score_single(p,q,i,j, nucp, nucp1, nucq_1, nucq, - nuci_1, nuci, nucj, nucj1); - Fast_LogPlusEquals(bestP[q][p].alpha, state.alpha + newscore/kT); + newscore = -v_score_single(p, q, i, j, nucp, nucp1, nucq_1, nucq, + nuci_1, nuci, nucj, nucj1); + Fast_LogPlusEquals(bestP[q][p].alpha, state.alpha + newscore / kT); #else newscore = score_junction_B(p, q, nucp, nucp1, nucq_1, nucq) + - precomputed + - score_single_without_junctionB(p, q, i, j, - nuci_1, nuci, nucj, nucj1); + precomputed + + score_single_without_junctionB(p, q, i, j, + nuci_1, nuci, nucj, nucj1); Fast_LogPlusEquals(bestP[q][p].alpha, state.alpha + newscore); #endif } @@ -318,29 +365,32 @@ double BeamCKYParser::parse(string& seq) { } // 2. M = P - if(i > 0 && j < seq_length-1){ + if (i > 0 && j < seq_length - 1) + { #ifdef lpv - newscore = - v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); - Fast_LogPlusEquals(beamstepM[i].alpha, state.alpha + newscore/kT); + newscore = -v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); + Fast_LogPlusEquals(beamstepM[i].alpha, state.alpha + newscore / kT); #else - newscore = score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length); - Fast_LogPlusEquals(beamstepM[i].alpha, state.alpha + newscore); + newscore = score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length); + Fast_LogPlusEquals(beamstepM[i].alpha, state.alpha + newscore); #endif } // 3. M2 = M + P int k = i - 1; - if ( k > 0 && !bestM[k].empty()) { + if (k > 0 && !bestM[k].empty()) + { #ifdef lpv - newscore = - v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); - pf_type m1_alpha = state.alpha + newscore/kT; + newscore = -v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); + pf_type m1_alpha = state.alpha + newscore / kT; #else newscore = score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length); pf_type m1_alpha = state.alpha + newscore; #endif - for (auto &m : bestM[k]) { + for (auto &m : bestM[k]) + { int newi = m.first; - State& m_state = m.second; + State &m_state = m.second; Fast_LogPlusEquals(beamstepM2[newi].alpha, m_state.alpha + m1_alpha); } } @@ -348,27 +398,30 @@ double BeamCKYParser::parse(string& seq) { // 4. C = C + P { int k = i - 1; - if (k >= 0) { - State& prefix_C = bestC[k]; + if (k >= 0) + { + State &prefix_C = bestC[k]; int nuck = nuci_1; int nuck1 = nuci; #ifdef lpv - newscore = - v_score_external_paired(k+1, j, nuck, nuck1, - nucj, nucj1, seq_length, dangle_mode); - Fast_LogPlusEquals(beamstepC.alpha, prefix_C.alpha + state.alpha + newscore/kT); + newscore = -v_score_external_paired(k + 1, j, nuck, nuck1, + nucj, nucj1, seq_length, dangle_mode); + Fast_LogPlusEquals(beamstepC.alpha, prefix_C.alpha + state.alpha + newscore / kT); #else - newscore = score_external_paired(k+1, j, nuck, nuck1, - nucj, nucj1, seq_length); + newscore = score_external_paired(k + 1, j, nuck, nuck1, + nucj, nucj1, seq_length); Fast_LogPlusEquals(beamstepC.alpha, prefix_C.alpha + state.alpha + newscore); #endif - } else { + } + else + { #ifdef lpv - newscore = - v_score_external_paired(0, j, -1, nucs[0], - nucj, nucj1, seq_length, dangle_mode); - Fast_LogPlusEquals(beamstepC.alpha, state.alpha + newscore/kT); + newscore = -v_score_external_paired(0, j, -1, nucs[0], + nucj, nucj1, seq_length, dangle_mode); + Fast_LogPlusEquals(beamstepC.alpha, state.alpha + newscore / kT); #else newscore = score_external_paired(0, j, -1, nucs[0], - nucj, nucj1, seq_length); + nucj, nucj1, seq_length); Fast_LogPlusEquals(beamstepC.alpha, state.alpha + newscore); #endif } @@ -378,46 +431,53 @@ double BeamCKYParser::parse(string& seq) { // beam of M2 { - if (beam > 0 && beamstepM2.size() > beam) beam_prune(beamstepM2); + if (beam > 0 && beamstepM2.size() > beam) + beam_prune(beamstepM2); - for(auto& item : beamstepM2) { + for (auto &item : beamstepM2) + { int i = item.first; - State& state = item.second; + State &state = item.second; // 1. multi-loop - for (int p = i-1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) { + for (int p = i - 1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) + { int nucp = nucs[p]; int q = next_pair[nucp][j]; - if (q != -1 && ((i - p - 1) <= SINGLE_MAX_LEN)) { + if (q != -1 && ((i - p - 1) <= SINGLE_MAX_LEN)) + { #ifdef lpv - Fast_LogPlusEquals(bestMulti[q][p].alpha, state.alpha); + Fast_LogPlusEquals(bestMulti[q][p].alpha, state.alpha); #else - newscore = score_multi_unpaired(p+1, i-1) + - score_multi_unpaired(j+1, q-1); - Fast_LogPlusEquals(bestMulti[q][p].alpha, state.alpha + newscore); + newscore = score_multi_unpaired(p + 1, i - 1) + + score_multi_unpaired(j + 1, q - 1); + Fast_LogPlusEquals(bestMulti[q][p].alpha, state.alpha + newscore); #endif } } // 2. M = M2 - Fast_LogPlusEquals(beamstepM[i].alpha, state.alpha); + Fast_LogPlusEquals(beamstepM[i].alpha, state.alpha); } } // beam of M { - if (beam > 0 && beamstepM.size() > beam) beam_prune(beamstepM); + if (beam > 0 && beamstepM.size() > beam) + beam_prune(beamstepM); - for(auto& item : beamstepM) { + for (auto &item : beamstepM) + { int i = item.first; - State& state = item.second; - if (j < seq_length-1) { + State &state = item.second; + if (j < seq_length - 1) + { #ifdef lpv - Fast_LogPlusEquals(bestM[j+1][i].alpha, state.alpha); + Fast_LogPlusEquals(bestM[j + 1][i].alpha, state.alpha); #else newscore = score_multi_unpaired(j + 1, j + 1); - Fast_LogPlusEquals(bestM[j+1][i].alpha, state.alpha + newscore); + Fast_LogPlusEquals(bestM[j + 1][i].alpha, state.alpha + newscore); #endif } } @@ -426,80 +486,98 @@ double BeamCKYParser::parse(string& seq) { // beam of C { // C = C + U - if (j < seq_length-1) { + if (j < seq_length - 1) + { #ifdef lpv - Fast_LogPlusEquals(bestC[j+1].alpha, beamstepC.alpha); - + Fast_LogPlusEquals(bestC[j + 1].alpha, beamstepC.alpha); + #else - newscore = score_external_unpaired(j+1, j+1); - Fast_LogPlusEquals(bestC[j+1].alpha, beamstepC.alpha + newscore); + newscore = score_external_unpaired(j + 1, j + 1); + Fast_LogPlusEquals(bestC[j + 1].alpha, beamstepC.alpha + newscore); #endif } } - } // end of for-loo j + } // end of for-loo j - State& viterbi = bestC[seq_length-1]; + State &viterbi = bestC[seq_length - 1]; gettimeofday(&parse_endtime, NULL); - double parse_elapsed_time = parse_endtime.tv_sec - parse_starttime.tv_sec + (parse_endtime.tv_usec-parse_starttime.tv_usec)/1000000.0; + double parse_elapsed_time = parse_endtime.tv_sec - parse_starttime.tv_sec + (parse_endtime.tv_usec - parse_starttime.tv_usec) / 1000000.0; - double ensemble; + double ensemble; #ifdef lpv ensemble = -kT * viterbi.alpha / 100.0; // -kT log(Q(x)) - fprintf(stderr,"Free Energy of Ensemble: %.5f kcal/mol\n", ensemble); + fprintf(stderr, "Free Energy of Ensemble: %.5f kcal/mol\n", ensemble); #else ensemble = viterbi.alpha; - fprintf(stderr,"Log Partition Coefficient: %.5f\n", ensemble); + fprintf(stderr, "Log Partition Coefficient: %.5f\n", ensemble); #endif - if(is_verbose) fprintf(stderr,"Partition Function Calculation Time: %.2f seconds.\n", parse_elapsed_time); + if (is_verbose) + fprintf(stderr, "Partition Function Calculation Time: %.2f seconds.\n", parse_elapsed_time); fflush(stdout); // lhuang - if(pf_only && !forest_file.empty()) dump_forest(seq, true); // inside-only forest + if (pf_only && !forest_file.empty()) + dump_forest(seq, true); // inside-only forest - if(!pf_only){ + if (!pf_only) + { outside(next_pair); - if (!forest_file.empty()) - dump_forest(seq, false); // inside-outside forest - cal_PairProb(viterbi); + if (!forest_file.empty()) + dump_forest(seq, false); // inside-outside forest + cal_PairProb(viterbi); + + cal_unpaired(seq); - if (mea_) PairProb_MEA(seq); + if (access_u) + print_accessible_U(seq); - if (threshknot_) ThreshKnot(seq); + if (mea_) + PairProb_MEA(seq); + + if (threshknot_) + ThreshKnot(seq); } postprocess(); return ensemble; } -void BeamCKYParser::print_states(FILE *fptr, unordered_map& states, int j, string label, bool inside_only, double threshold) { - for (auto & item : states) { +void BeamCKYParser::print_states(FILE *fptr, unordered_map &states, int j, string label, bool inside_only, double threshold) +{ + for (auto &item : states) + { int i = item.first; - State & state = item.second; - if (inside_only) fprintf(fptr, "%s %d %d %.5lf\n", label.c_str(), i+1, j+1, state.alpha); + State &state = item.second; + if (inside_only) + fprintf(fptr, "%s %d %d %.5lf\n", label.c_str(), i + 1, j + 1, state.alpha); else if (state.alpha + state.beta > threshold) // lhuang : alpha + beta - totalZ < ... - fprintf(fptr, "%s %d %d %.5lf %.5lf\n", label.c_str(), i+1, j+1, state.alpha, state.beta); + fprintf(fptr, "%s %d %d %.5lf %.5lf\n", label.c_str(), i + 1, j + 1, state.alpha, state.beta); } } -void BeamCKYParser::dump_forest(string seq, bool inside_only) { +void BeamCKYParser::dump_forest(string seq, bool inside_only) +{ printf("Dumping (%s) Forest to %s...\n", (inside_only ? "Inside-Only" : "Inside-Outside"), forest_file.c_str()); - FILE *fptr = fopen(forest_file.c_str(), "w"); // lhuang: should be fout >> + FILE *fptr = fopen(forest_file.c_str(), "w"); // lhuang: should be fout >> fprintf(fptr, "%s\n", seq.c_str()); int n = seq.length(), j; - for (j = 0; j < n; j++) { - if (inside_only) fprintf(fptr, "E %d %.5lf\n", j+1, bestC[j].alpha); - else fprintf(fptr, "E %d %.5lf %.5lf\n", j+1, bestC[j].alpha, bestC[j].beta); + for (j = 0; j < n; j++) + { + if (inside_only) + fprintf(fptr, "E %d %.5lf\n", j + 1, bestC[j].alpha); + else + fprintf(fptr, "E %d %.5lf %.5lf\n", j + 1, bestC[j].alpha, bestC[j].beta); } - double threshold = bestC[n-1].alpha - 9.91152; // lhuang -9.xxx or ? - for (j = 0; j < n; j++) + double threshold = bestC[n - 1].alpha - 9.91152; // lhuang -9.xxx or ? + for (j = 0; j < n; j++) print_states(fptr, bestP[j], j, "P", inside_only, threshold); - for (j = 0; j < n; j++) + for (j = 0; j < n; j++) print_states(fptr, bestM[j], j, "M", inside_only, threshold); - for (j = 0; j < n; j++) + for (j = 0; j < n; j++) print_states(fptr, bestM2[j], j, "M2", inside_only, threshold); - for (j = 0; j < n; j++) + for (j = 0; j < n; j++) print_states(fptr, bestMulti[j], j, "Multi", inside_only, threshold); } @@ -510,7 +588,7 @@ BeamCKYParser::BeamCKYParser(int beam_size, string bppfileindex, bool pfonly, float bppcutoff, - string forestfile, + string forestfile, bool mea, float MEA_gamma, string MEA_file_index, @@ -520,15 +598,17 @@ BeamCKYParser::BeamCKYParser(int beam_size, string ThreshKnot_file_index, string shape_file_path, bool fasta, - int dangles) - : beam(beam_size), - no_sharp_turn(nosharpturn), + int dangles, + string unpairedfile, + bool accessu) + : beam(beam_size), + no_sharp_turn(nosharpturn), is_verbose(verbose), bpp_file(bppfile), bpp_file_index(bppfileindex), pf_only(pfonly), bpp_cutoff(bppcutoff), - forest_file(forestfile), + forest_file(forestfile), mea_(mea), gamma(MEA_gamma), mea_file_index(MEA_file_index), @@ -537,15 +617,20 @@ BeamCKYParser::BeamCKYParser(int beam_size, threshknot_threshold(ThreshKnot_threshold), threshknot_file_index(ThreshKnot_file_index), is_fasta(fasta), - dangle_mode(dangles) { + dangle_mode(dangles), + unpaired_file(unpairedfile), + access_u(accessu) + +{ #ifdef lpv - initialize(); + initialize(); #else - initialize(); - initialize_cachesingle(); + initialize(); + initialize_cachesingle(); #endif - if (shape_file_path != "" ){ + if (shape_file_path != "") + { use_shape = true; int position; string data; @@ -554,40 +639,48 @@ BeamCKYParser::BeamCKYParser(int beam_size, ifstream in(shape_file_path); - if (!in.good()){ - cout<<"Reading SHAPE file error!"<> position >> data).fail()) { - if (isdigit(int(data[0])) == 0){ + while (!(in >> position >> data).fail()) + { + if (isdigit(int(data[0])) == 0) + { SHAPE_data.push_back(double((-1.000000))); } - else { + else + { SHAPE_data.push_back(stod(data)); } } - for (int i = 0; i 1) { + // unpaired and AUP + string unpaired_file; + bool access_u = false; + + if (argc > 1) + { beamsize = atoi(argv[1]); sharpturn = atoi(argv[2]) == 1; is_verbose = atoi(argv[3]) == 1; @@ -623,7 +721,7 @@ int main(int argc, char** argv){ bpp_prefix = argv[5]; pf_only = atoi(argv[6]) == 1; bpp_cutoff = atof(argv[7]); - forest_file = argv[8]; + forest_file = argv[8]; mea = atoi(argv[9]) == 1; MEA_gamma = atof(argv[10]); ThreshKnot = atoi(argv[11]) == 1; @@ -633,14 +731,17 @@ int main(int argc, char** argv){ MEA_bpseq = atoi(argv[15]) == 1; shape_file_path = argv[16]; fasta = atoi(argv[17]) == 1; - dangles = atoi(argv[18]); + dangles = atoi(argv[18]); ystruct = argv[19]; // for p(y|x) + unpaired_file = argv[20]; + access_u = atoi(argv[21]) == 1; } - if (is_verbose) printf("beam size: %d\n", beamsize); + if (is_verbose) + printf("beam size: %d\n", beamsize); // variables for decoding - int num=0, total_len = 0; + int num = 0, total_len = 0; unsigned long long total_states = 0; double total_score = .0; double total_time = .0; @@ -652,27 +753,39 @@ int main(int argc, char** argv){ string rna_seq; vector rna_seq_list, rna_name_list; - if (fasta){ - for (string seq; getline(cin, seq);){ - if (seq.empty()) continue; - else if (seq[0] == '>' or seq[0] == ';'){ + if (fasta) + { + for (string seq; getline(cin, seq);) + { + if (seq.empty()) + continue; + else if (seq[0] == '>' or seq[0] == ';') + { rna_name_list.push_back(seq); // sequence name if (!rna_seq.empty()) rna_seq_list.push_back(rna_seq); rna_seq.clear(); continue; - }else{ + } + else + { rtrim(seq); rna_seq += seq; } } if (!rna_seq.empty()) rna_seq_list.push_back(rna_seq); - } else { - for (string seq; getline(cin, seq);){ - if (seq.empty()) continue; - if (seq[0] == '>' or seq[0] == ';') continue; - if (!isalpha(seq[0])){ + } + else + { + for (string seq; getline(cin, seq);) + { + if (seq.empty()) + continue; + if (seq[0] == '>' or seq[0] == ';') + continue; + if (!isalpha(seq[0])) + { printf("Unrecognized sequence: %s\n", seq.c_str()); continue; } @@ -681,30 +794,36 @@ int main(int argc, char** argv){ } // TODO: no need to store all seqs - for(int i = 0; i < rna_seq_list.size(); i++){ + for (int i = 0; i < rna_seq_list.size(); i++) + { if (rna_name_list.size() > i) printf("%s\n", rna_name_list[i].c_str()); rna_seq = rna_seq_list[i]; printf("%s\n", rna_seq.c_str()); - if (!bpp_file.empty()) { - FILE *fptr = fopen(bpp_file.c_str(), "a"); - if (fptr == NULL) { - printf("Could not open file!\n"); - return 0; + if (!bpp_file.empty()) + { + FILE *fptr = fopen(bpp_file.c_str(), "a"); + if (fptr == NULL) + { + printf("Could not open file!\n"); + return 0; } if (rna_name_list.size() > i) fprintf(fptr, "%s\n", rna_name_list[i].c_str()); - fclose(fptr); + fclose(fptr); } - seq_index ++; - if (!bpp_prefix.empty()) bpp_file_index = bpp_prefix + to_string(seq_index); + seq_index++; + if (!bpp_prefix.empty()) + bpp_file_index = bpp_prefix + to_string(seq_index); + + if (!ThresKnot_prefix.empty()) + ThreshKnot_file_index = ThresKnot_prefix + to_string(seq_index); - if (!ThresKnot_prefix.empty()) ThreshKnot_file_index = ThresKnot_prefix + to_string(seq_index); + if (!MEA_prefix.empty()) + MEA_file_index = MEA_prefix + to_string(seq_index); - if (!MEA_prefix.empty()) MEA_file_index = MEA_prefix + to_string(seq_index); - // convert to uppercase transform(rna_seq.begin(), rna_seq.end(), rna_seq.begin(), ::toupper); @@ -712,21 +831,23 @@ int main(int argc, char** argv){ replace(rna_seq.begin(), rna_seq.end(), 'T', 'U'); // lhuang: moved inside loop, fixing an obscure but crucial bug in initialization - BeamCKYParser parser(beamsize, !sharpturn, is_verbose, bpp_file, bpp_file_index, pf_only, bpp_cutoff, forest_file, mea, MEA_gamma, MEA_file_index, MEA_bpseq, ThreshKnot, ThreshKnot_threshold, ThreshKnot_file_index, shape_file_path, fasta, dangles); + BeamCKYParser parser(beamsize, !sharpturn, is_verbose, bpp_file, bpp_file_index, pf_only, bpp_cutoff, forest_file, mea, MEA_gamma, MEA_file_index, MEA_bpseq, ThreshKnot, ThreshKnot_threshold, ThreshKnot_file_index, shape_file_path, fasta, dangles, unpaired_file, access_u); double ensemble = parser.parse(rna_seq); // ensemble free energy - if (!ystruct.empty()) { - double energy = -eval(rna_seq, ystruct, is_verbose, dangles)/100.; // LinearFoldEval.h - double prob = exp(100*(energy-ensemble)/ -kT); + if (!ystruct.empty()) + { + double energy = -eval(rna_seq, ystruct, is_verbose, dangles) / 100.; // LinearFoldEval.h + double prob = exp(100 * (energy - ensemble) / -kT); printf("x= %s\ty= %s\tDeltaG(x,y)= %.2f\t-kTlogQ(x)= %.5f\tp(y|x)= %.5f\n", rna_seq.c_str(), ystruct.c_str(), energy, ensemble, prob); } } gettimeofday(&total_endtime, NULL); - double total_elapsed_time = total_endtime.tv_sec - total_starttime.tv_sec + (total_endtime.tv_usec-total_starttime.tv_usec)/1000000.0; + double total_elapsed_time = total_endtime.tv_sec - total_starttime.tv_sec + (total_endtime.tv_usec - total_starttime.tv_usec) / 1000000.0; - if(is_verbose) fprintf(stderr,"Total Time: %.2f seconds.\n", total_elapsed_time); + if (is_verbose) + fprintf(stderr, "Total Time: %.2f seconds.\n", total_elapsed_time); return 0; } diff --git a/src/LinearPartition.h b/src/LinearPartition.h index 5a850c1..75c52c4 100755 --- a/src/LinearPartition.h +++ b/src/LinearPartition.h @@ -13,67 +13,66 @@ #include #include #include -#include +#include #include // #define MIN_CUBE_PRUNING_SIZE 20 #define kT 61.63207755 -#define NEG_INF -2e20 +#define NEG_INF -2e20 // #define testtime using namespace std; #ifdef lpv - typedef float pf_type; +typedef float pf_type; #else - typedef double pf_type; +typedef double pf_type; #endif - #ifdef lpv - typedef int value_type; - #define VALUE_MIN numeric_limits::lowest() +typedef int value_type; +#define VALUE_MIN numeric_limits::lowest() #else - typedef double value_type; - #define VALUE_MIN numeric_limits::lowest() +typedef double value_type; +#define VALUE_MIN numeric_limits::lowest() #endif -// A hash function used to hash a pair of any kind -struct hash_pair { - template - size_t operator()(const pair& p) const - { - auto hash1 = hash{}(p.first); - auto hash2 = hash{}(p.second); - return hash1 ^ hash2; - } +// A hash function used to hash a pair of any kind +struct hash_pair +{ + template + size_t operator()(const pair &p) const + { + auto hash1 = hash{}(p.first); + auto hash2 = hash{}(p.second); + return hash1 ^ hash2; + } }; - - struct comp { - template - bool operator()(const T& l, const T& r) const + template + bool operator()(const T &l, const T &r) const { if (l.first == r.first) return l.second < r.second; - + return l.first < r.first; } }; -struct State { +struct State +{ pf_type alpha; pf_type beta; - State(): alpha(VALUE_MIN), beta(VALUE_MIN) {}; + State() : alpha(VALUE_MIN), beta(VALUE_MIN) {}; }; - -class BeamCKYParser { +class BeamCKYParser +{ public: int beam; bool no_sharp_turn; @@ -91,38 +90,45 @@ class BeamCKYParser { float threshknot_threshold; string threshknot_file_index; bool is_fasta; - int dangle_mode; + int dangle_mode; // SHAPE bool use_shape = false; double m = 1.8; double b = -0.6; - - BeamCKYParser(int beam_size=100, - bool nosharpturn=true, - bool is_verbose=false, - string bppfile="", - string bppfileindex="", - bool pf_only=false, - float bpp_cutoff=0.0, - string forestfile="", - bool mea_=false, - float gamma=3.0, - string mea_file_index="", - bool bpseq=false, - bool threshknot_=false, - float threshknot_threshold=0.3, - string threshknot_file_index="", - string shape_file_path="", - bool is_fasta=false, - int dangles=1); + // unpaired and AUP + string unpaired_file; + + // accessible U% + bool access_u; + + BeamCKYParser(int beam_size = 100, + bool nosharpturn = true, + bool is_verbose = false, + string bppfile = "", + string bppfileindex = "", + bool pf_only = false, + float bpp_cutoff = 0.0, + string forestfile = "", + bool mea_ = false, + float gamma = 3.0, + string mea_file_index = "", + bool bpseq = false, + bool threshknot_ = false, + float threshknot_threshold = 0.3, + string threshknot_file_index = "", + string shape_file_path = "", + bool is_fasta = false, + int dangles = 1, + string unpairedfile = "", + bool accessu = false); // DecoderResult parse(string& seq); - double parse(string& seq); + double parse(string &seq); private: - void get_parentheses(char* result, string& seq); + void get_parentheses(char *result, string &seq); unsigned seq_length; @@ -139,42 +145,52 @@ class BeamCKYParser { void prepare(unsigned len); void postprocess(); - void cal_PairProb(State& viterbi); + void cal_PairProb(State &viterbi); - void PairProb_MEA(string & seq); + void PairProb_MEA(string &seq); - void ThreshKnot(string & seq); + void ThreshKnot(string &seq); - string back_trace(const int i, const int j, const vector >& back_pointer); - map get_pairs(string & structure); + string back_trace(const int i, const int j, const vector> &back_pointer); + map get_pairs(string &structure); void outside(vector next_pair[]); void dump_forest(string seq, bool inside_only); - void print_states(FILE *fptr, unordered_map& states, int j, string label, bool inside_only, double threshold); + void print_states(FILE *fptr, unordered_map &states, int j, string label, bool inside_only, double threshold); - pf_type beam_prune(unordered_map& beamstep); + pf_type beam_prune(unordered_map &beamstep); vector> scores; - unordered_map, pf_type, hash_pair> Pij; + unordered_map, pf_type, hash_pair> Pij; + + void output_to_file(string file_name, const char *type); + void output_to_file_MEA_threshknot_bpseq(string file_name, const char *type, map &pairs, string &seq); + + // unpaired and AUP + vector unpaired_prob; - void output_to_file(string file_name, const char * type); - void output_to_file_MEA_threshknot_bpseq(string file_name, const char * type, map & pairs, string & seq); + pf_type AUP; + void cal_unpaired(string &seq); + void output_to_file_unpaired(string &seq, string file_name, const char *type); + + // accessible U% + void print_accessible_U(string &seq); // SHAPE std::vector SHAPE_data; std::vector pseudo_energy_stack; - }; // log space: borrowed from CONTRAfold -inline pf_type Fast_LogExpPlusOne(pf_type x){ - +inline pf_type Fast_LogExpPlusOne(pf_type x) +{ + // Bounds for tolerance of 7.05e-06: (0, 11.8625) // Approximating interval: (0, 0.661537) --> ((T(-0.0065591595)*x+T(0.1276442762))*x+T(0.4996554598))*x+T(0.6931542306); // Approximating interval: (0.661537, 1.63202) --> ((T(-0.0155157557)*x+T(0.1446775699))*x+T(0.4882939746))*x+T(0.6958092989); @@ -185,36 +201,37 @@ inline pf_type Fast_LogExpPlusOne(pf_type x){ // Approximating interval: (5.78907, 7.81627) --> ((T(-0.0001962780)*x+T(0.0046084408))*x+T(0.9634431978))*x+T(0.0983148903); // Approximating interval: (7.81627, 11.8625) --> ((T(-0.0000113994)*x+T(0.0003734731))*x+T(0.9959107193))*x+T(0.0149855051); // 8 polynomials needed. - + assert(pf_type(0.0000000000) <= x && x <= pf_type(11.8624794162) && "Argument out-of-range."); if (x < pf_type(3.3792499610)) { if (x < pf_type(1.6320158198)) { if (x < pf_type(0.6615367791)) - return ((pf_type(-0.0065591595)*x+pf_type(0.1276442762))*x+pf_type(0.4996554598))*x+pf_type(0.6931542306); - return ((pf_type(-0.0155157557)*x+pf_type(0.1446775699))*x+pf_type(0.4882939746))*x+pf_type(0.6958092989); + return ((pf_type(-0.0065591595) * x + pf_type(0.1276442762)) * x + pf_type(0.4996554598)) * x + pf_type(0.6931542306); + return ((pf_type(-0.0155157557) * x + pf_type(0.1446775699)) * x + pf_type(0.4882939746)) * x + pf_type(0.6958092989); } if (x < pf_type(2.4912588184)) - return ((pf_type(-0.0128909247)*x+pf_type(0.1301028251))*x+pf_type(0.5150398748))*x+pf_type(0.6795585882); - return ((pf_type(-0.0072142647)*x+pf_type(0.0877540853))*x+pf_type(0.6208708362))*x+pf_type(0.5909675829); + return ((pf_type(-0.0128909247) * x + pf_type(0.1301028251)) * x + pf_type(0.5150398748)) * x + pf_type(0.6795585882); + return ((pf_type(-0.0072142647) * x + pf_type(0.0877540853)) * x + pf_type(0.6208708362)) * x + pf_type(0.5909675829); } if (x < pf_type(5.7890710412)) { if (x < pf_type(4.4261691294)) - return ((pf_type(-0.0031455354)*x+pf_type(0.0467229449))*x+pf_type(0.7592532310))*x+pf_type(0.4348794399); - return ((pf_type(-0.0010110698)*x+pf_type(0.0185943421))*x+pf_type(0.8831730747))*x+pf_type(0.2523695427); + return ((pf_type(-0.0031455354) * x + pf_type(0.0467229449)) * x + pf_type(0.7592532310)) * x + pf_type(0.4348794399); + return ((pf_type(-0.0010110698) * x + pf_type(0.0185943421)) * x + pf_type(0.8831730747)) * x + pf_type(0.2523695427); } if (x < pf_type(7.8162726752)) - return ((pf_type(-0.0001962780)*x+pf_type(0.0046084408))*x+pf_type(0.9634431978))*x+pf_type(0.0983148903); - return ((pf_type(-0.0000113994)*x+pf_type(0.0003734731))*x+pf_type(0.9959107193))*x+pf_type(0.0149855051); + return ((pf_type(-0.0001962780) * x + pf_type(0.0046084408)) * x + pf_type(0.9634431978)) * x + pf_type(0.0983148903); + return ((pf_type(-0.0000113994) * x + pf_type(0.0003734731)) * x + pf_type(0.9959107193)) * x + pf_type(0.0149855051); } -inline void Fast_LogPlusEquals (pf_type &x, pf_type y) +inline void Fast_LogPlusEquals(pf_type &x, pf_type y) { - if (x < y) std::swap (x, y); - if (y > pf_type(NEG_INF/2) && x-y < pf_type(11.8624794162)) - x = Fast_LogExpPlusOne(x-y) + y; + if (x < y) + std::swap(x, y); + if (y > pf_type(NEG_INF / 2) && x - y < pf_type(11.8624794162)) + x = Fast_LogExpPlusOne(x - y) + y; } inline pf_type Fast_Exp(pf_type x) @@ -227,28 +244,28 @@ inline pf_type Fast_Exp(pf_type x) // Approximating interval: (-1.48054, -0.672505) --> ((T(0.0573782771)*x+T(0.3580258429))*x+T(0.9121133217))*x+T(0.9793091728); // Approximating interval: (-0.672505, -3.9145e-11) --> ((T(0.1199175927)*x+T(0.4815668234))*x+T(0.9975991939))*x+T(0.9999505077); // 6 polynomials needed. - + if (x < pf_type(-2.4915033807)) { if (x < pf_type(-5.8622823336)) { if (x < pf_type(-9.91152)) return pf_type(0); - return ((pf_type(0.0000803850)*x+pf_type(0.0021627428))*x+pf_type(0.0194708555))*x+pf_type(0.0588080014); + return ((pf_type(0.0000803850) * x + pf_type(0.0021627428)) * x + pf_type(0.0194708555)) * x + pf_type(0.0588080014); } if (x < pf_type(-3.8396630909)) - return ((pf_type(0.0013889414)*x+pf_type(0.0244676474))*x+pf_type(0.1471290604))*x+pf_type(0.3042757740); - return ((pf_type(0.0072335607)*x+pf_type(0.0906002677))*x+pf_type(0.3983111356))*x+pf_type(0.6245959221); + return ((pf_type(0.0013889414) * x + pf_type(0.0244676474)) * x + pf_type(0.1471290604)) * x + pf_type(0.3042757740); + return ((pf_type(0.0072335607) * x + pf_type(0.0906002677)) * x + pf_type(0.3983111356)) * x + pf_type(0.6245959221); } if (x < pf_type(-0.6725053211)) { if (x < pf_type(-1.4805375919)) - return ((pf_type(0.0232410351)*x+pf_type(0.2085645908))*x+pf_type(0.6906367911))*x+pf_type(0.8682322329); - return ((pf_type(0.0573782771)*x+pf_type(0.3580258429))*x+pf_type(0.9121133217))*x+pf_type(0.9793091728); + return ((pf_type(0.0232410351) * x + pf_type(0.2085645908)) * x + pf_type(0.6906367911)) * x + pf_type(0.8682322329); + return ((pf_type(0.0573782771) * x + pf_type(0.3580258429)) * x + pf_type(0.9121133217)) * x + pf_type(0.9793091728); } if (x < pf_type(0)) - return ((pf_type(0.1199175927)*x+pf_type(0.4815668234))*x+pf_type(0.9975991939))*x+pf_type(0.9999505077); + return ((pf_type(0.1199175927) * x + pf_type(0.4815668234)) * x + pf_type(0.9975991939)) * x + pf_type(0.9999505077); return (x > pf_type(46.052) ? pf_type(1e20) : expf(x)); } -#endif //FASTCKY_BEAMCKYPAR_H +#endif // FASTCKY_BEAMCKYPAR_H diff --git a/src/bpp.cpp b/src/bpp.cpp index 76f25d3..6a502ce 100644 --- a/src/bpp.cpp +++ b/src/bpp.cpp @@ -6,140 +6,247 @@ created by: 04/2019 */ -#include +#include #include #include #include "LinearPartition.h" using namespace std; -void BeamCKYParser::output_to_file(string file_name, const char * type) { - if(!file_name.empty()) { - printf("Outputing base pairing probability matrix to %s...\n", file_name.c_str()); - FILE *fptr = fopen(file_name.c_str(), type); - if (fptr == NULL) { - printf("Could not open file!\n"); - return; +void BeamCKYParser::output_to_file(string file_name, const char *type) +{ + if (!file_name.empty()) + { + printf("Outputing base pairing probability matrix to %s...\n", file_name.c_str()); + FILE *fptr = fopen(file_name.c_str(), type); + if (fptr == NULL) + { + printf("Could not open file!\n"); + return; } - int turn = no_sharp_turn?3:0; - for (int i = 1; i <= seq_length; i++) { - for (int j = i + turn + 1; j <= seq_length; j++) { - pair key = make_pair(i,j); + int turn = no_sharp_turn ? 3 : 0; + for (int i = 1; i <= seq_length; i++) + { + for (int j = i + turn + 1; j <= seq_length; j++) + { + pair key = make_pair(i, j); auto got = Pij.find(key); - if (got != Pij.end()){ + if (got != Pij.end()) + { fprintf(fptr, "%d %d %.5f\n", i, j, got->second); // lhuang: %.4e->%.5f } } } fprintf(fptr, "\n"); - fclose(fptr); - printf("Done!\n"); + fclose(fptr); + printf("Done!\n"); } return; } -void BeamCKYParser::output_to_file_MEA_threshknot_bpseq(string file_name, const char * type, map& pairs, string & seq) { +void BeamCKYParser::output_to_file_unpaired(string &seq, string file_name, const char *type) +{ + if (!file_name.empty()) + { + printf("Outputing unpaired probability for each nuc and average unpaired probability (AUP) to %s...\n", file_name.c_str()); + FILE *fptr = fopen(file_name.c_str(), type); + if (fptr == NULL) + { + printf("Could not open file!\n"); + return; + } + + for (int i = 1; i <= seq_length; i++) + { + fprintf(fptr, "%d %c %.5f\n", i, seq[i - 1], unpaired_prob[i]); + } - int i,j; + fprintf(fptr, "\nAverage Unpaired Probability (AUP): %.5f\n", AUP); + fclose(fptr); + printf("Done!\n"); + } + + return; +} + +void BeamCKYParser::output_to_file_MEA_threshknot_bpseq(string file_name, const char *type, map &pairs, string &seq) +{ + + int i, j; char nuc; - if(!file_name.empty()) { - printf("Outputing base pairs in bpseq format to %s...\n", file_name.c_str()); - FILE *fptr = fopen(file_name.c_str(), type); - if (fptr == NULL) { - printf("Could not open file!\n"); - return; + if (!file_name.empty()) + { + printf("Outputing base pairs in bpseq format to %s...\n", file_name.c_str()); + FILE *fptr = fopen(file_name.c_str(), type); + if (fptr == NULL) + { + printf("Could not open file!\n"); + return; } - for (int i = 1; i <= seq_length; i++) { - if (pairs.find(i) != pairs.end()){ + for (int i = 1; i <= seq_length; i++) + { + if (pairs.find(i) != pairs.end()) + { j = pairs[i]; } - else{ + else + { j = 0; } - nuc = seq[i-1]; + nuc = seq[i - 1]; fprintf(fptr, "%d %c %d\n", i, nuc, j); } fprintf(fptr, "\n"); - fclose(fptr); - printf("Done!\n"); + fclose(fptr); + printf("Done!\n"); } - else{ - for (int i = 1; i <= seq_length; i++) { - if (pairs.find(i) != pairs.end()){ + else + { + for (int i = 1; i <= seq_length; i++) + { + if (pairs.find(i) != pairs.end()) + { j = pairs[i]; } - else{ + else + { j = 0; } - nuc = seq[i-1]; + nuc = seq[i - 1]; printf("%d %c %d\n", i, nuc, j); } printf("\n"); } - } -void BeamCKYParser::cal_PairProb(State& viterbi) { +void BeamCKYParser::cal_PairProb(State &viterbi) +{ - for(int j=0; j pf_type(-9.91152)) { + if (temp_prob_inside > pf_type(-9.91152)) + { pf_type prob = Fast_Exp(temp_prob_inside); - if(prob > pf_type(1.0)) prob = pf_type(1.0); - if(prob < pf_type(bpp_cutoff)) continue; - Pij[make_pair(i+1, j+1)] = prob; + if (prob > pf_type(1.0)) + prob = pf_type(1.0); + if (prob < pf_type(bpp_cutoff)) + continue; + Pij[make_pair(i + 1, j + 1)] = prob; } } } // -o mode: output to a single file with user specified name; // bpp matrices for different sequences are separated with empty lines - if (!bpp_file.empty()){ + if (!bpp_file.empty()) + { output_to_file(bpp_file, "a"); - } + } // -prefix mode: output to multiple files with user specified prefix; - else if (!bpp_file_index.empty()) { + else if (!bpp_file_index.empty()) + { output_to_file(bpp_file_index, "w"); } return; } +void BeamCKYParser::cal_unpaired(string &seq) +{ + for (const auto &entry : Pij) + { + int i = entry.first.first; + int j = entry.first.second; + pf_type prob = entry.second; + unpaired_prob[i] -= prob; + unpaired_prob[j] -= prob; + } -string BeamCKYParser::back_trace(const int i, const int j, const vector >& back_pointer){ + for (int i = 1; i <= seq_length; i++) + { + if (unpaired_prob[i] < 0) + unpaired_prob[i] = 0; + } - if (i>j) return ""; - if (back_pointer[i][j] == -1){ - if (i == j) return "."; - else return "." + back_trace(i+1,j, back_pointer); - }else if (back_pointer[i][j] != 0){ + pf_type total_unpaired = 0.0; + + for (int i = 1; i <= seq_length; i++) + { + total_unpaired += unpaired_prob[i]; + } + + AUP = total_unpaired / seq_length; + + if (!unpaired_file.empty()) + { + output_to_file_unpaired(seq, unpaired_file, "a"); + } + + return; +} + +void BeamCKYParser::print_accessible_U(string &seq) +{ + pf_type sum_unpaired_U = 0.0; + for (int i = 1; i <= seq_length; i++) + { + if (seq[i - 1] == 'U') + sum_unpaired_U += unpaired_prob[i]; + } + + pf_type accessible_U = sum_unpaired_U / seq_length * 100; + printf("\naccessible U%%: %.3f%%\n", accessible_U); +} + +string BeamCKYParser::back_trace(const int i, const int j, const vector> &back_pointer) +{ + + if (i > j) + return ""; + if (back_pointer[i][j] == -1) + { + if (i == j) + return "."; + else + return "." + back_trace(i + 1, j, back_pointer); + } + else if (back_pointer[i][j] != 0) + { int k = back_pointer[i][j]; assert(k + 1 > 0 && k + 1 <= seq_length); string temp; - if (k == j) temp = ""; - else temp = back_trace(k+1,j, back_pointer); - return "(" + back_trace(i+1,k-1, back_pointer) + ")" + temp; + if (k == j) + temp = ""; + else + temp = back_trace(k + 1, j, back_pointer); + return "(" + back_trace(i + 1, k - 1, back_pointer) + ")" + temp; } assert(false); return ""; } -map BeamCKYParser::get_pairs(string & structure){ +map BeamCKYParser::get_pairs(string &structure) +{ map pairs; stack s; int index = 1; int pre_index = 0; - for (auto & elem : structure){ - if (elem == '(') s.push(index); - else if(elem == ')'){ + for (auto &elem : structure) + { + if (elem == '(') + s.push(index); + else if (elem == ')') + { pre_index = s.top(); pairs[pre_index] = index; pairs[index] = pre_index; @@ -150,37 +257,42 @@ map BeamCKYParser::get_pairs(string & structure){ return pairs; } -void BeamCKYParser::ThreshKnot(string & seq){ - +void BeamCKYParser::ThreshKnot(string &seq) +{ + map rowprob; - vector > prob_list; + vector> prob_list; map pairs; set visited; - for(auto& pij : Pij){ - auto i = pij.first.first; //index starts from 1 - auto j = pij.first.second; + for (auto &pij : Pij) + { + auto i = pij.first.first; // index starts from 1 + auto j = pij.first.second; auto score = pij.second; - if (score < threshknot_threshold) continue; + if (score < threshknot_threshold) + continue; - prob_list.push_back(make_tuple(i,j,score)); + prob_list.push_back(make_tuple(i, j, score)); rowprob[i] = max(rowprob[i], score); rowprob[j] = max(rowprob[j], score); - } - for(auto& elem : prob_list){ + for (auto &elem : prob_list) + { auto i = std::get<0>(elem); auto j = std::get<1>(elem); - auto score = std::get<2>(elem); + auto score = std::get<2>(elem); - if (score == rowprob[i] && score == rowprob[j]){ + if (score == rowprob[i] && score == rowprob[j]) + { - if ((visited.find(i) != visited.end()) || (visited.find(j) != visited.end())) continue; + if ((visited.find(i) != visited.end()) || (visited.find(j) != visited.end())) + continue; visited.insert(i); visited.insert(j); @@ -193,32 +305,38 @@ void BeamCKYParser::ThreshKnot(string & seq){ output_to_file_MEA_threshknot_bpseq(threshknot_file_index, "w", pairs, seq); } -void BeamCKYParser::PairProb_MEA(string & seq) { - - vector > OPT; +void BeamCKYParser::PairProb_MEA(string &seq) +{ + + vector> OPT; OPT.resize(seq_length); - for (int i = 0; i < seq_length; ++i) OPT[i].resize(seq_length); + for (int i = 0; i < seq_length; ++i) + OPT[i].resize(seq_length); vector> P; P.resize(seq_length); - for (int i = 0; i < seq_length; ++i) P[i].resize(seq_length); + for (int i = 0; i < seq_length; ++i) + P[i].resize(seq_length); - vector > back_pointer; + vector> back_pointer; back_pointer.resize(seq_length); - for (int i = 0; i < seq_length; ++i) back_pointer[i].resize(seq_length); + for (int i = 0; i < seq_length; ++i) + back_pointer[i].resize(seq_length); vector> paired; paired.resize(seq_length); vector Q; - for (int i = 0; i < seq_length; ++i) Q.push_back(pf_type(1.0)); + for (int i = 0; i < seq_length; ++i) + Q.push_back(pf_type(1.0)); - for(auto& pij : Pij){ - auto i = pij.first.first-1; - auto j = pij.first.second-1; + for (auto &pij : Pij) + { + auto i = pij.first.first - 1; + auto j = pij.first.second - 1; auto score = pij.second; P[i][j] = score; @@ -228,24 +346,33 @@ void BeamCKYParser::PairProb_MEA(string & seq) { Q[j] -= score; } - for (int i = 0; i < seq_length; ++i) std::sort (paired[i].begin(), paired[i].end()); - for (int l = 0; l< seq_length; l++){ - for (int i = 0; ij) break; + for (int k : paired[i]) + { + if (k > j) + break; pf_type temp_OPT_k1_j; - if (k next_pair[]) +{ -void BeamCKYParser::outside(vector next_pair[]){ - struct timeval bpp_starttime, bpp_endtime; gettimeofday(&bpp_starttime, NULL); - bestC[seq_length-1].beta = 0.0; + bestC[seq_length - 1].beta = 0.0; // from right to left value_type newscore; - for(int j = seq_length-1; j > 0; --j) { + for (int j = seq_length - 1; j > 0; --j) + { int nucj = nucs[j]; - int nucj1 = (j+1) < seq_length ? nucs[j+1] : -1; + int nucj1 = (j + 1) < seq_length ? nucs[j + 1] : -1; - unordered_map& beamstepH = bestH[j]; - unordered_map& beamstepMulti = bestMulti[j]; - unordered_map& beamstepP = bestP[j]; - unordered_map& beamstepM2 = bestM2[j]; - unordered_map& beamstepM = bestM[j]; - State& beamstepC = bestC[j]; + unordered_map &beamstepH = bestH[j]; + unordered_map &beamstepMulti = bestMulti[j]; + unordered_map &beamstepP = bestP[j]; + unordered_map &beamstepM2 = bestM2[j]; + unordered_map &beamstepM = bestM[j]; + State &beamstepC = bestC[j]; // beam of C { // C = C + U - if (j < seq_length-1) { + if (j < seq_length - 1) + { #ifdef lpv - Fast_LogPlusEquals(beamstepC.beta, (bestC[j+1].beta)); - + Fast_LogPlusEquals(beamstepC.beta, (bestC[j + 1].beta)); + #else - newscore = score_external_unpaired(j+1, j+1); - Fast_LogPlusEquals(beamstepC.beta, bestC[j+1].beta + newscore); + newscore = score_external_unpaired(j + 1, j + 1); + Fast_LogPlusEquals(beamstepC.beta, bestC[j + 1].beta + newscore); #endif } } - + // beam of M { - for(auto& item : beamstepM) { + for (auto &item : beamstepM) + { int i = item.first; - State& state = item.second; - if (j < seq_length-1) { + State &state = item.second; + if (j < seq_length - 1) + { #ifdef lpv - Fast_LogPlusEquals(state.beta, bestM[j+1][i].beta); + Fast_LogPlusEquals(state.beta, bestM[j + 1][i].beta); #else newscore = score_multi_unpaired(j + 1, j + 1); - Fast_LogPlusEquals(state.beta, bestM[j+1][i].beta + newscore); + Fast_LogPlusEquals(state.beta, bestM[j + 1][i].beta + newscore); #endif } } @@ -331,21 +467,24 @@ void BeamCKYParser::outside(vector next_pair[]){ // beam of M2 { - for(auto& item : beamstepM2) { + for (auto &item : beamstepM2) + { int i = item.first; - State& state = item.second; + State &state = item.second; // 1. multi-loop { - for (int p = i-1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) { + for (int p = i - 1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) + { int nucp = nucs[p]; int q = next_pair[nucp][j]; - if (q != -1 && ((i - p - 1) <= SINGLE_MAX_LEN)) { + if (q != -1 && ((i - p - 1) <= SINGLE_MAX_LEN)) + { #ifdef lpv Fast_LogPlusEquals(state.beta, bestMulti[q][p].beta); #else - newscore = score_multi_unpaired(p+1, i-1) + - score_multi_unpaired(j+1, q-1); + newscore = score_multi_unpaired(p + 1, i - 1) + + score_multi_unpaired(j + 1, q - 1); Fast_LogPlusEquals(state.beta, bestMulti[q][p].beta + newscore); #endif } @@ -358,52 +497,58 @@ void BeamCKYParser::outside(vector next_pair[]){ } // beam of P - { - for(auto& item : beamstepP) { + { + for (auto &item : beamstepP) + { int i = item.first; - State& state = item.second; + State &state = item.second; int nuci = nucs[i]; - int nuci_1 = (i-1>-1) ? nucs[i-1] : -1; + int nuci_1 = (i - 1 > -1) ? nucs[i - 1] : -1; - if (i >0 && j 0 && j < seq_length - 1) + { #ifndef lpv value_type precomputed = score_junction_B(j, i, nucj, nucj1, nuci_1, nuci); #endif - for (int p = i - 1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) { + for (int p = i - 1; p >= std::max(i - SINGLE_MAX_LEN, 0); --p) + { int nucp = nucs[p]; - int nucp1 = nucs[p + 1]; + int nucp1 = nucs[p + 1]; int q = next_pair[nucp][j]; - while (q != -1 && ((i - p) + (q - j) - 2 <= SINGLE_MAX_LEN)) { + while (q != -1 && ((i - p) + (q - j) - 2 <= SINGLE_MAX_LEN)) + { int nucq = nucs[q]; int nucq_1 = nucs[q - 1]; - if (p == i - 1 && q == j + 1) { + if (p == i - 1 && q == j + 1) + { // helix #ifdef lpv - newscore = -v_score_single(p,q,i,j, nucp, nucp1, nucq_1, nucq, - nuci_1, nuci, nucj, nucj1); + newscore = -v_score_single(p, q, i, j, nucp, nucp1, nucq_1, nucq, + nuci_1, nuci, nucj, nucj1); // SHAPE for Vienna only if (use_shape) { newscore += -(pseudo_energy_stack[p] + pseudo_energy_stack[i] + pseudo_energy_stack[j] + pseudo_energy_stack[q]); } - - Fast_LogPlusEquals(state.beta, bestP[q][p].beta + newscore/kT); + Fast_LogPlusEquals(state.beta, bestP[q][p].beta + newscore / kT); #else newscore = score_helix(nucp, nucp1, nucq_1, nucq); Fast_LogPlusEquals(state.beta, bestP[q][p].beta + newscore); #endif - } else { + } + else + { // single branch #ifdef lpv - newscore = - v_score_single(p,q,i,j, nucp, nucp1, nucq_1, nucq, - nuci_1, nuci, nucj, nucj1); - Fast_LogPlusEquals(state.beta, bestP[q][p].beta + newscore/kT); + newscore = -v_score_single(p, q, i, j, nucp, nucp1, nucq_1, nucq, + nuci_1, nuci, nucj, nucj1); + Fast_LogPlusEquals(state.beta, bestP[q][p].beta + newscore / kT); #else newscore = score_junction_B(p, q, nucp, nucp1, nucq_1, nucq) + - precomputed + - score_single_without_junctionB(p, q, i, j, nuci_1, nuci, nucj, nucj1); + precomputed + + score_single_without_junctionB(p, q, i, j, nuci_1, nuci, nucj, nucj1); Fast_LogPlusEquals(state.beta, bestP[q][p].beta + newscore); #endif } @@ -413,30 +558,33 @@ void BeamCKYParser::outside(vector next_pair[]){ } // 2. M = P - if(i > 0 && j < seq_length-1){ + if (i > 0 && j < seq_length - 1) + { #ifdef lpv - newscore = - v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); - Fast_LogPlusEquals(state.beta, beamstepM[i].beta + newscore/kT); + newscore = -v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); + Fast_LogPlusEquals(state.beta, beamstepM[i].beta + newscore / kT); #else - newscore = score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length); - Fast_LogPlusEquals(state.beta, beamstepM[i].beta + newscore); + newscore = score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length); + Fast_LogPlusEquals(state.beta, beamstepM[i].beta + newscore); #endif } // 3. M2 = M + P int k = i - 1; - if ( k > 0 && !bestM[k].empty()) { + if (k > 0 && !bestM[k].empty()) + { #ifdef lpv - newscore = - v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); - pf_type m1_alpha = newscore/kT; + newscore = -v_score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length, dangle_mode); + pf_type m1_alpha = newscore / kT; #else newscore = score_M1(i, j, j, nuci_1, nuci, nucj, nucj1, seq_length); pf_type m1_alpha = newscore; #endif pf_type m1_plus_P_alpha = state.alpha + m1_alpha; - for (auto &m : bestM[k]) { + for (auto &m : bestM[k]) + { int newi = m.first; - State& m_state = m.second; + State &m_state = m.second; Fast_LogPlusEquals(state.beta, (beamstepM2[newi].beta + m_state.alpha + m1_alpha)); Fast_LogPlusEquals(m_state.beta, (beamstepM2[newi].beta + m1_plus_P_alpha)); } @@ -445,29 +593,32 @@ void BeamCKYParser::outside(vector next_pair[]){ // 4. C = C + P { int k = i - 1; - if (k >= 0) { + if (k >= 0) + { int nuck = nuci_1; int nuck1 = nuci; #ifdef lpv - newscore = - v_score_external_paired(k+1, j, nuck, nuck1, - nucj, nucj1, seq_length, dangle_mode); - pf_type external_paired_alpha_plus_beamstepC_beta = beamstepC.beta + newscore/kT; + newscore = -v_score_external_paired(k + 1, j, nuck, nuck1, + nucj, nucj1, seq_length, dangle_mode); + pf_type external_paired_alpha_plus_beamstepC_beta = beamstepC.beta + newscore / kT; #else - newscore = score_external_paired(k+1, j, nuck, nuck1, nucj, nucj1, seq_length); + newscore = score_external_paired(k + 1, j, nuck, nuck1, nucj, nucj1, seq_length); pf_type external_paired_alpha_plus_beamstepC_beta = beamstepC.beta + newscore; #endif Fast_LogPlusEquals(bestC[k].beta, state.alpha + external_paired_alpha_plus_beamstepC_beta); Fast_LogPlusEquals(state.beta, bestC[k].alpha + external_paired_alpha_plus_beamstepC_beta); - } else { + } + else + { // value_type newscore; #ifdef lpv - newscore = - v_score_external_paired(0, j, -1, nucs[0], - nucj, nucj1, seq_length, dangle_mode); - Fast_LogPlusEquals(state.beta, (beamstepC.beta + newscore/kT)); + newscore = -v_score_external_paired(0, j, -1, nucs[0], + nucj, nucj1, seq_length, dangle_mode); + Fast_LogPlusEquals(state.beta, (beamstepC.beta + newscore / kT)); #else newscore = score_external_paired(0, j, -1, nucs[0], - nucj, nucj1, seq_length); + nucj, nucj1, seq_length); Fast_LogPlusEquals(state.beta, beamstepC.beta + newscore); #endif } @@ -477,17 +628,19 @@ void BeamCKYParser::outside(vector next_pair[]){ // beam of Multi { - for(auto& item : beamstepMulti) { + for (auto &item : beamstepMulti) + { int i = item.first; - State& state = item.second; + State &state = item.second; int nuci = nucs[i]; - int nuci1 = nucs[i+1]; + int nuci1 = nucs[i + 1]; int jnext = next_pair[nuci][j]; // 1. extend (i, j) to (i, jnext) { - if (jnext != -1) { + if (jnext != -1) + { #ifdef lpv Fast_LogPlusEquals(state.beta, (bestMulti[jnext][i].beta)); #else @@ -500,24 +653,24 @@ void BeamCKYParser::outside(vector next_pair[]){ // 2. generate P (i, j) { #ifdef lpv - newscore = - v_score_multi(i, j, nuci, nuci1, nucs[j-1], nucj, seq_length, dangle_mode); - Fast_LogPlusEquals(state.beta, beamstepP[i].beta + newscore/kT); + newscore = -v_score_multi(i, j, nuci, nuci1, nucs[j - 1], nucj, seq_length, dangle_mode); + Fast_LogPlusEquals(state.beta, beamstepP[i].beta + newscore / kT); #else - newscore = score_multi(i, j, nuci, nuci1, nucs[j-1], nucj, seq_length); + newscore = score_multi(i, j, nuci, nuci1, nucs[j - 1], nucj, seq_length); Fast_LogPlusEquals(state.beta, beamstepP[i].beta + newscore); #endif } } } - } // end of for-loo j + } // end of for-loo j gettimeofday(&bpp_endtime, NULL); - double bpp_elapsed_time = bpp_endtime.tv_sec - bpp_starttime.tv_sec + (bpp_endtime.tv_usec-bpp_starttime.tv_usec)/1000000.0; + double bpp_elapsed_time = bpp_endtime.tv_sec - bpp_starttime.tv_sec + (bpp_endtime.tv_usec - bpp_starttime.tv_usec) / 1000000.0; - if(is_verbose) fprintf(stderr,"Base Pairing Probabilities Calculation Time: %.2f seconds.\n", bpp_elapsed_time); + if (is_verbose) + fprintf(stderr, "Base Pairing Probabilities Calculation Time: %.2f seconds.\n", bpp_elapsed_time); fflush(stdout); return; } -