diff --git a/mflib/CMakeLists.txt b/mflib/CMakeLists.txt index c9f536a..36a08d6 100644 --- a/mflib/CMakeLists.txt +++ b/mflib/CMakeLists.txt @@ -14,6 +14,8 @@ ADD_LIBRARY(mflib MFRegionSolver.hpp MFCpgSolver.cpp MFCpgSolver.hpp + MFCpgSolverSuccessive.hpp + MFCpgSolverSuccessive.cpp MethylRead.cpp MethylRead.hpp MFRegionPrinter.cpp diff --git a/mflib/MFCpgEstimator.cpp b/mflib/MFCpgEstimator.cpp index c88a3ba..13212a6 100644 --- a/mflib/MFCpgEstimator.cpp +++ b/mflib/MFCpgEstimator.cpp @@ -1,5 +1,6 @@ #include "MFCpgEstimator.hpp" #include "MFCpgSolver.hpp" +#include "MFCpgSolverSuccessive.hpp" #include diff --git a/mflib/MFCpgEstimator.hpp b/mflib/MFCpgEstimator.hpp index 97db57e..ac3b511 100644 --- a/mflib/MFCpgEstimator.hpp +++ b/mflib/MFCpgEstimator.hpp @@ -7,11 +7,12 @@ using namespace lemon; namespace methylFlow { class MFCpgSolver; - + class MFCpgSolverSuccessive; + class MFCpgEstimator { friend class MFCpgSolver; friend class MFGraph; - + friend class MFCpgSolverSuccessive; public: MFCpgEstimator(MFGraph *graph, std::ostream * ostream, const float scale_mult); ~MFCpgEstimator(); @@ -33,7 +34,8 @@ namespace methylFlow { class CpgEntry { friend class MFCpgEstimator; friend class MFCpgSolver; - public: + friend class MFCpgSolverSuccessive; + public: CpgEntry(T cov, T meth) : Cov(cov), Meth(meth), Beta(0) {}; CpgEntry() : Cov(0), Meth(0), Beta(0) {}; protected: diff --git a/mflib/MFCpgSolverGradient.cpp b/mflib/MFCpgSolverGradient.cpp new file mode 100644 index 0000000..9e2bacd --- /dev/null +++ b/mflib/MFCpgSolverGradient.cpp @@ -0,0 +1,257 @@ +#include "MFCpgSolverGradient.hpp" +#include "MFCpgEstimator.hpp" +#include "MFSolver.hpp" + +namespace methylFlow { + + MFCpgSolverGradient::MFCpgSolverGradient(MFGraph* mfobj, const float length_mult) : MFSolver(mfobj), + estimator(mfobj, &std::cout, length_mult), + alpha_lambda(mfobj->get_graph()), + beta_lambda(mfobj->get_graph()) + { + estimator.computeNormalized(); + } + + MFCpgSolverGradient::~MFCpgSolverGradient() + { + } + + + //L Prime F Calculation +/* + float LPrimeOfF(int estimate_f){ + + double sum = 0; + + for (std::vector::iterator it = m->cpgs.begin(); + it != m->cpgs.end(); ++it) { + int pos = it->first; + MFCpgEstimator::CpgEntry entry = it->second; + + float u = entry.Cov; + float m = entry.Meth + + sum += signFunction(estimate_f) * (-(firstNumerator(m)/MLofF(estimate_f) + secondNumerator(m)/ULofF(f))); + + }/ + +return 0; + + } + + double firstNumerator(int m){ + /* + if(p(v) == m){ + return 1/lvu; + } +return 0; + } + + int MLofF(double estimate_f){ +/* + double sum = 0; + for (ListDigraph::InArcIt arc(mfGraph, mf->get_sink()); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + if (p(v) == m){ + for(int j = 0; j <= u; j++){ + sum += estimate_f /lvu; + } + } + } + return sum; + +return 0; + } + + int ULofF(double estimate_f){ +/* + double sum = 0; + for (ListDigraph::InArcIt arc(mfGraph, mf->get_sink()); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + if (p(v) == u){ + for(int j = 0; j <= u; j++){ + sum += estimate_f /lvu; + } + } + } +return 0; + } + + + + double secondNumerator(int u){ + /* + if(p(v) == u){ + return 1/lvu; + } + return 0; + } + + + int signFunction(int estimate_f){ + if(estimate_f < 0){ + return -1; + }else if (estimate_f > 0){ + return 1; + }else + return 0; + } +*/ + int solve_for_lambda(const float lambda){ + + double f0 = 0; + double fn = 0; + double gamma = 0.01; + for(int i = 0; i < 5; i ++){ + f0 = fn; + fn += -gamma*LPrimeOfF(f0); + } + return fn; + } + + float MFCpgSolverGradient::score(const float lambda) + { + float obj = lp->primal(); + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + obj -= lambda * lp->dual(rows[arc]); + } + return obj; + } + + int MFCpgSolverGradient::add_cols() + { + for (MFCpgEstimator::CpgMap::iterator it = estimator.normalized_map.begin(); + it != estimator.normalized_map.end(); ++it) { + int pos = it->first; + alpha_y[pos] = lp->addCol(); + lp->colLowerBound(alpha_y[pos], 0.); + lp->colUpperBound(alpha_y[pos], 1.); + + beta_y[pos] = lp->addCol(); + lp->colLowerBound(beta_y[pos], 0.); + lp->colUpperBound(beta_y[pos], 1.); + + alpha_m[pos] = lp->addCol(); + lp->colLowerBound(alpha_m[pos], 0.); + lp->colUpperBound(alpha_m[pos], 1.); + + beta_m[pos] = lp->addCol(); + lp->colLowerBound(beta_m[pos], 0.); + lp->colUpperBound(beta_m[pos], 1.); + + } + + // now add alpha and beta for lambda nodes + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + ListDigraph::Node v = mf->mfGraph.source(arc); + + alpha_lambda[v] = lp->addCol(); + lp->colLowerBound(alpha_lambda[v], 0.); + lp->colUpperBound(alpha_lambda[v], 1.); + + beta_lambda[v] = lp->addCol(); + lp->colLowerBound(beta_lambda[v], 0.); + lp->colUpperBound(beta_lambda[v], 1.); + } + return 0; + } + + int MFCpgSolverGradient::make_deviance_objective(Lp::Expr &obj) + { + for (MFCpgEstimator::CpgMap::iterator it = estimator.normalized_map.begin(); + it != estimator.normalized_map.end(); ++it) { + int pos = it->first; + MFCpgEstimator::CpgEntry entry = it->second; + + obj += entry.Cov * ( beta_y[pos] - alpha_y[pos]); + obj += entry.Meth * ( beta_m[pos] - alpha_m[pos]); + } + return 0; + } + + int MFCpgSolverGradient::make_lambda_objective(const float lambda, Lp::Expr &obj) + { + const ListDigraph &mfGraph = mf->get_graph(); + + // add terms to objective for lambda nodes + for (ListDigraph::InArcIt arc(mfGraph, mf->get_sink()); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + obj += lambda * (beta_lambda[v] - alpha_lambda[v]); + } + return 0; + } + + int MFCpgSolverGradient::add_constraints() + { + const ListDigraph &mfGraph = mf->get_graph(); + + //add sink constraints + for (ListDigraph::InArcIt arc(mfGraph, mf->get_sink()); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + + rows[arc] = lp->addRow(scaled_length[arc] * beta_lambda[v] - + scaled_length[arc] * alpha_lambda[v] - nu[v] <= -CONSISTENCY_FACTOR); + } + + //add remaining constraints (if not childless) + for (IterableBoolMap::FalseIt v(mf->fake); + v != INVALID; ++v) { + if (mf->childless[v]) continue; + + for (ListDigraph::OutArcIt arc(mfGraph, v); arc != INVALID; ++arc) { + ListDigraph::Node u = mfGraph.target(arc); + if (u == INVALID) { + std::cerr << "error getting target from arc" << std::endl; + return -1; + } + + MethylRead *m = mf->read(v); + if (!m) continue; + + int rPos = m->start(); + Lp::Expr expr; + + for (std::vector::iterator it = m->cpgs.begin(); + it != m->cpgs.end(); ++it) { + MethylRead::CpgEntry entry = *it; + int loc = rPos + entry.offset - 1; + expr += beta_y[loc] - alpha_y[loc]; + if (entry.methyl) { + expr += beta_m[loc] - alpha_m[loc]; + } + } + expr += nu[u] - nu[v]; + rows[arc] = lp->addRow(expr <= -CONSISTENCY_FACTOR); + } + } + return 0; + } + + int MFCpgSolverGradient::modify_lambda_constraints(const float lambda) + { + const ListDigraph &mfGraph = mf->get_graph(); + + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + Lp::Row row = rows[arc]; + lp->row(row, -lambda * beta_lambda[v] - (-lambda * alpha_lambda[v]) - nu[v] <= -CONSISTENCY_FACTOR); + } + return 0; + } + + void MFCpgSolverGradient::print_primal() + { + for (MFCpgEstimator::CpgMap::iterator it = estimator.normalized_map.begin(); + it != estimator.normalized_map.end(); ++it) { + int pos = it->first; + std::cout << pos << ": ay=" << lp->primal(alpha_y[pos]); + std::cout << " by=" << lp->primal(beta_y[pos]); + std::cout << " am=" << lp->primal(alpha_m[pos]); + std::cout << " bm=" << lp->primal(beta_m[pos]); + + MFCpgEstimator::CpgEntry entry = it->second; + std::cout << " y=" << entry.Cov << " my=" << entry.Meth << std::endl; + } + print_nus(); + } +} // namespace methylFlow diff --git a/mflib/MFCpgSolverGradient.hpp b/mflib/MFCpgSolverGradient.hpp new file mode 100644 index 0000000..5d29003 --- /dev/null +++ b/mflib/MFCpgSolverGradient.hpp @@ -0,0 +1,35 @@ +#include "MFSolver.hpp" +#include "MFCpgEstimator.hpp" + +#ifndef MFCPGSOLVERGRADIENT_H +#define MFCPGSOLVERGRADIENT_H + +namespace methylFlow { + + class MFCpgSolverGradient : public MFSolver { + friend class MFCpgEstimator; + + public: + MFCpgSolverGradient(MFGraph *mfobj, const float length_mult); + ~MFCpgSolverGradient(); + + private: + MFCpgEstimator estimator; + std::map alpha_y; + std::map beta_y; + std::map alpha_m; + std::map beta_m; + ListDigraph::NodeMap alpha_lambda; + ListDigraph::NodeMap beta_lambda; + + protected: + float score(const float lambda); + int add_cols(); + int make_deviance_objective(Lp::Expr &obj); + int make_lambda_objective(const float lambda, Lp::Expr &obj); + int add_constraints(); + int modify_lambda_constraints(const float lambda); + void print_primal(); + }; +} // namespace methylFlow +#endif // MFCPGSOLVER_H diff --git a/mflib/MFCpgSolverSuccessive.cpp b/mflib/MFCpgSolverSuccessive.cpp new file mode 100644 index 0000000..a73eb61 --- /dev/null +++ b/mflib/MFCpgSolverSuccessive.cpp @@ -0,0 +1,236 @@ +#include "MFCpgSolverSuccessive.hpp" +#include "MFCpgEstimator.hpp" +#include "MFSolver.hpp" + +namespace methylFlow { + + MFCpgSolverSuccessive::MFCpgSolverSuccessive(MFGraph* mfobj, const float length_mult) : MFSolver(mfobj), + estimator(mfobj, &std::cout, length_mult), + alpha_lambda(mfobj->get_graph()), + beta_lambda(mfobj->get_graph()) + { + estimator.computeNormalized(); + } + + MFCpgSolverSuccessive::~MFCpgSolverSuccessive() + { + } + //L Prime F Calculation + float MFCpgSolverSuccessive::LPrimeOfF(int estimate_f){ + float sum = 0; + + for (MFCpgEstimator::CpgMap::iterator it = estimator.normalized_map.begin(); + it != estimator.normalized_map.end(); ++it){ + int pos = it->first; + MFCpgEstimator::CpgEntry entry = it->second; + float u = entry.Cov; + float m = entry.Meth; + sum += signFunction(estimate_f) * (-(firstNumerator(pos, m)/MLofF(pos,estimate_f,u, m) + secondNumerator(pos,m)/ULofF(pos,estimate_f, u, m))); + } + return sum; + } + + double MFCpgSolverSuccessive::firstNumerator(int ell, int m){ + + if(ell == m){ + return 1/ell; + } + return 0; + } + + int MFCpgSolverSuccessive::MLofF(int ell, double estimate_f, int u, int m){ + + double sum = 0; + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + ListDigraph::Node v = mf->mfGraph.source(arc); + if (ell == m){ + for(int j = 0; j <= u; j++){ + sum += estimate_f /ell; + } + } + } + return sum; + + } + + int MFCpgSolverSuccessive::ULofF(int ell, double estimate_f, int u, int m){ + + double sum = 0; + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + ListDigraph::Node v = mf->mfGraph.source(arc); + if (ell == u){ + for(int j = 0; j <= u; j++){ + sum += estimate_f /ell; + } + } + } + + return sum; + } + + + + double MFCpgSolverSuccessive::secondNumerator(int ell, int u){ + + if(ell == u){ + return 1/ell; + } + return 0; + } + + + int MFCpgSolverSuccessive::signFunction(int estimate_f){ + if(estimate_f < 0){ + return -1; + }else if (estimate_f > 0){ + return 1; + }else + return 0; + } + + float MFCpgSolverSuccessive::score(const float lambda) + { + float obj = lp->primal(); + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + obj -= lambda * lp->dual(rows[arc]); + } + return obj; + } + + int MFCpgSolverSuccessive::add_cols() + { + for (MFCpgEstimator::CpgMap::iterator it = estimator.normalized_map.begin(); + it != estimator.normalized_map.end(); ++it) { + int pos = it->first; + alpha_y[pos] = lp->addCol(); + lp->colLowerBound(alpha_y[pos], 0.); + lp->colUpperBound(alpha_y[pos], 1.); + + beta_y[pos] = lp->addCol(); + lp->colLowerBound(beta_y[pos], 0.); + lp->colUpperBound(beta_y[pos], 1.); + + alpha_m[pos] = lp->addCol(); + lp->colLowerBound(alpha_m[pos], 0.); + lp->colUpperBound(alpha_m[pos], 1.); + + beta_m[pos] = lp->addCol(); + lp->colLowerBound(beta_m[pos], 0.); + lp->colUpperBound(beta_m[pos], 1.); + + } + + // now add alpha and beta for lambda nodes + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + ListDigraph::Node v = mf->mfGraph.source(arc); + + alpha_lambda[v] = lp->addCol(); + lp->colLowerBound(alpha_lambda[v], 0.); + lp->colUpperBound(alpha_lambda[v], 1.); + + beta_lambda[v] = lp->addCol(); + lp->colLowerBound(beta_lambda[v], 0.); + lp->colUpperBound(beta_lambda[v], 1.); + } + return 0; + } + + int MFCpgSolverSuccessive::make_deviance_objective(Lp::Expr &obj) + { + for (MFCpgEstimator::CpgMap::iterator it = estimator.normalized_map.begin(); + it != estimator.normalized_map.end(); ++it) { + int pos = it->first; + MFCpgEstimator::CpgEntry entry = it->second; + + obj += entry.Cov * ( beta_y[pos] - alpha_y[pos]); + obj += entry.Meth * ( beta_m[pos] - alpha_m[pos]); + } + return 0; + } + + int MFCpgSolverSuccessive::make_lambda_objective(const float lambda, Lp::Expr &obj) + { + const ListDigraph &mfGraph = mf->get_graph(); + + // add terms to objective for lambda nodes + for (ListDigraph::InArcIt arc(mfGraph, mf->get_sink()); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + obj += lambda * (beta_lambda[v] - alpha_lambda[v]); + } + return 0; + } + + int MFCpgSolverSuccessive::add_constraints() + { + const ListDigraph &mfGraph = mf->get_graph(); + + //add sink constraints + for (ListDigraph::InArcIt arc(mfGraph, mf->get_sink()); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + + rows[arc] = lp->addRow(scaled_length[arc] * beta_lambda[v] - + scaled_length[arc] * alpha_lambda[v] - nu[v] <= -CONSISTENCY_FACTOR); + } + + //add remaining constraints (if not childless) + for (IterableBoolMap::FalseIt v(mf->fake); + v != INVALID; ++v) { + if (mf->childless[v]) continue; + + for (ListDigraph::OutArcIt arc(mfGraph, v); arc != INVALID; ++arc) { + ListDigraph::Node u = mfGraph.target(arc); + if (u == INVALID) { + std::cerr << "error getting target from arc" << std::endl; + return -1; + } + + MethylRead *m = mf->read(v); + if (!m) continue; + + int rPos = m->start(); + Lp::Expr expr; + + for (std::vector::iterator it = m->cpgs.begin(); + it != m->cpgs.end(); ++it) { + MethylRead::CpgEntry entry = *it; + int loc = rPos + entry.offset - 1; + expr += beta_y[loc] - alpha_y[loc]; + if (entry.methyl) { + expr += beta_m[loc] - alpha_m[loc]; + } + } + expr += nu[u] - nu[v]; + rows[arc] = lp->addRow(expr <= -CONSISTENCY_FACTOR); + } + } + return 0; + } + + int MFCpgSolverSuccessive::modify_lambda_constraints(const float lambda) + { + const ListDigraph &mfGraph = mf->get_graph(); + + for (ListDigraph::InArcIt arc(mf->mfGraph, mf->sink); arc != INVALID; ++arc) { + ListDigraph::Node v = mfGraph.source(arc); + Lp::Row row = rows[arc]; + lp->row(row, -lambda * beta_lambda[v] - (-lambda * alpha_lambda[v]) - nu[v] <= -CONSISTENCY_FACTOR); + } + return 0; + } + + void MFCpgSolverSuccessive::print_primal() + { + for (MFCpgEstimator::CpgMap::iterator it = estimator.normalized_map.begin(); + it != estimator.normalized_map.end(); ++it) { + int pos = it->first; + std::cout << pos << ": ay=" << lp->primal(alpha_y[pos]); + std::cout << " by=" << lp->primal(beta_y[pos]); + std::cout << " am=" << lp->primal(alpha_m[pos]); + std::cout << " bm=" << lp->primal(beta_m[pos]); + + MFCpgEstimator::CpgEntry entry = it->second; + std::cout << " y=" << entry.Cov << " my=" << entry.Meth << std::endl; + } + print_nus(); + } +} // namespace methylFlow diff --git a/mflib/MFCpgSolverSuccessive.hpp b/mflib/MFCpgSolverSuccessive.hpp new file mode 100644 index 0000000..34fa4fd --- /dev/null +++ b/mflib/MFCpgSolverSuccessive.hpp @@ -0,0 +1,41 @@ +#include "MFSolver.hpp" +#include "MFCpgEstimator.hpp" + +#ifndef MFCPGSOLVERSUCCESSIVE_H +#define MFCPGSOLVERSUCCESSIVE_H + +namespace methylFlow { + + class MFCpgSolverSuccessive : public MFSolver { + friend class MFCpgEstimator; + + public: + MFCpgSolverSuccessive(MFGraph *mfobj, const float length_mult); + ~MFCpgSolverSuccessive(); + + private: + MFCpgEstimator estimator; + std::map alpha_y; + std::map beta_y; + std::map alpha_m; + std::map beta_m; + ListDigraph::NodeMap alpha_lambda; + ListDigraph::NodeMap beta_lambda; + float LPrimeOfF(int estimate_f); + double firstNumerator(int ell, int m); + int MLofF(int ell, double estimate_f, int u, int m); + int ULofF(int ell, double estimate_f, int u, int m); + double secondNumerator(int ell, int u); + int signFunction(int estimate_f); + + protected: + float score(const float lambda); + int add_cols(); + int make_deviance_objective(Lp::Expr &obj); + int make_lambda_objective(const float lambda, Lp::Expr &obj); + int add_constraints(); + int modify_lambda_constraints(const float lambda); + void print_primal(); + }; +} // namespace methylFlow +#endif // MFCPGSOLVER_H diff --git a/mflib/MFGraph.hpp b/mflib/MFGraph.hpp index 101f6da..59e3d64 100644 --- a/mflib/MFGraph.hpp +++ b/mflib/MFGraph.hpp @@ -19,7 +19,7 @@ namespace methylFlow { friend class MFSolver; friend class MFRegionSolver; friend class MFCpgSolver; - + friend class MFCpgSolverSuccessive; public: MFGraph(); ~MFGraph(); diff --git a/mflib/MFGraph_solve.cpp b/mflib/MFGraph_solve.cpp index ae44f21..469f860 100644 --- a/mflib/MFGraph_solve.cpp +++ b/mflib/MFGraph_solve.cpp @@ -10,6 +10,7 @@ #include "MFGraph.hpp" #include "MFRegionSolver.hpp" #include "MFCpgSolver.hpp" +#include "MFCpgSolver.hpp" namespace methylFlow { void MFGraph::add_terminals() diff --git a/mflib/MFSolver.hpp b/mflib/MFSolver.hpp index 7b06499..fc95a19 100644 --- a/mflib/MFSolver.hpp +++ b/mflib/MFSolver.hpp @@ -34,7 +34,7 @@ namespace methylFlow { // make the LP object // virtual LP creator - int make_lp(const float length_mult); + int make_lp(const float length_mult); virtual int add_cols() =0; virtual int make_deviance_objective(Lp::Expr &obj) =0; diff --git a/mflib/MethylRead.hpp b/mflib/MethylRead.hpp index 8582290..9f42aa5 100644 --- a/mflib/MethylRead.hpp +++ b/mflib/MethylRead.hpp @@ -14,6 +14,8 @@ namespace methylFlow { private: friend class MFCpgEstimator; friend class MFCpgSolver; + friend class MFCpgSolverSuccessive; + typedef struct {int offset; bool methyl; } CpgEntry; typedef struct {int start; int length; char indel; } CigarEntry;