From 9b8941cf5a3a723eb6a08fb94df36ed35e3fa3ad Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Tue, 10 Apr 2018 18:29:59 -0400 Subject: [PATCH 01/15] Accept queries in loop from stdin --- Makefile | 35 +++++++- src/mantis.cc | 4 +- src/queries.cc | 211 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+), 4 deletions(-) create mode 100644 src/queries.cc diff --git a/Makefile b/Makefile index 625a419..d8d5333 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,31 @@ TARGETS= mantis +MYLIBS=-L/home/paf2023/tools/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-phpi22i4vio3f55thjj5xxeupzmj463k/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/gettext-0.19.8.1-cnbmeuapkcyxq23tumt4esnuvijpl4w7/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/bzip2-1.0.6-x5hwxgowvbzmnowngidcjin5qicfg4fl/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/libxml2-2.9.4-ox4x2yk4rqlh3ag4wxxgzficxgwbcsy7/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/xz-5.2.3-npj3lafufjb7d57yhkthdo3qeeollgiq/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/python-2.7.14-4q4ykdbphrdjx2n4ztg5getsrjkl7dkz/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/ncurses-6.0-37ewjt6dvfa4vn3coyfnx5ejnfzg6d27/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-pkmmuwsdolh4vk2qrjvevqbrds55oxet/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-3yfqdmrhogpxaehsissamfd4bagokf3y/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-kamnyqrq5i6e6ghsccvnx4banzgdvn6r/lib \ 
+-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-t6r4t2u7mpl73qqhveutncj4gamajiip/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-a4bouodbguxjzgeoogr7qjqculofd4cx/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-7eta3q6r3ozz5vh7zaffgjajf4fwvxby/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.5.4/zlib-1.2.11-upeb67xkoclf5fgx7musxjw5v7gk7yw7/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/zlib-1.2.11-erzei6xutxne2tjssbmelgutsnhz6x6s/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-rjqs3cq27e2o4tbce7kzmictrih57avc/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-oiwzkauf2kl22pu7j2vjupr3ffu3mzts/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-5awddeormrqlmqq2frhjrdcu5szebebq/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-sqviukvzkgl524rq2dtz6d5ytukaxlj3/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.64.0-sx2zxhmitq565rmn3jutk67ffqrko6zx/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/boost-1.63.0-gpfsh3erjtf2xyksy66crnxzqib6qrva/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.63.0-eu4aw5v6ewmqd5kuzieorttiljxcuscv/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.54.0-7qaxqizxejlur5pr5mfecf2umfp52lmr/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-han2ovjpjdnuwkhtfhugdq66cjkte7q3/lib64 \ 
+-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-xnoykzftyh27xs6vqkwvtft2cce5yd4k/lib64 \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-4.5.4-tmeuqohwhscsb2hegxgfjmtxy7ywelud/lib64 \ ifdef D DEBUG=-g -DDEBUG @@ -33,7 +60,7 @@ CFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE)\ -Wno-unused-result -Wno-strict-aliasing -Wno-unused-function -Wno-sign-compare \ -Wno-implicit-function-declaration -LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ +LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ -lboost_thread -lm -lz -lrt # @@ -43,7 +70,8 @@ LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ all: $(TARGETS) # dependencies between programs and .o files -mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +#mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o # dependencies between .o files and .h files $(OBJDIR)/mantis.o: $(LOC_SRC)/mantis.cc @@ -52,7 +80,8 @@ $(OBJDIR)/util.o: $(LOC_SRC)/util.cc $(LOC_INCLUDE)/util.h $(OBJDIR)/bitvector.o: $(LOC_SRC)/bitvector.cc $(LOC_INCLUDE)/bitvector.h $(OBJDIR)/kmer.o: $(LOC_SRC)/kmer.cc $(LOC_INCLUDE)/kmer.h $(OBJDIR)/coloreddbg.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h -$(OBJDIR)/query.o: 
$(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h +#$(OBJDIR)/query.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h +$(OBJDIR)/queries.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/validatemantis.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h diff --git a/src/mantis.cc b/src/mantis.cc index b80ca6d..b2a40e2 100644 --- a/src/mantis.cc +++ b/src/mantis.cc @@ -46,6 +46,7 @@ void explore_options_verbose(T& res) { } int query_main (QueryOpts& opt); +int queries_main (QueryOpts& opt); int build_main (BuildOpts& opt); int validate_main (ValidateOpts& opt); @@ -177,7 +178,8 @@ int main ( int argc, char *argv[] ) { if(res) { switch(selected) { case mode::build: build_main(bopt); break; - case mode::query: query_main(qopt); break; + // case mode::query: query_main(qopt); break; + case mode::query: queries_main(qopt); break; case mode::validate: validate_main(vopt); break; case mode::help: std::cout << make_man_page(cli, "mantis"); break; } diff --git a/src/queries.cc b/src/queries.cc new file mode 100644 index 0000000..93ce066 --- /dev/null +++ b/src/queries.cc @@ -0,0 +1,211 @@ +/* + * ============================================================================ + * + * Filename: query.cc + * + * Description: + * + * Version: 1.0 + * Created: 2017-10-27 12:56:50 AM + * Revision: none + * Compiler: gcc + * + * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu + * Organization: Stony Brook University + * + * 
============================================================================ + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MantisFS.h" +#include "ProgOpts.h" +#include "spdlog/spdlog.h" +#include "kmer.h" +#include "coloreddbg.h" +#include "common_types.h" +#include "CLI/CLI.hpp" +#include "CLI/Timer.hpp" + +#define MAX_NUM_SAMPLES 2600 +#define OUTPUT_FILE "samples.output" + +void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, + std::ofstream& opfile) { + mantis::QueryResults qres; + uint32_t cnt= 0; + { + CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; + //size_t qctr{0}; + //size_t nquery{multi_kmers.size()}; + for (auto& kmers : multi_kmers) { + //std::sort(kmers.begin(), kmers.end()); + opfile << cnt++ << '\t' << kmers.size() << '\n'; + mantis::QueryResult result = cdbg.find_samples(kmers); + for (auto it = result.begin(); it != result.end(); ++it) { + opfile << cdbg.get_sample(it->first) << '\t' << it->second << '\n'; + } + //++qctr; + } + } +} + + +void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, + std::ofstream& opfile) { + mantis::QueryResults qres; + uint32_t cnt= 0; + { + CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; + opfile << "[\n"; + size_t qctr{0}; + size_t nquery{multi_kmers.size()}; + for (auto& kmers : multi_kmers) { + //std::sort(kmers.begin(), kmers.end()); + opfile << "{ \"qnum\": " << cnt++ << ", \"num_kmers\": " << kmers.size() << ", \"res\": {\n"; + mantis::QueryResult result = cdbg.find_samples(kmers); + for (auto it = result.begin(); it != result.end(); ++it) { + opfile << " \"" <first) << "\": " << it->second ; + if (std::next(it) != result.end()) { + opfile << ",\n"; + } + } + opfile << "}}"; + if (qctr < nquery - 1) { opfile << 
","; } + opfile << "\n"; + ++qctr; + } + opfile << "]\n"; + } + +} + + +/* + * === FUNCTION ============================================================= + * Name: main + * Description: + * ============================================================================ + */ +int queries_main (QueryOpts& opt) +{ + //CLI::App app("Mantis query"); + + std::string prefix = opt.prefix; + std::string query_file = opt.query_file; + std::string output_file = opt.output;//{"samples.output"}; + bool use_json = opt.use_json; + /* + app.add_option("-i,--input-prefix", prefix, "Prefix of input files.")->required(); + app.add_option("-o,--outout", output_file, "Where to write query output."); + app.add_option("query", query_file, "Prefix of input files.")->required(); + app.add_flag("-j,--json", use_json, "Write the output in JSON format"); + CLI11_PARSE(app, argc, argv); + */ + + // Make sure the prefix is a full folder + if (prefix.back() != '/') { + prefix.push_back('/'); + } + + spdlog::logger* console = opt.console.get(); + console->info("Reading colored dbg from disk."); + + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + + ColoredDbg*>, KeyObject> cdbg(cqf_file, + eqclass_files, + sample_file); + uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; + console->info("Read colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + + //cdbg.get_cqf()->dump_metadata(); + //CQF cqf(query_file, false); + //CQF::Iterator it = cqf.begin(1); + //mantis::QuerySet input_kmers; + //do { + //KeyObject k = *it; + //input_kmers.insert(k.key); + //++it; + //} while (!it.done()); + + //mantis::QuerySets multi_kmers; + //multi_kmers.push_back(input_kmers); + + console->info("Reading query kmers from disk."); + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + while (1) { + cout << "Enter 
query file: " << endl; + cin >> query_file; + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + // Attempt to optimize bulk query + /* + bool doBulk = multi_kmers.size() >= 100; + BulkQuery bq; + if (doBulk) { + uint32_t exp_id{0}; + for (auto& kmers : multi_kmers) { + for (auto& k : kmers) { + bq.qs.insert(k); + bq.map[k].push_back(exp_id); + } + ++exp_id; + } + } + */ + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + if (use_json) { + output_results_json(multi_kmers, cdbg, opfile); + } else { + output_results(multi_kmers, cdbg, opfile); + } + //std::cout << "Writing samples and abundances out." << std::endl; + opfile.close(); + console->info("Writing done."); + + } + + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ From 8d022a60b00bb538a1097e4402c5fc404260e41e Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Tue, 10 Apr 2018 19:18:33 -0400 Subject: [PATCH 02/15] Individual server command line option --- src/mantis.cc | 20 +++++++++++++++----- src/queries.cc | 11 +++++++---- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/mantis.cc b/src/mantis.cc index b2a40e2..950ceaa 100644 --- a/src/mantis.cc +++ b/src/mantis.cc @@ -46,7 +46,7 @@ void explore_options_verbose(T& res) { } int query_main (QueryOpts& opt); -int queries_main (QueryOpts& opt); +int server_main (QueryOpts& opt); int build_main (BuildOpts& opt); int validate_main (ValidateOpts& opt); @@ -58,16 +58,18 @@ int validate_main (ValidateOpts& opt); */ int main ( int argc, char *argv[] ) { using namespace clipp; - enum class mode {build, query, validate, help}; + enum class mode {build, query, server, validate, help}; mode selected = mode::help; auto console = spdlog::stdout_color_mt("mantis_console"); BuildOpts bopt; - QueryOpts qopt; + QueryOpts qopt, sopt; ValidateOpts vopt; + 
bopt.console = console; qopt.console = console; + sopt.console = console; vopt.console = console; start_time = time(NULL); @@ -146,6 +148,13 @@ int main ( int argc, char *argv[] ) { option("-o", "--output") & value("output_file", qopt.output) % "Where to write query output.", value(ensure_file_exists, "query", qopt.query_file) % "Prefix of input files." ); + auto server_mode = ( + command("server").set(selected, mode::server), + option("-j", "--json").set(sopt.use_json) % "Write the output in JSON format", + required("-p", "--input-prefix") & value(ensure_dir_exists, "query_prefix", sopt.prefix) % "Prefix of input files." // , + // option("-o", "--output") & value("output_file", qopt.output) % "Where to write query output.", + // value(ensure_file_exists, "query", qopt.query_file) % "Prefix of input files." + ); auto validate_mode = ( command("validate").set(selected, mode::validate), @@ -156,11 +165,12 @@ int main ( int argc, char *argv[] ) { ); auto cli = ( - (build_mode | query_mode | validate_mode | command("help").set(selected,mode::help) ), + (build_mode | query_mode | server_mode | validate_mode | command("help").set(selected,mode::help) ), option("-v", "--version").call([]{std::cout << "version 1.0\n\n";}).doc("show version") ); assert(build_mode.flags_are_prefix_free()); assert(query_mode.flags_are_prefix_free()); + assert(server_mode.flags_are_prefix_free()); assert(validate_mode.flags_are_prefix_free()); decltype(parse(argc, argv, cli)) res; @@ -179,7 +189,7 @@ int main ( int argc, char *argv[] ) { switch(selected) { case mode::build: build_main(bopt); break; // case mode::query: query_main(qopt); break; - case mode::query: queries_main(qopt); break; + case mode::server: server_main(sopt); break; case mode::validate: validate_main(vopt); break; case mode::help: std::cout << make_man_page(cli, "mantis"); break; } diff --git a/src/queries.cc b/src/queries.cc index 93ce066..553c0f0 100644 --- a/src/queries.cc +++ b/src/queries.cc @@ -113,13 +113,11 @@ void 
output_results_json(mantis::QuerySets& multi_kmers, ColoredDbgrequired(); @@ -135,7 +133,7 @@ int queries_main (QueryOpts& opt) } spdlog::logger* console = opt.console.get(); - console->info("Reading colored dbg from disk."); + console->info("Reading colored dbg from disk: " + prefix); std::string cqf_file(prefix + CQF_FILE); std::string sample_file(prefix + SAMPLEID_FILE); @@ -167,8 +165,13 @@ int queries_main (QueryOpts& opt) uint64_t total_kmers = 0; while (1) { + std::string query_file = opt.query_file; + std::string output_file = opt.output;//{"samples.output"}; + cout << "Enter query file: " << endl; cin >> query_file; + cout << "Enter output file: " << endl; + cin >> output_file; mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), seed, From 82aa2aaa7e0313659924c6d45aab2d2a2558d849 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 00:54:29 -0400 Subject: [PATCH 03/15] Refactor queries from stdin to adopt for sockets --- Makefile | 1 + src/queries.cc | 170 +++++++++++++++++++++++-------------------------- 2 files changed, 82 insertions(+), 89 deletions(-) diff --git a/Makefile b/Makefile index d8d5333..71b94d5 100644 --- a/Makefile +++ b/Makefile @@ -88,6 +88,7 @@ $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h # dependencies between .o files and .cc (or .c) files $(OBJDIR)/gqf.o: $(LOC_SRC)/cqf/gqf.c $(LOC_INCLUDE)/cqf/gqf.h +$(OBJDIR)/queries.o: $(LOC_SRC)/sockets.hh # # generic build rules diff --git a/src/queries.cc b/src/queries.cc index 553c0f0..0e826ff 100644 --- a/src/queries.cc +++ b/src/queries.cc @@ -53,6 +53,8 @@ #include "CLI/CLI.hpp" #include "CLI/Timer.hpp" +// #include "sockets.hh" + #define MAX_NUM_SAMPLES 2600 #define OUTPUT_FILE "samples.output" @@ -106,6 +108,44 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject> * colored_dbg; + spdlog::logger * console; + + void run_query(std::string query_file, std::string output_file) + { + ColoredDbg*>, KeyObject> & cdbg = 
*colored_dbg; + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; + console->info("Use colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("K-mer size", kmer_size); + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + // if (use_json) { + // output_results_json(multi_kmers, cdbg, opfile); + // } else { + // output_results(multi_kmers, cdbg, opfile); + // } + // TODO use_json + output_results(multi_kmers, cdbg, opfile); + + opfile.close(); + console->info("Writing done."); + } +} /* * === FUNCTION ============================================================= @@ -115,100 +155,52 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbgrequired(); - app.add_option("-o,--outout", output_file, "Where to write query output."); - app.add_option("query", query_file, "Prefix of input files.")->required(); - app.add_flag("-j,--json", use_json, "Write the output in JSON format"); - CLI11_PARSE(app, argc, argv); - */ - - // Make sure the prefix is a full folder - if (prefix.back() != '/') { - prefix.push_back('/'); - } + try + { + // boost::asio::io_service io_service; + // server s(io_service, 23901); - spdlog::logger* console = opt.console.get(); - console->info("Reading colored dbg from disk: " + prefix); - - std::string cqf_file(prefix + CQF_FILE); - std::string sample_file(prefix + SAMPLEID_FILE); - std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), - std::string(EQCLASS_FILE).c_str()); - - ColoredDbg*>, KeyObject> cdbg(cqf_file, - eqclass_files, - sample_file); - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Read colored dbg with {} k-mers and {} color classes", - 
cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); - - //cdbg.get_cqf()->dump_metadata(); - //CQF cqf(query_file, false); - //CQF::Iterator it = cqf.begin(1); - //mantis::QuerySet input_kmers; - //do { - //KeyObject k = *it; - //input_kmers.insert(k.key); - //++it; - //} while (!it.done()); - - //mantis::QuerySets multi_kmers; - //multi_kmers.push_back(input_kmers); - - console->info("Reading query kmers from disk."); - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - - while (1) { - std::string query_file = opt.query_file; - std::string output_file = opt.output;//{"samples.output"}; - - cout << "Enter query file: " << endl; - cin >> query_file; - cout << "Enter output file: " << endl; - cin >> output_file; - - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - // Attempt to optimize bulk query - /* - bool doBulk = multi_kmers.size() >= 100; - BulkQuery bq; - if (doBulk) { - uint32_t exp_id{0}; - for (auto& kmers : multi_kmers) { - for (auto& k : kmers) { - bq.qs.insert(k); - bq.map[k].push_back(exp_id); - } - ++exp_id; + std::string prefix = opt.prefix; + // bool use_json = opt.use_json; + + // Make sure the prefix is a full folder + if (prefix.back() != '/') { + prefix.push_back('/'); } - } - */ - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); + query::console = opt.console.get(); + query::console->info("Reading colored dbg from disk: " + prefix); - if (use_json) { - output_results_json(multi_kmers, cdbg, opfile); - } else { - output_results(multi_kmers, cdbg, opfile); - } - //std::cout << "Writing samples and abundances out." 
<< std::endl; - opfile.close(); - console->info("Writing done."); + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + query::colored_dbg = new ColoredDbg*>, KeyObject>(cqf_file, eqclass_files, sample_file); + + + // console->info("Reading query kmers from disk."); + // console->info("Run server accepting queries."); + // io_service.run(); + // read from stdin + + while (1) { + std::string query_file = opt.query_file; + std::string output_file = opt.output; //{"samples.output"}; + + cout << "Enter query file: " << endl; + cin >> query_file; + cout << "Enter output file: " << endl; + cin >> output_file; + + query::run_query(query_file, output_file); + } + } + catch (std::exception& e) + { + std::cerr << "Exception: " << e.what() << "\n"; } - return EXIT_SUCCESS; + delete query::colored_dbg; + return EXIT_SUCCESS; } /* ---------- end of function main ---------- */ From e54e9420bdec097e6cc089e0b10ad2b64e4fb365 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 02:37:30 -0400 Subject: [PATCH 04/15] Extract query header --- src/query.hh | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/query.hh diff --git a/src/query.hh b/src/query.hh new file mode 100644 index 0000000..f703718 --- /dev/null +++ b/src/query.hh @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MantisFS.h" +#include "ProgOpts.h" +#include "spdlog/spdlog.h" +#include "kmer.h" +#include "coloreddbg.h" +#include "common_types.h" +#include "CLI/CLI.hpp" +#include "CLI/Timer.hpp" + +namespace query +{ + ColoredDbg*>, KeyObject> * colored_dbg; + 
spdlog::logger * console; + + void run_query(std::string query_file, std::string output_file); +} \ No newline at end of file From c1422235df71f75fa2bdfe30262cb53456c57723 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 03:16:22 -0400 Subject: [PATCH 05/15] Queries based on sockets --- src/client.py | 14 +++++ src/queries.cc | 157 ++++++++++++++++--------------------------------- src/sockets.hh | 100 +++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 107 deletions(-) create mode 100644 src/client.py create mode 100644 src/sockets.hh diff --git a/src/client.py b/src/client.py new file mode 100644 index 0000000..7890707 --- /dev/null +++ b/src/client.py @@ -0,0 +1,14 @@ +import socket +import sys + +HOST, PORT = "localhost", 23901 + +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + +s.connect((HOST, PORT)) +for x in range(0, 5): + print("Step 1") + s.send('/home/paf2023/run_mantis/queries/read2.fa ./out.res') + print("Step 2") + print(str(s.recv(1000))) + print(x) diff --git a/src/queries.cc b/src/queries.cc index 0e826ff..6e84064 100644 --- a/src/queries.cc +++ b/src/queries.cc @@ -1,59 +1,5 @@ -/* - * ============================================================================ - * - * Filename: query.cc - * - * Description: - * - * Version: 1.0 - * Created: 2017-10-27 12:56:50 AM - * Revision: none - * Compiler: gcc - * - * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu - * Organization: Stony Brook University - * - * ============================================================================ - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "MantisFS.h" -#include "ProgOpts.h" -#include "spdlog/spdlog.h" -#include "kmer.h" -#include "coloreddbg.h" -#include "common_types.h" -#include 
"CLI/CLI.hpp" -#include "CLI/Timer.hpp" - -// #include "sockets.hh" +// #include "query.hh" +#include "sockets.hh" #define MAX_NUM_SAMPLES 2600 #define OUTPUT_FILE "samples.output" @@ -108,43 +54,41 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject> * colored_dbg; - spdlog::logger * console; - - void run_query(std::string query_file, std::string output_file) - { - ColoredDbg*>, KeyObject> & cdbg = *colored_dbg; - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Use colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); - console->info("K-mer size", kmer_size); - - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); - - // if (use_json) { - // output_results_json(multi_kmers, cdbg, opfile); - // } else { - // output_results(multi_kmers, cdbg, opfile); - // } - // TODO use_json - output_results(multi_kmers, cdbg, opfile); - - opfile.close(); - console->info("Writing done."); - } + ColoredDbg*>, KeyObject> & cdbg = *colored_dbg; + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; + console->info("Use colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("K-mer size: {}", kmer_size); + + std::cout << "QF: " << query_file << std::endl; + std::cout << "OUT: " << output_file << std::endl; + + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + // if 
(use_json) { + // output_results_json(multi_kmers, cdbg, opfile); + // } else { + // output_results(multi_kmers, cdbg, opfile); + // } + // TODO use_json + output_results(multi_kmers, cdbg, opfile); + + opfile.close(); + console->info("Writing done."); } /* @@ -157,8 +101,8 @@ int server_main (QueryOpts& opt) { try { - // boost::asio::io_service io_service; - // server s(io_service, 23901); + boost::asio::io_service io_service; + server s(io_service, 23901); std::string prefix = opt.prefix; // bool use_json = opt.use_json; @@ -178,23 +122,22 @@ int server_main (QueryOpts& opt) query::colored_dbg = new ColoredDbg*>, KeyObject>(cqf_file, eqclass_files, sample_file); - // console->info("Reading query kmers from disk."); - // console->info("Run server accepting queries."); - // io_service.run(); + query::console->info("Reading query kmers from disk."); + query::console->info("Run server accepting queries."); + io_service.run(); // read from stdin + // while (1) { + // std::string query_file = opt.query_file; + // std::string output_file = opt.output; //{"samples.output"}; - while (1) { - std::string query_file = opt.query_file; - std::string output_file = opt.output; //{"samples.output"}; + // cout << "Enter query file: " << endl; + // cin >> query_file; + // cout << "Enter output file: " << endl; + // cin >> output_file; - cout << "Enter query file: " << endl; - cin >> query_file; - cout << "Enter output file: " << endl; - cin >> output_file; - - query::run_query(query_file, output_file); - } + // query::run_query(query_file, output_file); + // } } catch (std::exception& e) { diff --git a/src/sockets.hh b/src/sockets.hh new file mode 100644 index 0000000..25c4dd7 --- /dev/null +++ b/src/sockets.hh @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include + +#include "query.hh" + +using boost::asio::ip::tcp; + +class session + : public std::enable_shared_from_this +{ +public: + session(tcp::socket socket) + : socket_(std::move(socket)) + { + } + + void 
start() + { + do_read(); + } + +private: + void do_read() + { + // read query file path and output file path + auto self(shared_from_this()); + socket_.async_read_some(boost::asio::buffer(data_, max_length), + [this, self](boost::system::error_code ec, std::size_t length) + { + std::cout << "Read: " << data_ << std::endl; + + char * delimeter_ptr = strstr(data_, " "); + if (!delimeter_ptr) + { + // TODO incorrect query format error. Call do_read? + return; + } + std::string query_filepath(data_, delimeter_ptr - data_); + std::string output_filepath(delimeter_ptr + 1); // assume exactly one space between filepaths + + query::run_query(query_filepath, output_filepath); // TODO check result + + if (!ec) + { + do_write(length, query_filepath); + } + }); + } + + void do_write(std::size_t length, std::string outfile) + { + // respond with output filepath + auto self(shared_from_this()); + std::cout << "Respond with message: " << data_ << std::endl; + boost::asio::async_write(socket_, boost::asio::buffer(data_, length), + [this, self](boost::system::error_code ec, std::size_t /*length*/) + { + if (!ec) + { + do_read(); + } + }); + } + + tcp::socket socket_; + enum { max_length = 4096 }; + char data_[max_length]; +}; + +class server +{ +public: + server(boost::asio::io_service& io_service, short port) + : acceptor_(io_service, tcp::endpoint(tcp::v4(), port)), + socket_(io_service) + { + do_accept(); + } + +private: + void do_accept() + { + acceptor_.async_accept(socket_, + [this](boost::system::error_code ec) + { + if (!ec) + { + std::make_shared(std::move(socket_))->start(); + } + + do_accept(); + }); + } + + tcp::acceptor acceptor_; + tcp::socket socket_; +}; \ No newline at end of file From 6a452ba4ff4cae1b7849fb908ea4730944bebfa4 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 07:56:04 -0400 Subject: [PATCH 06/15] Remove debug output. 
DONT FORGET TO MAKE with NH=1 OPTION --- src/client.py | 13 ++++++++----- src/queries.cc | 6 ++---- src/sockets.hh | 7 +++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/client.py b/src/client.py index 7890707..80f4fb8 100644 --- a/src/client.py +++ b/src/client.py @@ -6,9 +6,12 @@ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((HOST, PORT)) + +query_filepath = "/home/paf2023/run_mantis/queries/query{0}.fa" +output_filepath = "/home/paf2023/run_mantis/out/out{0}.res" + for x in range(0, 5): - print("Step 1") - s.send('/home/paf2023/run_mantis/queries/read2.fa ./out.res') - print("Step 2") - print(str(s.recv(1000))) - print(x) + print("Send request {0}".format(query_filepath.format(x))) + s.send(query_filepath.format(x) + " " + output_filepath.format(x)) + result = str(s.recv(4096)) + print("Got result {0}".format(result)) diff --git a/src/queries.cc b/src/queries.cc index 6e84064..d26a556 100644 --- a/src/queries.cc +++ b/src/queries.cc @@ -64,10 +64,8 @@ void query::run_query(std::string query_file, std::string output_file) console->info("Use colored dbg with {} k-mers and {} color classes", cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); console->info("K-mer size: {}", kmer_size); - - std::cout << "QF: " << query_file << std::endl; - std::cout << "OUT: " << output_file << std::endl; - + console->info("Query file: {}", query_file); + console->info("Output file: {}", output_file); mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), seed, diff --git a/src/sockets.hh b/src/sockets.hh index 25c4dd7..5061d8a 100644 --- a/src/sockets.hh +++ b/src/sockets.hh @@ -30,8 +30,6 @@ private: socket_.async_read_some(boost::asio::buffer(data_, max_length), [this, self](boost::system::error_code ec, std::size_t length) { - std::cout << "Read: " << data_ << std::endl; - char * delimeter_ptr = strstr(data_, " "); if (!delimeter_ptr) { @@ -45,7 +43,7 @@ private: if (!ec) { - do_write(length, query_filepath); + 
do_write(length, output_filepath); } }); } @@ -54,7 +52,8 @@ private: { // respond with output filepath auto self(shared_from_this()); - std::cout << "Respond with message: " << data_ << std::endl; + memset(data_, 0, sizeof(data_)); + strcpy(data_, outfile.c_str()); boost::asio::async_write(socket_, boost::asio::buffer(data_, length), [this, self](boost::system::error_code ec, std::size_t /*length*/) { From 79c82da25caa8b403a43cb73f5fc2bd3e82b8975 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 12:36:19 -0400 Subject: [PATCH 07/15] Support both query and queries server --- Makefile | 8 +- src/mantis.cc | 2 +- src/queries.cc | 147 ---------------------------------- src/query.cc | 96 ++++++++++++++++++++++ src/query.hh | 45 ----------- src/{sockets.hh => sockets.h} | 35 ++++++-- 6 files changed, 127 insertions(+), 206 deletions(-) delete mode 100644 src/queries.cc delete mode 100644 src/query.hh rename src/{sockets.hh => sockets.h} (62%) diff --git a/Makefile b/Makefile index 71b94d5..f764edb 100644 --- a/Makefile +++ b/Makefile @@ -70,8 +70,7 @@ LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system all: $(TARGETS) # dependencies between programs and .o files -#mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o -mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o # dependencies between .o files and .h files $(OBJDIR)/mantis.o: $(LOC_SRC)/mantis.cc @@ -80,15 +79,14 @@ $(OBJDIR)/util.o: 
$(LOC_SRC)/util.cc $(LOC_INCLUDE)/util.h $(OBJDIR)/bitvector.o: $(LOC_SRC)/bitvector.cc $(LOC_INCLUDE)/bitvector.h $(OBJDIR)/kmer.o: $(LOC_SRC)/kmer.cc $(LOC_INCLUDE)/kmer.h $(OBJDIR)/coloreddbg.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h -#$(OBJDIR)/query.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h -$(OBJDIR)/queries.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h +$(OBJDIR)/query.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/validatemantis.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h # dependencies between .o files and .cc (or .c) files $(OBJDIR)/gqf.o: $(LOC_SRC)/cqf/gqf.c $(LOC_INCLUDE)/cqf/gqf.h -$(OBJDIR)/queries.o: $(LOC_SRC)/sockets.hh +$(OBJDIR)/query.o: $(LOC_SRC)/sockets.h # # generic build rules diff --git a/src/mantis.cc b/src/mantis.cc index 950ceaa..264ad9e 100644 --- a/src/mantis.cc +++ b/src/mantis.cc @@ -188,7 +188,7 @@ int main ( int argc, char *argv[] ) { if(res) { switch(selected) { case mode::build: build_main(bopt); break; - // case mode::query: query_main(qopt); break; + case mode::query: query_main(qopt); break; case mode::server: server_main(sopt); break; case mode::validate: validate_main(vopt); break; case mode::help: std::cout << make_man_page(cli, "mantis"); break; diff --git a/src/queries.cc b/src/queries.cc deleted file mode 100644 index d26a556..0000000 --- 
a/src/queries.cc +++ /dev/null @@ -1,147 +0,0 @@ -// #include "query.hh" -#include "sockets.hh" - -#define MAX_NUM_SAMPLES 2600 -#define OUTPUT_FILE "samples.output" - -void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, - std::ofstream& opfile) { - mantis::QueryResults qres; - uint32_t cnt= 0; - { - CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; - //size_t qctr{0}; - //size_t nquery{multi_kmers.size()}; - for (auto& kmers : multi_kmers) { - //std::sort(kmers.begin(), kmers.end()); - opfile << cnt++ << '\t' << kmers.size() << '\n'; - mantis::QueryResult result = cdbg.find_samples(kmers); - for (auto it = result.begin(); it != result.end(); ++it) { - opfile << cdbg.get_sample(it->first) << '\t' << it->second << '\n'; - } - //++qctr; - } - } -} - - -void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, - std::ofstream& opfile) { - mantis::QueryResults qres; - uint32_t cnt= 0; - { - CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; - opfile << "[\n"; - size_t qctr{0}; - size_t nquery{multi_kmers.size()}; - for (auto& kmers : multi_kmers) { - //std::sort(kmers.begin(), kmers.end()); - opfile << "{ \"qnum\": " << cnt++ << ", \"num_kmers\": " << kmers.size() << ", \"res\": {\n"; - mantis::QueryResult result = cdbg.find_samples(kmers); - for (auto it = result.begin(); it != result.end(); ++it) { - opfile << " \"" <first) << "\": " << it->second ; - if (std::next(it) != result.end()) { - opfile << ",\n"; - } - } - opfile << "}}"; - if (qctr < nquery - 1) { opfile << ","; } - opfile << "\n"; - ++qctr; - } - opfile << "]\n"; - } - -} - -void query::run_query(std::string query_file, std::string output_file) -{ - ColoredDbg*>, KeyObject> & cdbg = *colored_dbg; - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Use colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); - 
console->info("K-mer size: {}", kmer_size); - console->info("Query file: {}", query_file); - console->info("Output file: {}", output_file); - - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); - - // if (use_json) { - // output_results_json(multi_kmers, cdbg, opfile); - // } else { - // output_results(multi_kmers, cdbg, opfile); - // } - // TODO use_json - output_results(multi_kmers, cdbg, opfile); - - opfile.close(); - console->info("Writing done."); -} - -/* - * === FUNCTION ============================================================= - * Name: main - * Description: - * ============================================================================ - */ -int server_main (QueryOpts& opt) -{ - try - { - boost::asio::io_service io_service; - server s(io_service, 23901); - - std::string prefix = opt.prefix; - // bool use_json = opt.use_json; - - // Make sure the prefix is a full folder - if (prefix.back() != '/') { - prefix.push_back('/'); - } - - query::console = opt.console.get(); - query::console->info("Reading colored dbg from disk: " + prefix); - - std::string cqf_file(prefix + CQF_FILE); - std::string sample_file(prefix + SAMPLEID_FILE); - std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), - std::string(EQCLASS_FILE).c_str()); - query::colored_dbg = new ColoredDbg*>, KeyObject>(cqf_file, eqclass_files, sample_file); - - - query::console->info("Reading query kmers from disk."); - query::console->info("Run server accepting queries."); - io_service.run(); - - // read from stdin - // while (1) { - // std::string query_file = opt.query_file; - // std::string output_file = opt.output; //{"samples.output"}; - - // cout << "Enter query file: " << endl; - // cin >> query_file; - // cout << "Enter output file: " << endl; - // cin >> 
output_file; - - // query::run_query(query_file, output_file); - // } - } - catch (std::exception& e) - { - std::cerr << "Exception: " << e.what() << "\n"; - } - - delete query::colored_dbg; - return EXIT_SUCCESS; -} /* ---------- end of function main ---------- */ diff --git a/src/query.cc b/src/query.cc index 0cc39b8..e45b997 100644 --- a/src/query.cc +++ b/src/query.cc @@ -53,6 +53,8 @@ #include "CLI/CLI.hpp" #include "CLI/Timer.hpp" +#include "sockets.h" + #define MAX_NUM_SAMPLES 2600 #define OUTPUT_FILE "samples.output" @@ -106,6 +108,40 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) { + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; + console->info("Use colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("K-mer size: {}", kmer_size); + console->info("Query file: {}", query_file); + console->info("Output file: {}", output_file); + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + if (use_json) { + output_results_json(multi_kmers, cdbg, opfile); + } else { + output_results(multi_kmers, cdbg, opfile); + } + + opfile.close(); + console->info("Writing done."); +} + /* * === FUNCTION ============================================================= @@ -202,3 +238,63 @@ int query_main (QueryOpts& opt) return EXIT_SUCCESS; } /* ---------- end of function main ---------- */ + + + +/* + * === FUNCTION ============================================================= + * Name: main + * Description: + * ============================================================================ + */ +int server_main (QueryOpts& opt) +{ + try + { + 
std::string prefix = opt.prefix; + bool use_json = opt.use_json; + + // Make sure the prefix is a full folder + if (prefix.back() != '/') { + prefix.push_back('/'); + } + + spdlog::logger* console = opt.console.get(); + console->info("Reading colored dbg from disk: " + prefix); + + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + ColoredDbg*>, KeyObject> cdbg(cqf_file, + eqclass_files, + sample_file); + + boost::asio::io_service io_service; + server s(io_service, 23901, cdbg, console, use_json); // more parameters + + console->info("Reading query kmers from disk."); + console->info("Run server accepting queries."); + io_service.run(); + + // read from stdin + // while (1) { + // std::string query_file = opt.query_file; + // std::string output_file = opt.output; //{"samples.output"}; + + // cout << "Enter query file: " << endl; + // cin >> query_file; + // cout << "Enter output file: " << endl; + // cin >> output_file; + + // query::run_query(query_file, output_file); + // } + } + catch (std::exception& e) + { + std::cerr << "Exception: " << e.what() << "\n"; + } + + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ + diff --git a/src/query.hh b/src/query.hh deleted file mode 100644 index f703718..0000000 --- a/src/query.hh +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "MantisFS.h" -#include "ProgOpts.h" -#include "spdlog/spdlog.h" -#include "kmer.h" -#include "coloreddbg.h" -#include "common_types.h" -#include "CLI/CLI.hpp" -#include "CLI/Timer.hpp" - -namespace query -{ - ColoredDbg*>, KeyObject> * colored_dbg; - spdlog::logger * 
console; - - void run_query(std::string query_file, std::string output_file); -} \ No newline at end of file diff --git a/src/sockets.hh b/src/sockets.h similarity index 62% rename from src/sockets.hh rename to src/sockets.h index 5061d8a..7cf685b 100644 --- a/src/sockets.hh +++ b/src/sockets.h @@ -4,16 +4,23 @@ #include #include -#include "query.hh" - using boost::asio::ip::tcp; +extern void run_query(std::string query_file, std::string output_file, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json); + class session : public std::enable_shared_from_this { public: - session(tcp::socket socket) - : socket_(std::move(socket)) + session(tcp::socket socket, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) + : socket_(std::move(socket)), + cdbg(cdbg), console(console), use_json(use_json) { } @@ -39,7 +46,7 @@ class session std::string query_filepath(data_, delimeter_ptr - data_); std::string output_filepath(delimeter_ptr + 1); // assume exactly one space between filepaths - query::run_query(query_filepath, output_filepath); // TODO check result + run_query(query_filepath, output_filepath, cdbg, console, use_json); // TODO check result if (!ec) { @@ -67,14 +74,22 @@ class session tcp::socket socket_; enum { max_length = 4096 }; char data_[max_length]; + + ColoredDbg*>, KeyObject>& cdbg; + spdlog::logger * console; + bool use_json; }; class server { public: - server(boost::asio::io_service& io_service, short port) + server(boost::asio::io_service& io_service, short port, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) : acceptor_(io_service, tcp::endpoint(tcp::v4(), port)), - socket_(io_service) + socket_(io_service), + cdbg(cdbg), console(console), use_json(use_json) { do_accept(); } @@ -87,7 +102,7 @@ class server { if (!ec) { - std::make_shared(std::move(socket_))->start(); + std::make_shared(std::move(socket_), cdbg, console, use_json)->start(); } do_accept(); @@ -96,4 +111,8 @@ 
class server tcp::acceptor acceptor_; tcp::socket socket_; + + ColoredDbg*>, KeyObject>& cdbg; + spdlog::logger * console; + bool use_json; }; \ No newline at end of file From 39ddeb533dfcb5d91e6f15a06b7ceab90aec6600 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 12:53:12 -0400 Subject: [PATCH 08/15] Reduce code duplication for query and server --- src/query.cc | 83 ++++++---------------------------------------------- 1 file changed, 9 insertions(+), 74 deletions(-) diff --git a/src/query.cc b/src/query.cc index e45b997..0cd4275 100644 --- a/src/query.cc +++ b/src/query.cc @@ -145,26 +145,16 @@ void run_query(std::string query_file, std::string output_file, /* * === FUNCTION ============================================================= - * Name: main + * Name: query main * Description: * ============================================================================ */ -int query_main (QueryOpts& opt) +int query_main(QueryOpts& opt) { - //CLI::App app("Mantis query"); - std::string prefix = opt.prefix; std::string query_file = opt.query_file; std::string output_file = opt.output;//{"samples.output"}; bool use_json = opt.use_json; - /* - app.add_option("-i,--input-prefix", prefix, "Prefix of input files.")->required(); - app.add_option("-o,--outout", output_file, "Where to write query output."); - app.add_option("query", query_file, "Prefix of input files.")->required(); - app.add_flag("-j,--json", use_json, "Write the output in JSON format"); - CLI11_PARSE(app, argc, argv); - */ - // Make sure the prefix is a full folder if (prefix.back() != '/') { prefix.push_back('/'); @@ -181,60 +171,7 @@ int query_main (QueryOpts& opt) ColoredDbg*>, KeyObject> cdbg(cqf_file, eqclass_files, sample_file); - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Read colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); - - //cdbg.get_cqf()->dump_metadata(); - //CQF cqf(query_file, false); - 
//CQF::Iterator it = cqf.begin(1); - //mantis::QuerySet input_kmers; - //do { - //KeyObject k = *it; - //input_kmers.insert(k.key); - //++it; - //} while (!it.done()); - - //mantis::QuerySets multi_kmers; - //multi_kmers.push_back(input_kmers); - - console->info("Reading query kmers from disk."); - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - // Attempt to optimize bulk query - /* - bool doBulk = multi_kmers.size() >= 100; - BulkQuery bq; - if (doBulk) { - uint32_t exp_id{0}; - for (auto& kmers : multi_kmers) { - for (auto& k : kmers) { - bq.qs.insert(k); - bq.map[k].push_back(exp_id); - } - ++exp_id; - } - } - */ - - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); - - if (use_json) { - output_results_json(multi_kmers, cdbg, opfile); - } else { - output_results(multi_kmers, cdbg, opfile); - } - //std::cout << "Writing samples and abundances out." 
<< std::endl; - opfile.close(); - console->info("Writing done."); + run_query(query_file, output_file, cdbg, console, use_json); return EXIT_SUCCESS; } /* ---------- end of function main ---------- */ @@ -243,14 +180,13 @@ int query_main (QueryOpts& opt) /* * === FUNCTION ============================================================= - * Name: main + * Name: server main * Description: * ============================================================================ */ -int server_main (QueryOpts& opt) +int server_main(QueryOpts& opt) { - try - { + try { std::string prefix = opt.prefix; bool use_json = opt.use_json; @@ -277,7 +213,7 @@ int server_main (QueryOpts& opt) console->info("Run server accepting queries."); io_service.run(); - // read from stdin + // read from stdin instead of socket // while (1) { // std::string query_file = opt.query_file; // std::string output_file = opt.output; //{"samples.output"}; @@ -287,11 +223,10 @@ int server_main (QueryOpts& opt) // cout << "Enter output file: " << endl; // cin >> output_file; - // query::run_query(query_file, output_file); + // run_query(query_file, output_file, cdbg, console, use_json); // } } - catch (std::exception& e) - { + catch (std::exception& e) { std::cerr << "Exception: " << e.what() << "\n"; } From 0fae95b0c7c95bcf80e3207a73a163b19735e5c0 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 13:36:46 -0400 Subject: [PATCH 09/15] Clean makefile from local environment settings --- Makefile | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index f764edb..50d89ed 100644 --- a/Makefile +++ b/Makefile @@ -1,31 +1,4 @@ TARGETS= mantis -MYLIBS=-L/home/paf2023/tools/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-phpi22i4vio3f55thjj5xxeupzmj463k/lib \ 
--L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/gettext-0.19.8.1-cnbmeuapkcyxq23tumt4esnuvijpl4w7/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/bzip2-1.0.6-x5hwxgowvbzmnowngidcjin5qicfg4fl/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/libxml2-2.9.4-ox4x2yk4rqlh3ag4wxxgzficxgwbcsy7/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/xz-5.2.3-npj3lafufjb7d57yhkthdo3qeeollgiq/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/python-2.7.14-4q4ykdbphrdjx2n4ztg5getsrjkl7dkz/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/ncurses-6.0-37ewjt6dvfa4vn3coyfnx5ejnfzg6d27/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-pkmmuwsdolh4vk2qrjvevqbrds55oxet/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-3yfqdmrhogpxaehsissamfd4bagokf3y/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-kamnyqrq5i6e6ghsccvnx4banzgdvn6r/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-t6r4t2u7mpl73qqhveutncj4gamajiip/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-a4bouodbguxjzgeoogr7qjqculofd4cx/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-7eta3q6r3ozz5vh7zaffgjajf4fwvxby/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.5.4/zlib-1.2.11-upeb67xkoclf5fgx7musxjw5v7gk7yw7/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/zlib-1.2.11-erzei6xutxne2tjssbmelgutsnhz6x6s/lib \ 
--L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-rjqs3cq27e2o4tbce7kzmictrih57avc/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-oiwzkauf2kl22pu7j2vjupr3ffu3mzts/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-5awddeormrqlmqq2frhjrdcu5szebebq/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-sqviukvzkgl524rq2dtz6d5ytukaxlj3/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.64.0-sx2zxhmitq565rmn3jutk67ffqrko6zx/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/boost-1.63.0-gpfsh3erjtf2xyksy66crnxzqib6qrva/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.63.0-eu4aw5v6ewmqd5kuzieorttiljxcuscv/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.54.0-7qaxqizxejlur5pr5mfecf2umfp52lmr/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-han2ovjpjdnuwkhtfhugdq66cjkte7q3/lib64 \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-xnoykzftyh27xs6vqkwvtft2cce5yd4k/lib64 \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-4.5.4-tmeuqohwhscsb2hegxgfjmtxy7ywelud/lib64 \ ifdef D DEBUG=-g -DDEBUG @@ -60,7 +33,7 @@ CFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. 
-I$(LOC_INCLUDE)\ -Wno-unused-result -Wno-strict-aliasing -Wno-unused-function -Wno-sign-compare \ -Wno-implicit-function-declaration -LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ +LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ -lboost_thread -lm -lz -lrt # @@ -70,7 +43,7 @@ LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system all: $(TARGETS) # dependencies between programs and .o files -mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o # dependencies between .o files and .h files $(OBJDIR)/mantis.o: $(LOC_SRC)/mantis.cc From d5f3ff87761cdbaa3a6b2c0b9c32c9c11a7b31b2 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Tue, 10 Apr 2018 18:29:59 -0400 Subject: [PATCH 10/15] Accept queries in loop from stdin --- Makefile | 35 +++++++- src/mantis.cc | 20 ++++- src/queries.cc | 214 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+), 7 deletions(-) create mode 100644 src/queries.cc diff --git a/Makefile b/Makefile index 3d8c450..5649f32 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,31 @@ TARGETS= mantis +MYLIBS=-L/home/paf2023/tools/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-phpi22i4vio3f55thjj5xxeupzmj463k/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/gettext-0.19.8.1-cnbmeuapkcyxq23tumt4esnuvijpl4w7/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/bzip2-1.0.6-x5hwxgowvbzmnowngidcjin5qicfg4fl/lib \ 
+-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/libxml2-2.9.4-ox4x2yk4rqlh3ag4wxxgzficxgwbcsy7/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/xz-5.2.3-npj3lafufjb7d57yhkthdo3qeeollgiq/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/python-2.7.14-4q4ykdbphrdjx2n4ztg5getsrjkl7dkz/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/ncurses-6.0-37ewjt6dvfa4vn3coyfnx5ejnfzg6d27/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-pkmmuwsdolh4vk2qrjvevqbrds55oxet/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-3yfqdmrhogpxaehsissamfd4bagokf3y/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-kamnyqrq5i6e6ghsccvnx4banzgdvn6r/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-t6r4t2u7mpl73qqhveutncj4gamajiip/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-a4bouodbguxjzgeoogr7qjqculofd4cx/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-7eta3q6r3ozz5vh7zaffgjajf4fwvxby/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.5.4/zlib-1.2.11-upeb67xkoclf5fgx7musxjw5v7gk7yw7/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/zlib-1.2.11-erzei6xutxne2tjssbmelgutsnhz6x6s/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-rjqs3cq27e2o4tbce7kzmictrih57avc/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-oiwzkauf2kl22pu7j2vjupr3ffu3mzts/lib \ 
+-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-5awddeormrqlmqq2frhjrdcu5szebebq/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-sqviukvzkgl524rq2dtz6d5ytukaxlj3/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.64.0-sx2zxhmitq565rmn3jutk67ffqrko6zx/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/boost-1.63.0-gpfsh3erjtf2xyksy66crnxzqib6qrva/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.63.0-eu4aw5v6ewmqd5kuzieorttiljxcuscv/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.54.0-7qaxqizxejlur5pr5mfecf2umfp52lmr/lib \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-han2ovjpjdnuwkhtfhugdq66cjkte7q3/lib64 \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-xnoykzftyh27xs6vqkwvtft2cce5yd4k/lib64 \ +-L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-4.5.4-tmeuqohwhscsb2hegxgfjmtxy7ywelud/lib64 \ ifdef D DEBUG=-g -DDEBUG @@ -33,7 +60,7 @@ CFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. 
-I$(LOC_INCLUDE)\ -Wno-unused-result -Wno-strict-aliasing -Wno-unused-function -Wno-sign-compare \ -Wno-implicit-function-declaration -LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ +LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ -lboost_thread -lm -lz -lrt # @@ -43,7 +70,8 @@ LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ all: $(TARGETS) # dependencies between programs and .o files -mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +#mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o # dependencies between .o files and .h files $(OBJDIR)/mantis.o: $(LOC_SRC)/mantis.cc @@ -52,7 +80,8 @@ $(OBJDIR)/util.o: $(LOC_SRC)/util.cc $(LOC_INCLUDE)/util.h $(OBJDIR)/bitvector.o: $(LOC_SRC)/bitvector.cc $(LOC_INCLUDE)/bitvector.h $(OBJDIR)/kmer.o: $(LOC_SRC)/kmer.cc $(LOC_INCLUDE)/kmer.h $(OBJDIR)/coloreddbg.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h -$(OBJDIR)/query.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h +#$(OBJDIR)/query.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h +$(OBJDIR)/queries.o: 
$(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/validatemantis.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h diff --git a/src/mantis.cc b/src/mantis.cc index b80ca6d..950ceaa 100644 --- a/src/mantis.cc +++ b/src/mantis.cc @@ -46,6 +46,7 @@ void explore_options_verbose(T& res) { } int query_main (QueryOpts& opt); +int server_main (QueryOpts& opt); int build_main (BuildOpts& opt); int validate_main (ValidateOpts& opt); @@ -57,16 +58,18 @@ int validate_main (ValidateOpts& opt); */ int main ( int argc, char *argv[] ) { using namespace clipp; - enum class mode {build, query, validate, help}; + enum class mode {build, query, server, validate, help}; mode selected = mode::help; auto console = spdlog::stdout_color_mt("mantis_console"); BuildOpts bopt; - QueryOpts qopt; + QueryOpts qopt, sopt; ValidateOpts vopt; + bopt.console = console; qopt.console = console; + sopt.console = console; vopt.console = console; start_time = time(NULL); @@ -145,6 +148,13 @@ int main ( int argc, char *argv[] ) { option("-o", "--output") & value("output_file", qopt.output) % "Where to write query output.", value(ensure_file_exists, "query", qopt.query_file) % "Prefix of input files." ); + auto server_mode = ( + command("server").set(selected, mode::server), + option("-j", "--json").set(sopt.use_json) % "Write the output in JSON format", + required("-p", "--input-prefix") & value(ensure_dir_exists, "query_prefix", sopt.prefix) % "Prefix of input files." // , + // option("-o", "--output") & value("output_file", qopt.output) % "Where to write query output.", + // value(ensure_file_exists, "query", qopt.query_file) % "Prefix of input files." 
+ ); auto validate_mode = ( command("validate").set(selected, mode::validate), @@ -155,11 +165,12 @@ int main ( int argc, char *argv[] ) { ); auto cli = ( - (build_mode | query_mode | validate_mode | command("help").set(selected,mode::help) ), + (build_mode | query_mode | server_mode | validate_mode | command("help").set(selected,mode::help) ), option("-v", "--version").call([]{std::cout << "version 1.0\n\n";}).doc("show version") ); assert(build_mode.flags_are_prefix_free()); assert(query_mode.flags_are_prefix_free()); + assert(server_mode.flags_are_prefix_free()); assert(validate_mode.flags_are_prefix_free()); decltype(parse(argc, argv, cli)) res; @@ -177,7 +188,8 @@ int main ( int argc, char *argv[] ) { if(res) { switch(selected) { case mode::build: build_main(bopt); break; - case mode::query: query_main(qopt); break; + // case mode::query: query_main(qopt); break; + case mode::server: server_main(sopt); break; case mode::validate: validate_main(vopt); break; case mode::help: std::cout << make_man_page(cli, "mantis"); break; } diff --git a/src/queries.cc b/src/queries.cc new file mode 100644 index 0000000..553c0f0 --- /dev/null +++ b/src/queries.cc @@ -0,0 +1,214 @@ +/* + * ============================================================================ + * + * Filename: query.cc + * + * Description: + * + * Version: 1.0 + * Created: 2017-10-27 12:56:50 AM + * Revision: none + * Compiler: gcc + * + * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu + * Organization: Stony Brook University + * + * ============================================================================ + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MantisFS.h" +#include "ProgOpts.h" +#include "spdlog/spdlog.h" +#include "kmer.h" +#include "coloreddbg.h" +#include 
"common_types.h" +#include "CLI/CLI.hpp" +#include "CLI/Timer.hpp" + +#define MAX_NUM_SAMPLES 2600 +#define OUTPUT_FILE "samples.output" + +void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, + std::ofstream& opfile) { + mantis::QueryResults qres; + uint32_t cnt= 0; + { + CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; + //size_t qctr{0}; + //size_t nquery{multi_kmers.size()}; + for (auto& kmers : multi_kmers) { + //std::sort(kmers.begin(), kmers.end()); + opfile << cnt++ << '\t' << kmers.size() << '\n'; + mantis::QueryResult result = cdbg.find_samples(kmers); + for (auto it = result.begin(); it != result.end(); ++it) { + opfile << cdbg.get_sample(it->first) << '\t' << it->second << '\n'; + } + //++qctr; + } + } +} + + +void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, + std::ofstream& opfile) { + mantis::QueryResults qres; + uint32_t cnt= 0; + { + CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; + opfile << "[\n"; + size_t qctr{0}; + size_t nquery{multi_kmers.size()}; + for (auto& kmers : multi_kmers) { + //std::sort(kmers.begin(), kmers.end()); + opfile << "{ \"qnum\": " << cnt++ << ", \"num_kmers\": " << kmers.size() << ", \"res\": {\n"; + mantis::QueryResult result = cdbg.find_samples(kmers); + for (auto it = result.begin(); it != result.end(); ++it) { + opfile << " \"" <first) << "\": " << it->second ; + if (std::next(it) != result.end()) { + opfile << ",\n"; + } + } + opfile << "}}"; + if (qctr < nquery - 1) { opfile << ","; } + opfile << "\n"; + ++qctr; + } + opfile << "]\n"; + } + +} + + +/* + * === FUNCTION ============================================================= + * Name: main + * Description: + * ============================================================================ + */ +int server_main (QueryOpts& opt) +{ + //CLI::App app("Mantis query"); + + std::string prefix = opt.prefix; + bool use_json = opt.use_json; + /* + app.add_option("-i,--input-prefix", prefix, 
"Prefix of input files.")->required(); + app.add_option("-o,--outout", output_file, "Where to write query output."); + app.add_option("query", query_file, "Prefix of input files.")->required(); + app.add_flag("-j,--json", use_json, "Write the output in JSON format"); + CLI11_PARSE(app, argc, argv); + */ + + // Make sure the prefix is a full folder + if (prefix.back() != '/') { + prefix.push_back('/'); + } + + spdlog::logger* console = opt.console.get(); + console->info("Reading colored dbg from disk: " + prefix); + + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + + ColoredDbg*>, KeyObject> cdbg(cqf_file, + eqclass_files, + sample_file); + uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; + console->info("Read colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + + //cdbg.get_cqf()->dump_metadata(); + //CQF cqf(query_file, false); + //CQF::Iterator it = cqf.begin(1); + //mantis::QuerySet input_kmers; + //do { + //KeyObject k = *it; + //input_kmers.insert(k.key); + //++it; + //} while (!it.done()); + + //mantis::QuerySets multi_kmers; + //multi_kmers.push_back(input_kmers); + + console->info("Reading query kmers from disk."); + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + while (1) { + std::string query_file = opt.query_file; + std::string output_file = opt.output;//{"samples.output"}; + + cout << "Enter query file: " << endl; + cin >> query_file; + cout << "Enter output file: " << endl; + cin >> output_file; + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + // Attempt to optimize bulk query + /* + bool doBulk = multi_kmers.size() >= 100; + BulkQuery bq; + if (doBulk) { + uint32_t exp_id{0}; + for (auto& kmers 
: multi_kmers) { + for (auto& k : kmers) { + bq.qs.insert(k); + bq.map[k].push_back(exp_id); + } + ++exp_id; + } + } + */ + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + if (use_json) { + output_results_json(multi_kmers, cdbg, opfile); + } else { + output_results(multi_kmers, cdbg, opfile); + } + //std::cout << "Writing samples and abundances out." << std::endl; + opfile.close(); + console->info("Writing done."); + + } + + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ From 07f82cb25863fea1b09a27aaad3e3fc4f5066e6e Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 00:54:29 -0400 Subject: [PATCH 11/15] Add queries based on sockets --- Makefile | 1 + src/client.py | 17 ++++ src/queries.cc | 215 +++++++++++++++++-------------------------------- src/query.hh | 45 +++++++++++ src/sockets.hh | 99 +++++++++++++++++++++++ 5 files changed, 236 insertions(+), 141 deletions(-) create mode 100644 src/client.py create mode 100644 src/query.hh create mode 100644 src/sockets.hh diff --git a/Makefile b/Makefile index 5649f32..b6b6a9b 100644 --- a/Makefile +++ b/Makefile @@ -88,6 +88,7 @@ $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h # dependencies between .o files and .cc (or .c) files $(OBJDIR)/gqf.o: $(LOC_SRC)/cqf/gqf.c $(LOC_INCLUDE)/cqf/gqf.h +$(OBJDIR)/queries.o: $(LOC_SRC)/sockets.hh # # generic build rules diff --git a/src/client.py b/src/client.py new file mode 100644 index 0000000..80f4fb8 --- /dev/null +++ b/src/client.py @@ -0,0 +1,17 @@ +import socket +import sys + +HOST, PORT = "localhost", 23901 + +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + +s.connect((HOST, PORT)) + +query_filepath = "/home/paf2023/run_mantis/queries/query{0}.fa" +output_filepath = "/home/paf2023/run_mantis/out/out{0}.res" + +for x in range(0, 5): + print("Send request {0}".format(query_filepath.format(x))) + s.send(query_filepath.format(x) + " " + output_filepath.format(x)) + result = 
str(s.recv(4096)) + print("Got result {0}".format(result)) diff --git a/src/queries.cc b/src/queries.cc index 553c0f0..d26a556 100644 --- a/src/queries.cc +++ b/src/queries.cc @@ -1,57 +1,5 @@ -/* - * ============================================================================ - * - * Filename: query.cc - * - * Description: - * - * Version: 1.0 - * Created: 2017-10-27 12:56:50 AM - * Revision: none - * Compiler: gcc - * - * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu - * Organization: Stony Brook University - * - * ============================================================================ - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "MantisFS.h" -#include "ProgOpts.h" -#include "spdlog/spdlog.h" -#include "kmer.h" -#include "coloreddbg.h" -#include "common_types.h" -#include "CLI/CLI.hpp" -#include "CLI/Timer.hpp" +// #include "query.hh" +#include "sockets.hh" #define MAX_NUM_SAMPLES 2600 #define OUTPUT_FILE "samples.output" @@ -106,6 +54,40 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject> & cdbg = *colored_dbg; + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; + console->info("Use colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("K-mer size: {}", kmer_size); + console->info("Query file: {}", query_file); + console->info("Output file: {}", output_file); + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + // if (use_json) { + // 
output_results_json(multi_kmers, cdbg, opfile); + // } else { + // output_results(multi_kmers, cdbg, opfile); + // } + // TODO use_json + output_results(multi_kmers, cdbg, opfile); + + opfile.close(); + console->info("Writing done."); +} /* * === FUNCTION ============================================================= @@ -115,100 +97,51 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbgrequired(); - app.add_option("-o,--outout", output_file, "Where to write query output."); - app.add_option("query", query_file, "Prefix of input files.")->required(); - app.add_flag("-j,--json", use_json, "Write the output in JSON format"); - CLI11_PARSE(app, argc, argv); - */ - - // Make sure the prefix is a full folder - if (prefix.back() != '/') { - prefix.push_back('/'); - } + try + { + boost::asio::io_service io_service; + server s(io_service, 23901); - spdlog::logger* console = opt.console.get(); - console->info("Reading colored dbg from disk: " + prefix); + std::string prefix = opt.prefix; + // bool use_json = opt.use_json; - std::string cqf_file(prefix + CQF_FILE); - std::string sample_file(prefix + SAMPLEID_FILE); - std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), - std::string(EQCLASS_FILE).c_str()); + // Make sure the prefix is a full folder + if (prefix.back() != '/') { + prefix.push_back('/'); + } - ColoredDbg*>, KeyObject> cdbg(cqf_file, - eqclass_files, - sample_file); - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Read colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + query::console = opt.console.get(); + query::console->info("Reading colored dbg from disk: " + prefix); - //cdbg.get_cqf()->dump_metadata(); - //CQF cqf(query_file, false); - //CQF::Iterator it = cqf.begin(1); - //mantis::QuerySet input_kmers; - //do { - //KeyObject k = *it; - //input_kmers.insert(k.key); - //++it; - //} while (!it.done()); - - //mantis::QuerySets multi_kmers; - 
//multi_kmers.push_back(input_kmers); - - console->info("Reading query kmers from disk."); - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - - while (1) { - std::string query_file = opt.query_file; - std::string output_file = opt.output;//{"samples.output"}; - - cout << "Enter query file: " << endl; - cin >> query_file; - cout << "Enter output file: " << endl; - cin >> output_file; - - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - // Attempt to optimize bulk query - /* - bool doBulk = multi_kmers.size() >= 100; - BulkQuery bq; - if (doBulk) { - uint32_t exp_id{0}; - for (auto& kmers : multi_kmers) { - for (auto& k : kmers) { - bq.qs.insert(k); - bq.map[k].push_back(exp_id); - } - ++exp_id; - } - } - */ + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + query::colored_dbg = new ColoredDbg*>, KeyObject>(cqf_file, eqclass_files, sample_file); - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); - if (use_json) { - output_results_json(multi_kmers, cdbg, opfile); - } else { - output_results(multi_kmers, cdbg, opfile); - } - //std::cout << "Writing samples and abundances out." 
<< std::endl; - opfile.close(); - console->info("Writing done."); + query::console->info("Reading query kmers from disk."); + query::console->info("Run server accepting queries."); + io_service.run(); + + // read from stdin + // while (1) { + // std::string query_file = opt.query_file; + // std::string output_file = opt.output; //{"samples.output"}; + + // cout << "Enter query file: " << endl; + // cin >> query_file; + // cout << "Enter output file: " << endl; + // cin >> output_file; + // query::run_query(query_file, output_file); + // } + } + catch (std::exception& e) + { + std::cerr << "Exception: " << e.what() << "\n"; } - return EXIT_SUCCESS; + delete query::colored_dbg; + return EXIT_SUCCESS; } /* ---------- end of function main ---------- */ diff --git a/src/query.hh b/src/query.hh new file mode 100644 index 0000000..f703718 --- /dev/null +++ b/src/query.hh @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MantisFS.h" +#include "ProgOpts.h" +#include "spdlog/spdlog.h" +#include "kmer.h" +#include "coloreddbg.h" +#include "common_types.h" +#include "CLI/CLI.hpp" +#include "CLI/Timer.hpp" + +namespace query +{ + ColoredDbg*>, KeyObject> * colored_dbg; + spdlog::logger * console; + + void run_query(std::string query_file, std::string output_file); +} \ No newline at end of file diff --git a/src/sockets.hh b/src/sockets.hh new file mode 100644 index 0000000..5061d8a --- /dev/null +++ b/src/sockets.hh @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include + +#include "query.hh" + +using boost::asio::ip::tcp; + +class session + : public std::enable_shared_from_this +{ +public: + session(tcp::socket socket) + : socket_(std::move(socket)) + { + } + + void start() + { + do_read(); + } + +private: + void 
do_read() + { + // read query file path and output file path + auto self(shared_from_this()); + socket_.async_read_some(boost::asio::buffer(data_, max_length), + [this, self](boost::system::error_code ec, std::size_t length) + { + char * delimeter_ptr = strstr(data_, " "); + if (!delimeter_ptr) + { + // TODO incorrect query format error. Call do_read? + return; + } + std::string query_filepath(data_, delimeter_ptr - data_); + std::string output_filepath(delimeter_ptr + 1); // assume exactly one space between filepaths + + query::run_query(query_filepath, output_filepath); // TODO check result + + if (!ec) + { + do_write(length, output_filepath); + } + }); + } + + void do_write(std::size_t length, std::string outfile) + { + // respond with output filepath + auto self(shared_from_this()); + memset(data_, 0, sizeof(data_)); + strcpy(data_, outfile.c_str()); + boost::asio::async_write(socket_, boost::asio::buffer(data_, length), + [this, self](boost::system::error_code ec, std::size_t /*length*/) + { + if (!ec) + { + do_read(); + } + }); + } + + tcp::socket socket_; + enum { max_length = 4096 }; + char data_[max_length]; +}; + +class server +{ +public: + server(boost::asio::io_service& io_service, short port) + : acceptor_(io_service, tcp::endpoint(tcp::v4(), port)), + socket_(io_service) + { + do_accept(); + } + +private: + void do_accept() + { + acceptor_.async_accept(socket_, + [this](boost::system::error_code ec) + { + if (!ec) + { + std::make_shared(std::move(socket_))->start(); + } + + do_accept(); + }); + } + + tcp::acceptor acceptor_; + tcp::socket socket_; +}; \ No newline at end of file From f9b47ecfdf9c554966317f0219ab9fbdb5addd8f Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 12:36:19 -0400 Subject: [PATCH 12/15] Support both query and queries server --- Makefile | 8 +- src/mantis.cc | 2 +- src/queries.cc | 147 ---------------------------------- src/query.cc | 96 ++++++++++++++++++++++ src/query.hh | 45 ----------- src/{sockets.hh => 
sockets.h} | 35 ++++++-- 6 files changed, 127 insertions(+), 206 deletions(-) delete mode 100644 src/queries.cc delete mode 100644 src/query.hh rename src/{sockets.hh => sockets.h} (62%) diff --git a/Makefile b/Makefile index b6b6a9b..85fa78d 100644 --- a/Makefile +++ b/Makefile @@ -70,8 +70,7 @@ LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system all: $(TARGETS) # dependencies between programs and .o files -#mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o -mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/queries.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o # dependencies between .o files and .h files $(OBJDIR)/mantis.o: $(LOC_SRC)/mantis.cc @@ -80,15 +79,14 @@ $(OBJDIR)/util.o: $(LOC_SRC)/util.cc $(LOC_INCLUDE)/util.h $(OBJDIR)/bitvector.o: $(LOC_SRC)/bitvector.cc $(LOC_INCLUDE)/bitvector.h $(OBJDIR)/kmer.o: $(LOC_SRC)/kmer.cc $(LOC_INCLUDE)/kmer.h $(OBJDIR)/coloreddbg.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h -#$(OBJDIR)/query.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h -$(OBJDIR)/queries.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h +$(OBJDIR)/query.o: $(LOC_INCLUDE)/cqf/gqf.h 
$(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/validatemantis.o: $(LOC_INCLUDE)/cqf/gqf.h $(LOC_INCLUDE)/hashutil.h $(LOC_INCLUDE)/util.h $(LOC_INCLUDE)/coloreddbg.h $(LOC_INCLUDE)/bitvector.h $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/kmer.h $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h # dependencies between .o files and .cc (or .c) files $(OBJDIR)/gqf.o: $(LOC_SRC)/cqf/gqf.c $(LOC_INCLUDE)/cqf/gqf.h -$(OBJDIR)/queries.o: $(LOC_SRC)/sockets.hh +$(OBJDIR)/query.o: $(LOC_SRC)/sockets.h # # generic build rules diff --git a/src/mantis.cc b/src/mantis.cc index 950ceaa..264ad9e 100644 --- a/src/mantis.cc +++ b/src/mantis.cc @@ -188,7 +188,7 @@ int main ( int argc, char *argv[] ) { if(res) { switch(selected) { case mode::build: build_main(bopt); break; - // case mode::query: query_main(qopt); break; + case mode::query: query_main(qopt); break; case mode::server: server_main(sopt); break; case mode::validate: validate_main(vopt); break; case mode::help: std::cout << make_man_page(cli, "mantis"); break; diff --git a/src/queries.cc b/src/queries.cc deleted file mode 100644 index d26a556..0000000 --- a/src/queries.cc +++ /dev/null @@ -1,147 +0,0 @@ -// #include "query.hh" -#include "sockets.hh" - -#define MAX_NUM_SAMPLES 2600 -#define OUTPUT_FILE "samples.output" - -void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, - std::ofstream& opfile) { - mantis::QueryResults qres; - uint32_t cnt= 0; - { - CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; - //size_t qctr{0}; - //size_t nquery{multi_kmers.size()}; - for (auto& kmers : multi_kmers) { - //std::sort(kmers.begin(), kmers.end()); - opfile << cnt++ << '\t' << kmers.size() << '\n'; - mantis::QueryResult result = cdbg.find_samples(kmers); - for (auto it = result.begin(); it != result.end(); ++it) { - opfile << cdbg.get_sample(it->first) << '\t' << it->second << '\n'; - } - //++qctr; - } - 
} -} - - -void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, - std::ofstream& opfile) { - mantis::QueryResults qres; - uint32_t cnt= 0; - { - CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; - opfile << "[\n"; - size_t qctr{0}; - size_t nquery{multi_kmers.size()}; - for (auto& kmers : multi_kmers) { - //std::sort(kmers.begin(), kmers.end()); - opfile << "{ \"qnum\": " << cnt++ << ", \"num_kmers\": " << kmers.size() << ", \"res\": {\n"; - mantis::QueryResult result = cdbg.find_samples(kmers); - for (auto it = result.begin(); it != result.end(); ++it) { - opfile << " \"" <first) << "\": " << it->second ; - if (std::next(it) != result.end()) { - opfile << ",\n"; - } - } - opfile << "}}"; - if (qctr < nquery - 1) { opfile << ","; } - opfile << "\n"; - ++qctr; - } - opfile << "]\n"; - } - -} - -void query::run_query(std::string query_file, std::string output_file) -{ - ColoredDbg*>, KeyObject> & cdbg = *colored_dbg; - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Use colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); - console->info("K-mer size: {}", kmer_size); - console->info("Query file: {}", query_file); - console->info("Output file: {}", output_file); - - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); - - // if (use_json) { - // output_results_json(multi_kmers, cdbg, opfile); - // } else { - // output_results(multi_kmers, cdbg, opfile); - // } - // TODO use_json - output_results(multi_kmers, cdbg, opfile); - - opfile.close(); - console->info("Writing done."); -} - -/* - * === FUNCTION ============================================================= - * Name: main - * 
Description: - * ============================================================================ - */ -int server_main (QueryOpts& opt) -{ - try - { - boost::asio::io_service io_service; - server s(io_service, 23901); - - std::string prefix = opt.prefix; - // bool use_json = opt.use_json; - - // Make sure the prefix is a full folder - if (prefix.back() != '/') { - prefix.push_back('/'); - } - - query::console = opt.console.get(); - query::console->info("Reading colored dbg from disk: " + prefix); - - std::string cqf_file(prefix + CQF_FILE); - std::string sample_file(prefix + SAMPLEID_FILE); - std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), - std::string(EQCLASS_FILE).c_str()); - query::colored_dbg = new ColoredDbg*>, KeyObject>(cqf_file, eqclass_files, sample_file); - - - query::console->info("Reading query kmers from disk."); - query::console->info("Run server accepting queries."); - io_service.run(); - - // read from stdin - // while (1) { - // std::string query_file = opt.query_file; - // std::string output_file = opt.output; //{"samples.output"}; - - // cout << "Enter query file: " << endl; - // cin >> query_file; - // cout << "Enter output file: " << endl; - // cin >> output_file; - - // query::run_query(query_file, output_file); - // } - } - catch (std::exception& e) - { - std::cerr << "Exception: " << e.what() << "\n"; - } - - delete query::colored_dbg; - return EXIT_SUCCESS; -} /* ---------- end of function main ---------- */ diff --git a/src/query.cc b/src/query.cc index 0cc39b8..e45b997 100644 --- a/src/query.cc +++ b/src/query.cc @@ -53,6 +53,8 @@ #include "CLI/CLI.hpp" #include "CLI/Timer.hpp" +#include "sockets.h" + #define MAX_NUM_SAMPLES 2600 #define OUTPUT_FILE "samples.output" @@ -106,6 +108,40 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) { + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + uint64_t kmer_size = 
cdbg.get_cqf()->keybits() / 2; + console->info("Use colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("K-mer size: {}", kmer_size); + console->info("Query file: {}", query_file); + console->info("Output file: {}", output_file); + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + if (use_json) { + output_results_json(multi_kmers, cdbg, opfile); + } else { + output_results(multi_kmers, cdbg, opfile); + } + + opfile.close(); + console->info("Writing done."); +} + /* * === FUNCTION ============================================================= @@ -202,3 +238,63 @@ int query_main (QueryOpts& opt) return EXIT_SUCCESS; } /* ---------- end of function main ---------- */ + + + +/* + * === FUNCTION ============================================================= + * Name: main + * Description: + * ============================================================================ + */ +int server_main (QueryOpts& opt) +{ + try + { + std::string prefix = opt.prefix; + bool use_json = opt.use_json; + + // Make sure the prefix is a full folder + if (prefix.back() != '/') { + prefix.push_back('/'); + } + + spdlog::logger* console = opt.console.get(); + console->info("Reading colored dbg from disk: " + prefix); + + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + ColoredDbg*>, KeyObject> cdbg(cqf_file, + eqclass_files, + sample_file); + + boost::asio::io_service io_service; + server s(io_service, 23901, cdbg, console, use_json); // more parameters + + console->info("Reading query kmers from disk."); + console->info("Run server accepting queries."); + 
io_service.run(); + + // read from stdin + // while (1) { + // std::string query_file = opt.query_file; + // std::string output_file = opt.output; //{"samples.output"}; + + // cout << "Enter query file: " << endl; + // cin >> query_file; + // cout << "Enter output file: " << endl; + // cin >> output_file; + + // query::run_query(query_file, output_file); + // } + } + catch (std::exception& e) + { + std::cerr << "Exception: " << e.what() << "\n"; + } + + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ + diff --git a/src/query.hh b/src/query.hh deleted file mode 100644 index f703718..0000000 --- a/src/query.hh +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "MantisFS.h" -#include "ProgOpts.h" -#include "spdlog/spdlog.h" -#include "kmer.h" -#include "coloreddbg.h" -#include "common_types.h" -#include "CLI/CLI.hpp" -#include "CLI/Timer.hpp" - -namespace query -{ - ColoredDbg*>, KeyObject> * colored_dbg; - spdlog::logger * console; - - void run_query(std::string query_file, std::string output_file); -} \ No newline at end of file diff --git a/src/sockets.hh b/src/sockets.h similarity index 62% rename from src/sockets.hh rename to src/sockets.h index 5061d8a..7cf685b 100644 --- a/src/sockets.hh +++ b/src/sockets.h @@ -4,16 +4,23 @@ #include #include -#include "query.hh" - using boost::asio::ip::tcp; +extern void run_query(std::string query_file, std::string output_file, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json); + class session : public std::enable_shared_from_this { public: - session(tcp::socket socket) - : socket_(std::move(socket)) + session(tcp::socket socket, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) + : 
socket_(std::move(socket)), + cdbg(cdbg), console(console), use_json(use_json) { } @@ -39,7 +46,7 @@ class session std::string query_filepath(data_, delimeter_ptr - data_); std::string output_filepath(delimeter_ptr + 1); // assume exactly one space between filepaths - query::run_query(query_filepath, output_filepath); // TODO check result + run_query(query_filepath, output_filepath, cdbg, console, use_json); // TODO check result if (!ec) { @@ -67,14 +74,22 @@ class session tcp::socket socket_; enum { max_length = 4096 }; char data_[max_length]; + + ColoredDbg*>, KeyObject>& cdbg; + spdlog::logger * console; + bool use_json; }; class server { public: - server(boost::asio::io_service& io_service, short port) + server(boost::asio::io_service& io_service, short port, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) : acceptor_(io_service, tcp::endpoint(tcp::v4(), port)), - socket_(io_service) + socket_(io_service), + cdbg(cdbg), console(console), use_json(use_json) { do_accept(); } @@ -87,7 +102,7 @@ class server { if (!ec) { - std::make_shared(std::move(socket_))->start(); + std::make_shared(std::move(socket_), cdbg, console, use_json)->start(); } do_accept(); @@ -96,4 +111,8 @@ class server tcp::acceptor acceptor_; tcp::socket socket_; + + ColoredDbg*>, KeyObject>& cdbg; + spdlog::logger * console; + bool use_json; }; \ No newline at end of file From c0cf47c4d03da41b0894200295d2641cd2e5b752 Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Wed, 11 Apr 2018 12:53:12 -0400 Subject: [PATCH 13/15] Clean the code and add README for mantis server --- Makefile | 31 +------------- README.md | 45 +++++++++++++++++++- src/mantis.cc | 2 - src/query.cc | 111 +++++++++++--------------------------------------- 4 files changed, 68 insertions(+), 121 deletions(-) diff --git a/Makefile b/Makefile index 85fa78d..9120a75 100644 --- a/Makefile +++ b/Makefile @@ -1,31 +1,4 @@ TARGETS= mantis -MYLIBS=-L/home/paf2023/tools/lib \ 
--L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-phpi22i4vio3f55thjj5xxeupzmj463k/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/gettext-0.19.8.1-cnbmeuapkcyxq23tumt4esnuvijpl4w7/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/bzip2-1.0.6-x5hwxgowvbzmnowngidcjin5qicfg4fl/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/libxml2-2.9.4-ox4x2yk4rqlh3ag4wxxgzficxgwbcsy7/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/xz-5.2.3-npj3lafufjb7d57yhkthdo3qeeollgiq/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/python-2.7.14-4q4ykdbphrdjx2n4ztg5getsrjkl7dkz/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/ncurses-6.0-37ewjt6dvfa4vn3coyfnx5ejnfzg6d27/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-pkmmuwsdolh4vk2qrjvevqbrds55oxet/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-3yfqdmrhogpxaehsissamfd4bagokf3y/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/binutils-2.28-kamnyqrq5i6e6ghsccvnx4banzgdvn6r/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-t6r4t2u7mpl73qqhveutncj4gamajiip/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/binutils-2.28-a4bouodbguxjzgeoogr7qjqculofd4cx/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-7eta3q6r3ozz5vh7zaffgjajf4fwvxby/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.5.4/zlib-1.2.11-upeb67xkoclf5fgx7musxjw5v7gk7yw7/lib \ 
--L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/zlib-1.2.11-erzei6xutxne2tjssbmelgutsnhz6x6s/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/zlib-1.2.11-rjqs3cq27e2o4tbce7kzmictrih57avc/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-oiwzkauf2kl22pu7j2vjupr3ffu3mzts/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-5awddeormrqlmqq2frhjrdcu5szebebq/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.65.1-sqviukvzkgl524rq2dtz6d5ytukaxlj3/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.64.0-sx2zxhmitq565rmn3jutk67ffqrko6zx/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/boost-1.63.0-gpfsh3erjtf2xyksy66crnxzqib6qrva/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.63.0-eu4aw5v6ewmqd5kuzieorttiljxcuscv/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-6.3.0/boost-1.54.0-7qaxqizxejlur5pr5mfecf2umfp52lmr/lib \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-han2ovjpjdnuwkhtfhugdq66cjkte7q3/lib64 \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-6.3.0-xnoykzftyh27xs6vqkwvtft2cce5yd4k/lib64 \ --L/pbtech_mounts/softlib001/apps/EL6/spack/opt/spack/linux-rhel6-x86_64/gcc-4.4.6/gcc-4.5.4-tmeuqohwhscsb2hegxgfjmtxy7ywelud/lib64 \ ifdef D DEBUG=-g -DDEBUG @@ -60,7 +33,7 @@ CFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. 
-I$(LOC_INCLUDE)\ -Wno-unused-result -Wno-strict-aliasing -Wno-unused-function -Wno-sign-compare \ -Wno-implicit-function-declaration -LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ +LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \ -lboost_thread -lm -lz -lrt # @@ -70,7 +43,7 @@ LDFLAGS += $(MYLIBS) $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system all: $(TARGETS) # dependencies between programs and .o files -mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o +mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o # dependencies between .o files and .h files $(OBJDIR)/mantis.o: $(LOC_SRC)/mantis.cc diff --git a/README.md b/README.md index 2cf0349..bb56e5b 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ OPTIONS Prefix of input files. ``` - The command takes the following options : + The command takes the following options: - `--input-prefix,-p`: the directory where the output of coloreddbg command is present. - `--output,-o`: the file where the query results should be written (default : `samples.output`). @@ -94,10 +94,51 @@ OPTIONS - query transcripts: input transcripts to be queried. Finally, rather than writing the results in the "simple" output format, they can be written in JSON if you - provide the `--json,-j` flag to the `query` comamnd. + provide the `--json,-j` flag to the `query` command. The output file in contains the list of experiments (i.e., hits) corresponding to each queried transcript. +Server +------- + +`mantis server` lets you run mantis as a server which loads a mantis index and waits for queries. 
+ +```bash + $ make mantis + $ ./mantis server -p raw/ +``` + +The options and arguments are as follows: + +```bash +SYNOPSIS + mantis server [-j] -p <query_prefix> + +OPTIONS + -j, --json Write the output in JSON format + + <query_prefix> + Prefix of input files. +``` + + The command takes the following options: + - `--input-prefix,-p`: the directory where the output of coloreddbg command is present. + + Rather than writing the results in the "simple" output format, they can be written in JSON if you + provide the `--json,-j` flag to the `server` command. + + The server loads the mantis index into memory and starts to accept queries (port 23901 is used by default) in the following format: + +``` +<query filepath> <output filepath> +``` + + The query has the following arguments: + - query filepath: input transcripts to be queried. + - output filepath: the file where the query results should be written. The output file contains the list of experiments (i.e., hits) corresponding to each queried transcript. + + There is an example client written in Python (`src/client.py`) that demonstrates how to communicate with the server. + Contributing ------------ Contributions via GitHub pull requests are welcome. diff --git a/src/mantis.cc b/src/mantis.cc index 264ad9e..0a3023b 100644 --- a/src/mantis.cc +++ b/src/mantis.cc @@ -152,8 +152,6 @@ int main ( int argc, char *argv[] ) { command("server").set(selected, mode::server), option("-j", "--json").set(sopt.use_json) % "Write the output in JSON format", required("-p", "--input-prefix") & value(ensure_dir_exists, "query_prefix", sopt.prefix) % "Prefix of input files." // , - // option("-o", "--output") & value("output_file", qopt.output) % "Where to write query output.", - // value(ensure_file_exists, "query", qopt.query_file) % "Prefix of input files."
); auto validate_mode = ( diff --git a/src/query.cc b/src/query.cc index e45b997..80db1ee 100644 --- a/src/query.cc +++ b/src/query.cc @@ -58,10 +58,10 @@ #define MAX_NUM_SAMPLES 2600 #define OUTPUT_FILE "samples.output" -void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, +void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, std::ofstream& opfile) { mantis::QueryResults qres; - uint32_t cnt= 0; + uint32_t cnt= 0; { CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; //size_t qctr{0}; @@ -79,10 +79,10 @@ void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, +void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, std::ofstream& opfile) { mantis::QueryResults qres; - uint32_t cnt= 0; + uint32_t cnt= 0; { CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; opfile << "[\n"; @@ -145,112 +145,48 @@ void run_query(std::string query_file, std::string output_file, /* * === FUNCTION ============================================================= - * Name: main + * Name: query main * Description: * ============================================================================ */ -int query_main (QueryOpts& opt) +int query_main(QueryOpts& opt) { - //CLI::App app("Mantis query"); - std::string prefix = opt.prefix; std::string query_file = opt.query_file; std::string output_file = opt.output;//{"samples.output"}; bool use_json = opt.use_json; - /* - app.add_option("-i,--input-prefix", prefix, "Prefix of input files.")->required(); - app.add_option("-o,--outout", output_file, "Where to write query output."); - app.add_option("query", query_file, "Prefix of input files.")->required(); - app.add_flag("-j,--json", use_json, "Write the output in JSON format"); - CLI11_PARSE(app, argc, argv); - */ - // Make sure the prefix is a full folder if (prefix.back() != '/') { prefix.push_back('/'); } spdlog::logger* console = opt.console.get(); - console->info("Reading 
colored dbg from disk."); - - std::string cqf_file(prefix + CQF_FILE); - std::string sample_file(prefix + SAMPLEID_FILE); - std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), - std::string(EQCLASS_FILE).c_str()); - - ColoredDbg*>, KeyObject> cdbg(cqf_file, - eqclass_files, - sample_file); - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Read colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("Reading colored dbg from disk."); - //cdbg.get_cqf()->dump_metadata(); - //CQF cqf(query_file, false); - //CQF::Iterator it = cqf.begin(1); - //mantis::QuerySet input_kmers; - //do { - //KeyObject k = *it; - //input_kmers.insert(k.key); - //++it; - //} while (!it.done()); - - //mantis::QuerySets multi_kmers; - //multi_kmers.push_back(input_kmers); - - console->info("Reading query kmers from disk."); - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - // Attempt to optimize bulk query - /* - bool doBulk = multi_kmers.size() >= 100; - BulkQuery bq; - if (doBulk) { - uint32_t exp_id{0}; - for (auto& kmers : multi_kmers) { - for (auto& k : kmers) { - bq.qs.insert(k); - bq.map[k].push_back(exp_id); - } - ++exp_id; - } - } - */ + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); + ColoredDbg*>, KeyObject> cdbg(cqf_file, + eqclass_files, + sample_file); + run_query(query_file, output_file, cdbg, console, use_json); - if (use_json) { - output_results_json(multi_kmers, cdbg, opfile); - } else { - output_results(multi_kmers, cdbg, opfile); - } - //std::cout << 
"Writing samples and abundances out." << std::endl; - opfile.close(); - console->info("Writing done."); - - return EXIT_SUCCESS; -} /* ---------- end of function main ---------- */ + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ /* * === FUNCTION ============================================================= - * Name: main + * Name: server main * Description: * ============================================================================ */ -int server_main (QueryOpts& opt) +int server_main(QueryOpts& opt) { - try - { + try { std::string prefix = opt.prefix; bool use_json = opt.use_json; @@ -277,7 +213,7 @@ int server_main (QueryOpts& opt) console->info("Run server accepting queries."); io_service.run(); - // read from stdin + // read from stdin instead of socket // while (1) { // std::string query_file = opt.query_file; // std::string output_file = opt.output; //{"samples.output"}; @@ -287,11 +223,10 @@ int server_main (QueryOpts& opt) // cout << "Enter output file: " << endl; // cin >> output_file; - // query::run_query(query_file, output_file); + // run_query(query_file, output_file, cdbg, console, use_json); // } } - catch (std::exception& e) - { + catch (std::exception& e) { std::cerr << "Exception: " << e.what() << "\n"; } From 00517bc324695d3c8316539e4c3e180feebea2fb Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Thu, 12 Apr 2018 22:44:44 -0400 Subject: [PATCH 14/15] Remove old comment --- src/query.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query.cc b/src/query.cc index 142f89d..623cef3 100644 --- a/src/query.cc +++ b/src/query.cc @@ -206,7 +206,7 @@ int server_main(QueryOpts& opt) sample_file); boost::asio::io_service io_service; - server s(io_service, 23901, cdbg, console, use_json); // more parameters + server s(io_service, 23901, cdbg, console, use_json); console->info("Reading query kmers from disk."); console->info("Run server accepting queries."); From fb2f964d447e701b46135512f06f8e358a61810b 
Mon Sep 17 00:00:00 2001 From: Pavel Fedotov Date: Thu, 19 Apr 2018 18:22:50 -0400 Subject: [PATCH 15/15] Installation scripts, external include of sdsl --- Makefile | 2 +- scripts/install_mantis_server.sh | 14 ++++++++++++++ scripts/libpaths.sh | 6 ++++++ scripts/load_deps_spack.sh | 7 +++++++ scripts/patch_makefile.sh | 6 ++++++ 5 files changed, 34 insertions(+), 1 deletion(-) create mode 100755 scripts/install_mantis_server.sh create mode 100755 scripts/libpaths.sh create mode 100755 scripts/load_deps_spack.sh create mode 100755 scripts/patch_makefile.sh diff --git a/Makefile b/Makefile index 9120a75..a4b64f7 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ LOC_INCLUDE=include LOC_SRC=src OBJDIR=obj -CXXFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE) \ +CXXFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE) -I$(MYINCL) \ -Wno-unused-result -Wno-strict-aliasing -Wno-unused-function -Wno-sign-compare CFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE)\ diff --git a/scripts/install_mantis_server.sh b/scripts/install_mantis_server.sh new file mode 100755 index 0000000..df2ea7f --- /dev/null +++ b/scripts/install_mantis_server.sh @@ -0,0 +1,14 @@ +#!/bin/bash +. 
load_deps_spack.sh # load mantis dependencies with spack and save environment variables + +TOOLS=~/tools +mkdir -p $TOOLS/src +git clone https://github.com/simongog/sdsl-lite.git $TOOLS/src/sdsl-lite +$TOOLS/src/sdsl-lite/install.sh $TOOLS + +git clone https://github.com/effect/mantis.git $TOOLS/mantis + +LIBRARY_PATH=$TOOLS/lib:$LIBRARY_PATH +./patch_makefile.sh $TOOLS/mantis/Makefile +cd $TOOLS/mantis +MYINCL=$TOOLS/include make NH=1 diff --git a/scripts/libpaths.sh b/scripts/libpaths.sh new file mode 100755 index 0000000..91fc548 --- /dev/null +++ b/scripts/libpaths.sh @@ -0,0 +1,6 @@ +#!/bin/bash +IFS=":" +for line in $LIBRARY_PATH; do + echo -L$line '\\' +done + diff --git a/scripts/load_deps_spack.sh b/scripts/load_deps_spack.sh new file mode 100755 index 0000000..e0e2ddf --- /dev/null +++ b/scripts/load_deps_spack.sh @@ -0,0 +1,7 @@ +#!/bin/bash +spack load gcc +spack load cmake +spack load boost +spack load zlib +spack load bzip2 +spack load binutils diff --git a/scripts/patch_makefile.sh b/scripts/patch_makefile.sh new file mode 100755 index 0000000..c7caed8 --- /dev/null +++ b/scripts/patch_makefile.sh @@ -0,0 +1,6 @@ +#!/bin/bash +MAKEFILE=$1 +LIBPATHS="$(./libpaths.sh)" +LIBPATHS="$(echo "$LIBPATHS" | sed '$ ! s/$/\\/')" # escape new lines +sed -i "/TARGETS=/aMYLIBS=$LIBPATHS" $MAKEFILE # add MYLIBS var with all libraries path +sed -i "s/LDFLAGS += /LDFLAGS += \$\(MYLIBS\) /g" $MAKEFILE # add MYLIBS to LDFLAGS