From f6a5dbf179bef8e4bdf9bf50e7e768f628dca0a4 Mon Sep 17 00:00:00 2001 From: Stephen Tanner Date: Mon, 2 May 2016 09:18:08 -0700 Subject: [PATCH] Shankar merge fixes --- COPYING | 2 +- Changelog | 11 +- Makefile | 4 +- scratch/make_release_tarball.bash | 2 +- src/Makefile | 2 +- src/break_blocks.cpp | 6 +- src/extract_variants.cpp | 8 +- src/gatk_to_gvcf.cpp | 1 + src/get_called_regions.cpp | 8 +- src/libblock/GatkVcfRecord.hh | 8 +- src/libtrio/related_sample_util.cpp | 115 +++++++++++++- src/libtrio/related_sample_util.hh | 17 ++- src/libutil/parse_util.cpp | 40 ++--- src/libutil/parse_util.hh | 7 +- src/libutil/test/parse_util_test.cpp | 179 +--------------------- src/merge_variants.cpp | 221 ++++++++++++++++++++------- src/remove_region.cpp | 6 +- src/set_haploid_region.cpp | 7 +- 18 files changed, 343 insertions(+), 301 deletions(-) diff --git a/COPYING b/COPYING index 0c5dd25..d22bd02 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (c) 2009-2012 Illumina, Inc. +Copyright (c) 2009-2016 Illumina, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Changelog b/Changelog index a3adc63..9014f14 100644 --- a/Changelog +++ b/Changelog @@ -1,8 +1,9 @@ -v0.17.0 -- add new check_reference tool -- add tty detection for all tools requiring input on stdin -v0.16.1 -- merge: Fix vcf header from FILT to FILTER, doc method limitations +v0.16.x +- Fixed merge_variants bug associated with haploid chromosomes +- Fixed merge_variants bug around indels (samples moving past locus would get +"./." genotype). Implemented rewind function +- Fixed merge_variants bug to merge snv and indel alleles at the same pos - write one +record instead of multiple v0.16 - Add new remove_region tool (contributed by S. Ajay) - Add 'invert' option to extract_variants diff --git a/Makefile b/Makefile index 6279232..2794165 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ export BOOST_ROOT := $(REDIST_DIR)/boost/stage .PHONY: all build clean install redist test -all: install +all: install test build: redist $(MAKE) -C $(SRC_DIR) @@ -21,7 +21,7 @@ build: redist redist: $(MAKE) -C $(REDIST_DIR) -install: test +install: build mkdir -p $(BIN_DIR) && $(MAKE) -C $(SRC_DIR) $@ clean: srcclean diff --git a/scratch/make_release_tarball.bash b/scratch/make_release_tarball.bash index 911153b..7e404f7 100755 --- a/scratch/make_release_tarball.bash +++ b/scratch/make_release_tarball.bash @@ -49,7 +49,7 @@ git archive --prefix=$pname_root/ HEAD | tar -x -C $outdir rm -rf $pname/scratch # make version number substitutions: -for f in README.md; do +for f in README.txt; do sed "s/\${VERSION}/$gitversion/" < $f >| $pname/$f done diff --git a/src/Makefile b/src/Makefile index 1264af8..b6d1d33 100644 --- a/src/Makefile +++ b/src/Makefile @@ -35,7 +35,7 @@ export LDLIBS = -ltabix_and_faidx -lz -lm GVCFTOOLS_HH := gvcftools.hh TRIOPROGS := trio twins merge_variants -BLOCKPROGS := break_blocks check_reference extract_variants gatk_to_gvcf get_called_regions set_haploid_region remove_region +BLOCKPROGS := break_blocks extract_variants gatk_to_gvcf get_called_regions set_haploid_region remove_region PROGS = $(TRIOPROGS) $(BLOCKPROGS) PROG_OBJS = $(PROGS:%=%.o) diff --git a/src/break_blocks.cpp b/src/break_blocks.cpp index 5222b1e..32da278 100644 --- a/src/break_blocks.cpp +++ b/src/break_blocks.cpp @@ -42,7 +42,7 @@ #include "boost/program_options.hpp" -#include +//#include #include #include @@ -161,9 +161,7 @@ try_main(int argc,char* argv[]) { po_parse_fail=true; } - const bool isStdinTerminal(isatty(fileno(stdin))); - - if ((argc<=1) || (vm.count("help")) || po_parse_fail || isStdinTerminal) { + if ((argc<=1) || (vm.count("help")) || po_parse_fail) { log_os << "\n" << progname << " converts non-reference blocks to individual positions in specified regions\n\n"; log_os << "version: " << gvcftools_version() << "\n\n"; log_os << "usage: " << progname << " [options] < (g)VCF > unblocked_(g)VCF\n\n"; diff --git a/src/extract_variants.cpp b/src/extract_variants.cpp index 43f776c..316ce75 100644 --- a/src/extract_variants.cpp +++ b/src/extract_variants.cpp @@ -34,10 +34,10 @@ #include "VcfHeaderHandler.hh" #include "vcf_util.hh" + #include "boost/program_options.hpp" -#include -#include +//#include #include #include @@ -159,9 +159,7 @@ try_main(int argc,char* argv[]) { po_parse_fail=true; } - const bool isStdinTerminal(isatty(fileno(stdin))); - - if ((vm.count("help")) || po_parse_fail || isStdinTerminal) { + if ((vm.count("help")) || po_parse_fail) { log_os << "\n" << progname << " extracts variants from a VCF file\n\n"; log_os << "version: " << gvcftools_version() << "\n\n"; log_os << "usage: " << progname << " [options] < (g)VCF > variants_only_VCF\n\n"; diff --git a/src/gatk_to_gvcf.cpp b/src/gatk_to_gvcf.cpp index b59aa77..bd8f685 100644 --- a/src/gatk_to_gvcf.cpp +++ b/src/gatk_to_gvcf.cpp @@ -24,6 +24,7 @@ // // +/// \file /// /// \author Chris Saunders /// diff --git a/src/get_called_regions.cpp b/src/get_called_regions.cpp index 585dd5f..2361a89 100644 --- a/src/get_called_regions.cpp +++ b/src/get_called_regions.cpp @@ -33,10 +33,10 @@ #include "VcfHeaderHandler.hh" #include "vcf_util.hh" + #include "boost/program_options.hpp" -#include -#include +//#include #include #include @@ -219,9 +219,7 @@ try_main(int argc,char* argv[]) { po_parse_fail=true; } - const bool isStdinTerminal(isatty(fileno(stdin))); - - if ((vm.count("help")) || po_parse_fail || isStdinTerminal) { + if ((vm.count("help")) || po_parse_fail) { log_os << "\n" << progname << " creates a bed file of called regions from a gVCF\n\n"; log_os << "version: " << gvcftools_version() << "\n\n"; log_os << "usage: " << progname << " [options] < gVCF > called.bed\n\n"; diff --git a/src/libblock/GatkVcfRecord.hh b/src/libblock/GatkVcfRecord.hh index af4a132..f97c792 100644 --- a/src/libblock/GatkVcfRecord.hh +++ b/src/libblock/GatkVcfRecord.hh @@ -46,11 +46,9 @@ /// -/// A value which might exist, and might be convertible to an integer -/// if it does. -/// -/// 'Convertible to an integer' in this case are ints or floats which -/// are rounded to the nearest int +/// A value which might exist, and might be convertable to an integer +/// if it does 'convertable to an integer' in this case are ints or +/// floats which are rounded to the nearest int /// struct MaybeInt { diff --git a/src/libtrio/related_sample_util.cpp b/src/libtrio/related_sample_util.cpp index 23daab5..af2e64d 100644 --- a/src/libtrio/related_sample_util.cpp +++ b/src/libtrio/related_sample_util.cpp @@ -1,6 +1,6 @@ // -*- mode: c++; indent-tabs-mode: nil; -*- // -// Copyright (c) 2009-2012 Illumina, Inc. +// Copyright (c) 2009-2015 Illumina, Inc. // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation @@ -26,7 +26,7 @@ /// \file /// -/// \author Chris Saunders +/// \author Chris Saunders and Subramanian Shankar Ajay /// @@ -118,7 +118,8 @@ get_indel_allele( } } - return (is_standard_diploid && _gtcode.size()==2); + //return (is_standard_diploid && _gtcode.size()==2); + return ((is_standard_diploid && _gtcode.size() == 2) || (_gtcode.size() == 1 && _gtcode[0] >= 0)); } @@ -159,10 +160,24 @@ get_site_allele( } } - return (is_standard_diploid && _gtcode.size()==2); + //return (is_standard_diploid && _gtcode.size()==2); + return ((is_standard_diploid && _gtcode.size() == 2) || (_gtcode.size() == 1 && _gtcode[0] >= 0)); } +bool +snp_type_info::is_ref() const{ + + BOOST_FOREACH(const int gt, _gtcode) + { + if(gt == 0) + continue; + else + return false; + } + return true; +} + // extract unsigned value for key from the vcf info field bool @@ -256,6 +271,7 @@ site_crawler(const sample_info& si, , _tabs(NULL) , _is_sample_begin_state(true) , _is_sample_end_state(false) + , _is_reference(false) , _next_file(0) , _ref_seg(ref_seg) , _n_word(0) @@ -266,6 +282,9 @@ site_crawler(const sample_info& si, , _is_site_allele_current(false) , _is_indel_allele_current(false) { + enum {MAX_WORD=50}; + memset(_prev_word,0,sizeof(char *)*(MAX_WORD)); + memset(_next_word,0,sizeof(char *)*(MAX_WORD)); update(is_store_header); } @@ -274,12 +293,16 @@ site_crawler(const sample_info& si, site_crawler:: ~site_crawler() { if (NULL != _tabs) delete _tabs; + if (NULL != _prev_word[0]){ + unsigned idx(0); + while (idx < _n_word) + free(_prev_word[idx++]); + } } - void site_crawler:: dump_state(std::ostream& os) const { @@ -287,6 +310,10 @@ dump_state(std::ostream& os) const { os << "LOCUS_CRAWLER STATE:\n"; os << "\tchrom: " << chrom(); os << "\tposition: " << pos() << " offset: " << _locus_offset << "\n"; + os << "\t_is_call: " << _is_call << "\n"; + os << "\t_is_site_allele_current: " << _is_site_allele_current; + os << "\t_is_sample_begin_state: " << _is_sample_begin_state; + os << "\t_is_sample_end_state: " << _is_sample_end_state; os << "\tis_indel: " << is_indel() << "\n"; os << "\tfile: '" << afile << "'\n"; os << "\tline: '"; @@ -296,6 +323,8 @@ dump_state(std::ostream& os) const { + + bool site_crawler:: update_site_allele() const @@ -437,7 +466,8 @@ process_record_line(char* line) //} if (! _is_sample_begin_state) { - if (! (last_vpos < vpos()) ) { + //if (! (last_vpos < vpos()) ) { + if (vpos() < last_vpos ) { if (_opt.is_murdock_mode) { _vpos=last_vpos; _locus_size=0; @@ -474,6 +504,46 @@ dump_header(std::ostream& os) const { } +bool +site_crawler:: +rewind_site(const vcf_pos& lpos) { + + if(_locus_offset > 0){ + while(_vpos.pos > lpos.pos){ + _locus_offset--; + _vpos.pos--; + } + _is_call = update_allele(); + _is_reference = _opt.sti().is_ref(); + } + else{ + unsigned idx(0); + while(idx < _n_word){ + _next_word[idx] = _word[idx]; + _word[idx] = _prev_word[idx]; + idx++; + } + _vpos.pos = _opt.sti().pos(_word); + _is_site_allele_current = false; + _is_indel_allele_current = false; + _vpos.is_indel = (_opt.sti().get_is_indel(_word)); + _opt.sti().get_nonindel_ref_length(pos(),is_indel(),_word,_locus_size); + + if(is_indel()) + _locus_size = 1; + + _locus_offset = _locus_size - 1; + _vpos.pos += _locus_offset; + while(_vpos.pos > lpos.pos){ + _locus_offset--; + _vpos.pos--; + } + _is_call = update_allele(); + _is_reference = _opt.sti().is_ref(); + } + + return true; +} void @@ -502,6 +572,7 @@ update(bool is_store_header) { } _vpos.pos++; + _is_site_allele_current = false; if (_opt.is_region()) { // check for pos preceding the start of region of interest in a multi-base record: @@ -564,6 +635,38 @@ update(bool is_store_header) { _next_file++; } + if (_next_word[0]){ + unsigned idx(0); + while(idx < _n_word){ + _word[idx] = _next_word[idx]; + _next_word[idx] = NULL; + idx++; + } + _vpos.pos = _opt.sti().pos(_word); + _locus_offset = 0; + _is_site_allele_current = false; + _is_indel_allele_current = false; + _vpos.is_indel=(_opt.sti().get_is_indel(_word)); + _opt.sti().get_nonindel_ref_length(pos(),is_indel(),_word,_locus_size); + _is_call=update_allele(); + + return; + } + + unsigned idx(0); + if ( ! _is_sample_begin_state){ + while (idx < _n_word){ + size_t size = strlen(_word[idx]) + sizeof(char) ; + + if (NULL != _prev_word[idx]) + free(_prev_word[idx]); + _prev_word[idx] = (char *)malloc(size); + strncpy(_prev_word[idx], _word[idx], size); + idx++; + } + } + + // read through file to get to a data line: bool is_eof(true); char* line(NULL); diff --git a/src/libtrio/related_sample_util.hh b/src/libtrio/related_sample_util.hh index 23ae64e..4dc46c6 100644 --- a/src/libtrio/related_sample_util.hh +++ b/src/libtrio/related_sample_util.hh @@ -1,6 +1,6 @@ // -*- mode: c++; indent-tabs-mode: nil; -*- // -// Copyright (c) 2009-2012 Illumina, Inc. +// Copyright (c) 2009-2015 Illumina, Inc. // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation @@ -26,7 +26,7 @@ /// \file /// -/// \author Chris Saunders +/// \author Chris Saunders and Subramanian Shankar Ajay /// #pragma once @@ -198,6 +198,8 @@ struct snp_type_info { std::vector& allele, const char* const* word) const; + bool is_ref() const; + unsigned total(const char* const* word) const { unsigned dp; @@ -421,6 +423,9 @@ struct site_crawler { void update(const bool is_store_header = false); + bool + rewind_site(const vcf_pos& lpos); + bool is_pos_valid() const { return (! _is_sample_end_state); } @@ -514,6 +519,11 @@ struct site_crawler { return _n_total; } + bool + is_reference() const { + return _is_reference; + } + private: bool @@ -547,11 +557,14 @@ private: tabix_streamer* _tabs; bool _is_sample_begin_state; bool _is_sample_end_state; + bool _is_reference; unsigned _next_file; const reference_contig_segment& _ref_seg; enum { MAX_WORD=50 }; char* _word[MAX_WORD]; + char* _prev_word[MAX_WORD]; + char* _next_word[MAX_WORD]; unsigned _n_word; unsigned _locus_size; diff --git a/src/libutil/parse_util.cpp b/src/libutil/parse_util.cpp index 5e6e9d7..cdbdd5b 100644 --- a/src/libutil/parse_util.cpp +++ b/src/libutil/parse_util.cpp @@ -24,15 +24,14 @@ // // -/// +/// \file + /// \author Chris Saunders /// #include "blt_exception.hh" #include "parse_util.hh" -#include "boost/spirit/include/qi.hpp" - #include #include #include @@ -154,37 +153,28 @@ parse_long_str(const std::string& s) { double -parse_double( - const char*& s, - const char* s_end) -{ - double val; - const char* s_start(s); - if (s_end == NULL) s_end = s + strlen(s); - bool isPass(boost::spirit::qi::parse(s, s_end, boost::spirit::double_, val)); - if (isPass) - { - isPass = (s_start != s); - } - if (!isPass) - { - parse_exception("double", s_start); +parse_double(const char*& s) { + + errno = 0; + + char* endptr; + const double val(strtod(s, &endptr)); + if ((errno == ERANGE) || (endptr == s)) { + parse_exception("double",s); } + + s = endptr; return val; } double -parse_double_str( - const std::string& s) -{ +parse_double_str(const std::string& s) { const char* s2(s.c_str()); - const char* const s2_end(s2+s.size()); - const double val(parse_double(s2,s2_end)); - if (s2 != s2_end) { + const double val(parse_double(s2)); + if ((s2-s.c_str())!=static_cast(s.length())) { parse_exception("double",s.c_str()); } return val; } - diff --git a/src/libutil/parse_util.hh b/src/libutil/parse_util.hh index 1cd38ac..ab08d79 100644 --- a/src/libutil/parse_util.hh +++ b/src/libutil/parse_util.hh @@ -50,9 +50,7 @@ long parse_long(const char*& s); double -parse_double( - const char*& s, - const char* s_end = NULL); +parse_double(const char*& s); @@ -69,8 +67,7 @@ long parse_long_str(const std::string& s); double -parse_double_str( - const std::string& s); +parse_double_str(const std::string& s); diff --git a/src/libutil/test/parse_util_test.cpp b/src/libutil/test/parse_util_test.cpp index 2c48f2a..1d35ff0 100644 --- a/src/libutil/test/parse_util_test.cpp +++ b/src/libutil/test/parse_util_test.cpp @@ -22,195 +22,32 @@ // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. // +// #include "boost/test/unit_test.hpp" #include "parse_util.hh" -#include #include BOOST_AUTO_TEST_SUITE( parse_util ) -// -// check int parsing -// -BOOST_AUTO_TEST_CASE( test_parse_int ) -{ + +BOOST_AUTO_TEST_CASE( test_parse_int ) { const char* two = "2"; const int val(parse_int(two)); - BOOST_REQUIRE_EQUAL(val, 2); + BOOST_CHECK_EQUAL(val, 2); } -BOOST_AUTO_TEST_CASE( test_parse_int_big ) -{ - const char* twobig = "20000000000000000000"; - BOOST_REQUIRE_THROW(parse_int(twobig), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_int_small ) -{ - const char* twosmall = "-20000000000000000000"; - BOOST_REQUIRE_THROW(parse_int(twosmall), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_int_empty ) -{ - const char* empty = ""; - BOOST_REQUIRE_THROW(parse_int(empty), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_int_tolerate_suffix ) -{ - const char* suffix = "123abc"; - const int val(parse_int(suffix)); - BOOST_REQUIRE_EQUAL(val,123); -} - -BOOST_AUTO_TEST_CASE( test_parse_int_str ) -{ +BOOST_AUTO_TEST_CASE( test_parse_int_str ) { static const char two[] = "2"; const int val(parse_int_str(std::string(two))); - BOOST_REQUIRE_EQUAL(val, 2); + BOOST_CHECK_EQUAL(val, 2); } -BOOST_AUTO_TEST_CASE( test_parse_int_str_bad_input ) -{ +BOOST_AUTO_TEST_CASE( test_parse_int_str_bad_input ) { static const std::string junk("ABCD"); - BOOST_REQUIRE_THROW(parse_int_str(junk), std::exception); -} - - -// -// check long parsing -// - -BOOST_AUTO_TEST_CASE( test_parse_long ) -{ - const char* two = "9223372036854775807"; - const long val(parse_long(two)); - BOOST_REQUIRE_EQUAL(val, 9223372036854775807l); -} - -BOOST_AUTO_TEST_CASE( test_parse_long_big ) -{ - const char* twobig = "9223372036854775808"; - BOOST_REQUIRE_THROW(parse_long(twobig), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_long_small ) -{ - const char* twosmall = "-20000000000000000000000000000000000000000000000000000"; - BOOST_REQUIRE_THROW(parse_long(twosmall), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_long_empty ) -{ - const char* empty = ""; - BOOST_REQUIRE_THROW(parse_long(empty), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_long_str ) -{ - static const char two[] = "2"; - const long val(parse_long_str(std::string(two))); - BOOST_REQUIRE_EQUAL(val, 2l); -} - - -// -// check unsigned parsing -// -BOOST_AUTO_TEST_CASE( test_parse_unsigned ) -{ - const char* two = "2"; - const unsigned val(parse_unsigned(two)); - BOOST_REQUIRE_EQUAL(val, 2u); -} - -BOOST_AUTO_TEST_CASE( test_parse_unsigned_big ) -{ - const char* twobig = "20000000000000000000"; - BOOST_REQUIRE_THROW(parse_unsigned(twobig), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_unsigned_small ) -{ - const char* twosmall = "-2"; - BOOST_REQUIRE_THROW(parse_unsigned(twosmall), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_unsigned_empty ) -{ - const char* empty = ""; - BOOST_REQUIRE_THROW(parse_unsigned(empty), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_unsigned_tolerate_suffix ) -{ - const char* suffix = "123abc"; - const unsigned val(parse_unsigned(suffix)); - BOOST_REQUIRE_EQUAL(val,123u); -} - -BOOST_AUTO_TEST_CASE( test_parse_unsigned_str ) -{ - static const char two[] = "2"; - const unsigned val(parse_unsigned_str(std::string(two))); - BOOST_REQUIRE_EQUAL(val, 2u); -} - -BOOST_AUTO_TEST_CASE( test_parse_unsigned_str_bad_input ) -{ - static const std::string junk("ABCD"); - BOOST_REQUIRE_THROW(parse_unsigned_str(junk), std::exception); -} - - - -// -// check double parsing -// -const double tol(0.0001); - -BOOST_AUTO_TEST_CASE( test_parse_double ) -{ - const char* two = "2.0"; - const double val(parse_double(two)); - BOOST_REQUIRE_CLOSE(val, 2.0, tol); -} - -BOOST_AUTO_TEST_CASE( test_parse_double_exp ) -{ - const char* big = "2.0e+100"; - const double val(parse_double(big)); - BOOST_REQUIRE_CLOSE(val, 2.0e+100, tol); -} - -BOOST_AUTO_TEST_CASE( test_parse_double_inf ) -{ - const char* inf = "Infinity"; - const double val(parse_double(inf)); - BOOST_REQUIRE(std::isinf(val)); -} - -BOOST_AUTO_TEST_CASE( test_parse_double_str ) -{ - const char* two = "2.0"; - const double val(parse_double_str(std::string(two))); - BOOST_REQUIRE_CLOSE(val, 2.0, tol); -} - -BOOST_AUTO_TEST_CASE( test_parse_double_str_bad_input ) -{ - static const std::string junk("ABCD"); - BOOST_REQUIRE_THROW(parse_double_str(junk), std::exception); -} - -BOOST_AUTO_TEST_CASE( test_parse_double_str_bad_input2 ) -{ - static const std::string junk(""); - BOOST_REQUIRE_THROW(parse_double_str(junk), std::exception); + BOOST_CHECK_THROW(parse_int_str(junk), std::exception); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/merge_variants.cpp b/src/merge_variants.cpp index f75bfb2..3c0e7f5 100644 --- a/src/merge_variants.cpp +++ b/src/merge_variants.cpp @@ -1,6 +1,6 @@ // -*- mode: c++; indent-tabs-mode: nil; -*- // -// Copyright (c) 2009-2012 Illumina, Inc. +// Copyright (c) 2009-2015 Illumina, Inc. // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation @@ -24,8 +24,9 @@ // // +/// \file /// -/// \author Chris Saunders +/// \author Chris Saunders and Subramanian Shankar Ajay /// #include "gvcftools.hh" @@ -75,9 +76,12 @@ struct merge_reporter { private: std::ostream& _os; + std::string _cur_merged_rec; + std::string _prev_merged_rec; + vcf_pos _last_merged_pos; bool _is_header_output; - // not persistent, just used to reduce allocation: + // not persisistent, just used to reduce allocation: std::vector words; }; @@ -96,17 +100,18 @@ refAltWriter( const unsigned n_alleles(alleles.size()); assert(0 != n_alleles); - os << '\t' << alleles.get_key(0); // REF + os << '\t' ; + os << alleles.get_key(0); // REF // ALT: - os << '\t'; + os << "\t"; if (n_alleles>1) { for (unsigned i(1); i1) os << ","; os << alleles.get_key(i); } } else { - os << '.'; + os << "."; } } @@ -124,6 +129,8 @@ print_locus( if (! _is_header_output) { sa[0]->dump_header(_os); + _os << "##FORMAT="; + _os << '\n'; _os << "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"; const unsigned sample_size(sa.size()); @@ -143,7 +150,8 @@ print_locus( fkey_t merged_filters; for (unsigned st(0); st sample_keys(n_samples); for (unsigned st(0); stchrom() // CHROM << '\t' << low_pos.pos // POS << '\t' << '.'; // ID - + std::vector genotypes; @@ -184,7 +195,8 @@ print_locus( { // get ref, alt and gt for all samples: // - const char ref_base(ref_seg.get_base(sa[0]->pos()-1)); + //const char ref_base(ref_seg.get_base(sa[0]->pos()-1)); + const char ref_base(ref_seg.get_base(low_pos.pos-1)); id_set alleles; alleles.insert_key(ref_base); @@ -228,7 +240,10 @@ print_locus( std::string ref_allele; for (unsigned st(0); st ref_allele.size()) { ref_allele=sample.get_indel_ref(); @@ -238,11 +253,16 @@ print_locus( std::vector alt_adjust(n_samples); for (unsigned st(0); st alleles; @@ -250,19 +270,38 @@ print_locus( for (unsigned st(0); st >& sa, + const vcf_pos low_pos, + const reference_contig_segment& ref_seg, + merge_reporter& mr) { + + + const unsigned n_samples(sa.size()); + + bool is_any_nonref_called(false); + + const char ref_base(ref_seg.get_base(low_pos.pos-1)); + + if (ref_base=='N') return; + + for (unsigned st(0); st >& sa, @@ -380,11 +487,11 @@ merge_site(const std::vector >& sa, for (unsigned st(0); st& input_files, vcf_pos low_pos; for (unsigned st(0); stis_pos_valid()) continue; - if ((! is_low_pos_set) || (sa[st]->vpos() < low_pos)) { + //if ((! is_low_pos_set) || (sa[st]->vpos() < low_pos)) { + if ((! is_low_pos_set) || (sa[st]->vpos().pos < low_pos.pos) || (sa[st]->vpos().pos <= low_pos.pos && sa[st]->vpos().is_indel && !low_pos.is_indel)){ low_pos = sa[st]->vpos(); is_low_pos_set=true; } @@ -466,15 +574,30 @@ merge_variants(const std::vector& input_files, } else { - mr.print_locus(sa, low_pos, ref_seg,true); + for (unsigned idx(0); idxvpos())) + if ( sa[idx]->vpos().pos - low_pos.pos == 1) + sa[idx]->rewind_site(low_pos); + merge_indel_site(sa, low_pos, ref_seg, mr); + + //mr.print_locus(sa, low_pos, ref_seg, true); + + //for (unsigned idx(0); idxvpos().pos < low_pos.pos && sa[idx]->vpos().is_indel) + //sa[idx]->update(); + } for (unsigned st(0); stis_pos_valid() && (low_pos == sa[st]->vpos())) { + if (sa[st]->is_pos_valid() && (low_pos.pos >= sa[st]->vpos().pos)) { sa[st]->update(); } } } + + vcf_pos end_pos; + merge_site(sa, end_pos, ref_seg, mr); + //mr.print_locus(sa, end_pos, ref_seg, true); } @@ -537,22 +660,10 @@ try_main(int argc,char* argv[]) { if (input_files.empty()) is_show_help=true; if (is_show_help) { - log_os << "\n" << progname << " merges the variants from multiple gVCF files\n\n"; + log_os << "\n" << progname << " merge the variants from multiple gVCF files\n\n"; log_os << "version: " << gvcftools_version() << "\n\n"; log_os << "usage: " << progname << " [options] > merged_variants\n\n"; log_os << visible << "\n"; - log_os << -"\n" -"Note this is a simple prototype merge implementation and comes with\n" -"considerable restrictions:\n" -"\n" -" - Most INFO fields are not recomputed/forwared to the merged record.\n" -" - Most sample fields are not forwarded to the merged record (this would\n" -" require knowing which fields depend on allele count.)\n" -" - Merged record FILTER field is a simple union of all merged samples,\n" -" thus a single filtered sample will filter the merged record.\n" -" - No handling of overlapping indel alleles\n" -"\n"; exit(2); } diff --git a/src/remove_region.cpp b/src/remove_region.cpp index abb8388..23b32d4 100644 --- a/src/remove_region.cpp +++ b/src/remove_region.cpp @@ -42,7 +42,7 @@ #include "boost/program_options.hpp" -#include +//#include #include #include @@ -147,9 +147,7 @@ try_main(int argc,char* argv[]) { po_parse_fail=true; } - const bool isStdinTerminal(isatty(fileno(stdin))); - - if ((argc<=1) || (vm.count("help")) || po_parse_fail || isStdinTerminal) { + if ((argc<=1) || (vm.count("help")) || po_parse_fail) { log_os << "\n" << progname << " removes variant call information from specified regions\n\n"; log_os << "version: " << gvcftools_version() << "\n\n"; log_os << "usage: " << progname << " [options] < (g)VCF > region_removed_(g)VCF\n\n"; diff --git a/src/set_haploid_region.cpp b/src/set_haploid_region.cpp index a67370d..e1a60c4 100644 --- a/src/set_haploid_region.cpp +++ b/src/set_haploid_region.cpp @@ -39,9 +39,10 @@ #include "VcfHeaderHandler.hh" #include "VcfRecord.hh" + #include "boost/program_options.hpp" -#include +//#include #include #include @@ -222,9 +223,7 @@ try_main(int argc,char* argv[]) { po_parse_fail=true; } - const bool isStdinTerminal(isatty(fileno(stdin))); - - if ((argc<=1) || (vm.count("help")) || po_parse_fail || isStdinTerminal) { + if ((argc<=1) || (vm.count("help")) || po_parse_fail) { log_os << "\n" << progname << " converts regions of a gVCF or VCF from diploid to haploid\n\n"; log_os << "version: " << gvcftools_version() << "\n\n"; log_os << "usage: " << progname << " [options] < (g)VCF > haploid_region_(g)VCF\n\n";