From 6fed71b27ae2ff2e0a0116988a68084f11b53b67 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Mon, 11 May 2026 14:09:10 +0100 Subject: [PATCH] Improve "not defined in the header" messages Make the FILTER and INFO variants report the location (ref + pos) where the missing tag was found, to make the problem record easier to find. The FORMAT variant already did this. The Contig one is also not changed as it already reports the reference name (which will be the missing one...) and the position isn't very important in that case. For all cases, add an extra message noting that processing a file with missing headers may fail. This will only be printed once for each affected file. Signed-off-by: Rob Davies --- vcf.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/vcf.c b/vcf.c index 544fe8c01..b042eea6b 100644 --- a/vcf.c +++ b/vcf.c @@ -3180,6 +3180,8 @@ static int vcf_parse_format_dict2(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v, return -1; } hts_log_warning("FORMAT '%s' at %s:%"PRIhts_pos" is not defined in the header, assuming Type=String", t, bcf_seqname_safe(h,v), v->pos+1); + if ((v->errcode & (BCF_ERR_TAG_UNDEF|BCF_ERR_CTG_UNDEF)) == 0) + hts_log_warning("Missing headers may cause later processing to fail"); kstring_t tmp = {0,0,0}; int l; ksprintf(&tmp, "##FORMAT=", t); @@ -3788,7 +3790,10 @@ static int vcf_parse_filter(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char { // Simple error recovery for FILTERs not defined in the header. It will not help when VCF header has // been already printed, but will enable tools like vcfcheck to proceed. - hts_log_warning("FILTER '%s' is not defined in the header", t); + hts_log_warning("FILTER '%s' at %s:%"PRIhts_pos" is not defined in the header", + t, bcf_seqname_safe(h,v), v->pos+1); + if ((v->errcode & (BCF_ERR_TAG_UNDEF|BCF_ERR_CTG_UNDEF)) == 0) + hts_log_warning("Missing headers may cause later processing to fail"); kstring_t tmp = {0,0,0}; int l; ksprintf(&tmp, "##FILTER=", t); @@ -3847,7 +3852,10 @@ static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p k = kh_get(vdict, d, key); if (k == kh_end(d) || kh_val(d, k).info[BCF_HL_INFO] == 15) { - hts_log_warning("INFO '%s' is not defined in the header, assuming Type=String", key); + hts_log_warning("INFO '%s' at %s:%"PRIhts_pos" is not defined in the header, assuming Type=String", + key, bcf_seqname_safe(h,v), v->pos+1); + if ((v->errcode & (BCF_ERR_TAG_UNDEF|BCF_ERR_CTG_UNDEF)) == 0) + hts_log_warning("Missing headers may cause later processing to fail"); kstring_t tmp = {0,0,0}; int l; ksprintf(&tmp, "##INFO=", key); @@ -4033,6 +4041,8 @@ int vcf_parse(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v) k = kh_get(vdict, d, p); if (k == kh_end(d)) { hts_log_warning("Contig '%s' is not defined in the header. (Quick workaround: index the file with tabix.)", p); + if ((v->errcode & (BCF_ERR_TAG_UNDEF|BCF_ERR_CTG_UNDEF)) == 0) + hts_log_warning("Missing headers may cause later processing to fail"); v->errcode = BCF_ERR_CTG_UNDEF; if ((k = fix_chromosome(h, d, p)) == kh_end(d)) { hts_log_error("Could not add dummy header for contig '%s'", p);