Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
210155b
Prototype planned VCF FORMAT parser
jhl-oai Apr 28, 2026
3ccf1c8
Document CCDG FORMAT parser benchmark
jhl-oai Apr 28, 2026
1e46cb8
Optimize planned AD and PL parsing
jhl-oai Apr 28, 2026
ccffd35
Add general VCF FORMAT plan prototype
jhl-oai Apr 28, 2026
03e0d7f
Benchmark compiled FORMAT interpreter mode
jhl-oai Apr 28, 2026
f2d3db1
Specialize compiled FORMAT opcode handlers
jhl-oai Apr 28, 2026
3d93431
Harden and specialize FORMAT planning
jhl-oai Apr 28, 2026
bad7312
Direct-write planned FORMAT payloads
jhl-oai Apr 28, 2026
dfef785
Guard planned FORMAT fast paths
jhl-oai Apr 28, 2026
ff9b3b5
Add dynamic strict FORMAT numeric executor
jhl-oai Apr 28, 2026
544660c
Tighten dynamic FORMAT executor
jhl-oai Apr 28, 2026
45439ac
Benchmark dynamic FORMAT conversions
jhl-oai Apr 28, 2026
3ec2e9f
Clarify FORMAT plan benchmark state
codex Apr 29, 2026
ba09a36
Add dynamic FORMAT likelihood shape executor
codex Apr 29, 2026
61b4bd1
Add VCF FORMAT shape benchmark corpus
codex Apr 29, 2026
fea9b9e
Add large FORMAT shape benchmark pass
codex Apr 29, 2026
e9442b7
Cache dynamic FORMAT likelihood shapes
codex Apr 29, 2026
3185c8a
Add dynamic GT-only FORMAT fast path
codex Apr 29, 2026
3d51a29
Tighten dynamic likelihood parsing
codex Apr 29, 2026
949eee3
Skip integer sentinel checks when proven absent
codex Apr 29, 2026
aae9423
Elide likelihood row op rebuild
codex Apr 29, 2026
8f1a943
Refactor dynamic FORMAT parsing to composable ops
codex Apr 29, 2026
006059a
Harden composable FORMAT parser
codex Apr 29, 2026
b43c383
Compact composable FORMAT row widths
codex Apr 29, 2026
b96d982
Trim FORMAT parser to dynamic path
codex Apr 29, 2026
cb025b1
Add threaded bcftools FORMAT benchmarks
codex Apr 29, 2026
ec63fbb
Consolidate FORMAT plan documentation
codex Apr 29, 2026
b433732
Harden dynamic FORMAT planner
codex Apr 29, 2026
65758b1
Add broader bcftools command benchmarks
codex Apr 29, 2026
ceda038
Harden FORMAT planner tests
codex Apr 30, 2026
f9e9934
remove cooldown
codex Apr 30, 2026
be6eaa4
Reuse FORMAT string spans in planner
codex Apr 30, 2026
0767a55
update
codex Apr 30, 2026
52f5507
Trim benchmark corpus tooling from product branch
codex Apr 30, 2026
8c809ba
fix ci
codex Apr 30, 2026
0999351
Document FORMAT planner modules
codex Apr 30, 2026
bd64318
Simplify FORMAT planner integer vectors
codex May 7, 2026
7813ee3
Consolidate FORMAT planner review notes
codex May 8, 2026
5ccb995
Merge remote-tracking branch 'upstream/develop' into feature/vcf-pars…
codex May 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ shlib-exports-*.txt
/test/test_bgzf
/test/test_expr
/test/test_faidx
/test/test_format_plan_cache
/test/test_index
/test/test_introspection
/test/test_kfunc
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ BUILT_TEST_PROGRAMS = \
test/test_str2int \
test/test_time_funcs \
test/test_view \
test/test_format_plan_cache \
test/test_index \
test/test-vcf-api \
test/test-vcf-sweep \
Expand Down Expand Up @@ -690,6 +691,7 @@ check test: all $(HTSCODECS_TEST_TARGETS)
test/test_str2int
test/test_time_funcs
test/fieldarith test/fieldarith.sam
test/test_format_plan_cache
test/hfile
if test "x$(BUILT_PLUGINS)" != "x"; then \
HTS_PATH=. test/with-shlib.sh test/plugins-dlhts -g ./libhts.$(SHLIB_FLAVOUR); \
Expand Down Expand Up @@ -790,6 +792,9 @@ test/test_time_funcs: test/test_time_funcs.o
test/test_view: test/test_view.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LIBS) -lpthread

test/test_format_plan_cache: test/test_format_plan_cache.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_format_plan_cache.o libhts.a $(LIBS) -lpthread

test/test_index: test/test_index.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_index.o libhts.a $(LIBS) -lpthread

Expand Down Expand Up @@ -885,6 +890,7 @@ test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_reg
test/test_str2int.o: test/test_str2int.c config.h $(textutils_internal_h)
test/test_time_funcs.o: test/test_time_funcs.c config.h $(hts_time_funcs_h)
test/test_view.o: test/test_view.c config.h $(cram_h) $(htslib_sam_h) $(htslib_vcf_h) $(htslib_hts_log_h)
test/test_format_plan_cache.o: test/test_format_plan_cache.c config.h $(htslib_kstring_h) $(htslib_vcf_h)
test/test_faidx.o: test/test_faidx.c config.h $(htslib_faidx_h)
test/test_index.o: test/test_index.c config.h $(htslib_sam_h) $(htslib_vcf_h)
test/test-vcf-api.o: test/test-vcf-api.c config.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_vcfutils_h) $(htslib_kbitset_h) $(htslib_kstring_h) $(htslib_kseq_h)
Expand Down
147 changes: 147 additions & 0 deletions docs/FORMAT_PLAN_OVERVIEW.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# VCF FORMAT Planner Review Notes

This is an interim review note for the opt-in VCF FORMAT planner in `vcf.c`.
It is intended to make the implementation easier to review in this branch; it
is not proposed as permanent user-facing documentation.

## Purpose

The planner is an optional fast path for parsing VCF sample `FORMAT` columns
into BCF. It is disabled by default and only runs when:

```sh
HTS_VCF_FORMAT_PLAN=1
```

Unset, `0`, or unknown values use the existing generic parser. The hard
correctness rule is byte-identical BCF output compared with the generic parser.
If any part of a FORMAT column is unsupported or suspicious, the whole column
falls back to the generic parser.

`HTS_VCF_FORMAT_PLAN_STATS=1` enables temporary diagnostic counters for review
and benchmarking. These environment variables are branch controls, not stable
public API.

## Entry Point

`vcf_parse_format()` tries `vcf_parse_format_planned()` before the generic
FORMAT parser when the environment gate is enabled. The planned parser returns
success only after it has fully emitted the FORMAT column. When compilation,
width resolution, parsing, or row validation does not meet the supported
contract, it instead returns the fallback code, and the caller then invokes the
generic parser.

The planned path never commits partial FORMAT output after detecting a row-local
failure. Rollback and fallback are part of the normal control flow.

## Plan Cache

Plans are stored in private `bcf_hdr_aux_t` state. Cache keys are the literal
FORMAT string plus the active private header generation. The cache stores both
supported and unsupported plans so repeated unsupported schemas avoid repeated
tokenization and metadata lookup.

`bcf_hdr_sync()` clears cached plans and advances the generation because FORMAT
ids, types, and lengths are header-local. The planner refuses dirty headers.

## Compilation

Compilation works from header metadata rather than from kernels specialized for
exact whole-FORMAT strings. For each FORMAT token, the compiler records:

- the header id;
- declared type;
- declared number model;
- whether the row needs record-local width resolution or sample-text scanning;
- the executor row kind.

The compiler rejects empty tokens, undefined tags, duplicate tags, unsupported
types or number models, and non-standard `GT` declarations. Tokenization does
not collapse empty fields, so malformed schemas such as `GT::DP` still fall
back in a way that preserves generic-parser behavior.

## Supported Rows

The current executor has six row kinds:

- `GT2`
- `INT1`
- `INTVEC`
- `FLOAT1`
- `FLOATN`
- `STR`

The earlier width-specific integer-vector row kinds were removed. A single
`INTVEC` path handles fixed and row-local integer widths, including the
over-width comma check needed to preserve fallback behavior.

Supported shapes include:

- simple diploid `GT` values with one-character alleles or missing values,
separated by `/` or `|`, including phased-missing forms such as `.|.`,
`0|.`, and `.|0`;
- integer and float scalar fields;
- integer and float vector fields within the planner width cap;
- numeric `Number=A`, `Number=R`, and `Number=G` widths resolved from the
current record allele count;
- bounded measured `Number=.` numeric rows;
- bounded `Type=String,Number=1` rows;
- selected-sample parsing via `bcf_hdr_set_samples()`.

Unsupported or intentionally generic cases include undefined tags, duplicate
FORMAT tags, dirty headers, unsupported type or number declarations, unsupported
GT encodings, malformed separators, unsafe row widths, and string/float-heavy
layouts that do not benefit from planning.

## Width Resolution

Header-fixed rows use the declared width directly, after resolving
allele-dependent widths for the current record. Numeric widths must fit the
planner cap of 64 values.

Measured numeric and string rows perform a first pass over original sample
columns. This is required to match the generic parser's width and padding
rules. If samples are selected, the planner still scans original sample
columns but measures and emits retained samples densely.

Strings are capped at 256 bytes in the planned path. A wider string row causes
the whole FORMAT column to fall back to the generic parser.

## Fallback Contract

Fallback is expected and intentional. It happens before generic parsing when
the compiler or row executor sees unsupported structure, unsupported widths,
unexpected separators, unsupported GT shape, parse failures, sample-count
mismatch, or allocation/internal consistency errors.

Diagnostic fallback reasons are:

- `unsupported`
- `numeric_width`
- `string_width`
- `gt_shape`
- `parse`
- `separator`
- `sample_count`

## Test And Benchmark Evidence

Focused correctness checks used for this review branch:

```sh
make test/test_view test/test_format_plan_cache bgzip tabix
./test/test_format_plan_cache
perl test/test.pl -F test_vcf_format_plan
test/maintainer/check_spaces.pl vcf.c docs/FORMAT_PLAN_OVERVIEW.md \
test/format-plan-malformed-fields.vcf test/test.pl
git diff --check
```

The focused FORMAT-plan test fragment covers disabled/unknown environment
behavior, selected samples, malformed FORMAT tokens, malformed numeric fields,
phased missing GT values, cache invalidation after header metadata changes, and
fallback after partial planned parsing.

The current public-fork PR body contains the maintainer-facing performance
summary. The compact benchmark artifacts live on the corpus branch
`feature/vcf-parsing-speedup-corpus`, including the current `test_view` and
bcftools summaries for commit `bd643182c8fa722abbc0cb89860263a90bb97020`.
61 changes: 61 additions & 0 deletions test/format-plan-cache.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
##fileformat=VCFv4.3
##contig=<ID=1>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=F01,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F02,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F03,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F04,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F05,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F06,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F07,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F08,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F09,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F10,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F11,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F12,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F13,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F14,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F15,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F16,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F17,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F18,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F19,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=F20,Number=1,Type=Integer,Description="Cache test field">
##FORMAT=<ID=LONGFORMATFIELD01,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD02,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD03,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD04,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD05,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD06,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD07,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD08,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD09,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD10,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD11,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD12,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD13,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD14,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD15,Number=1,Type=Integer,Description="Long cache test field">
##FORMAT=<ID=LONGFORMATFIELD16,Number=1,Type=Integer,Description="Long cache test field">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2
1 1 . A C . PASS . GT:F01 0/1:1 1/1:2
1 2 . A C . PASS . GT:F02 0/1:2 1/1:3
1 3 . A C . PASS . GT:F03 0/1:3 1/1:4
1 4 . A C . PASS . GT:F04 0/1:4 1/1:5
1 5 . A C . PASS . GT:F05 0/1:5 1/1:6
1 6 . A C . PASS . GT:F06 0/1:6 1/1:7
1 7 . A C . PASS . GT:F07 0/1:7 1/1:8
1 8 . A C . PASS . GT:F08 0/1:8 1/1:9
1 9 . A C . PASS . GT:F09 0/1:9 1/1:10
1 10 . A C . PASS . GT:F10 0/1:10 1/1:11
1 11 . A C . PASS . GT:F11 0/1:11 1/1:12
1 12 . A C . PASS . GT:F12 0/1:12 1/1:13
1 13 . A C . PASS . GT:F13 0/1:13 1/1:14
1 14 . A C . PASS . GT:F14 0/1:14 1/1:15
1 15 . A C . PASS . GT:F15 0/1:15 1/1:16
1 16 . A C . PASS . GT:F16 0/1:16 1/1:17
1 17 . A C . PASS . GT:F17 0/1:17 1/1:18
1 18 . A C . PASS . GT:F18 0/1:18 1/1:19
1 19 . A C . PASS . GT:F19 0/1:19 1/1:20
1 20 . A C . PASS . GT:F20 0/1:20 1/1:21
1 21 . A C . PASS . GT:LONGFORMATFIELD01:LONGFORMATFIELD02:LONGFORMATFIELD03:LONGFORMATFIELD04:LONGFORMATFIELD05:LONGFORMATFIELD06:LONGFORMATFIELD07:LONGFORMATFIELD08:LONGFORMATFIELD09:LONGFORMATFIELD10:LONGFORMATFIELD11:LONGFORMATFIELD12:LONGFORMATFIELD13:LONGFORMATFIELD14:LONGFORMATFIELD15:LONGFORMATFIELD16 0/1:1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16 1/1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17
16 changes: 16 additions & 0 deletions test/format-plan-composable.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
##fileformat=VCFv4.3
##contig=<ID=chr22,length=50818468>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods">
##FORMAT=<ID=XX,Number=2,Type=Integer,Description="Fixed-width extension values">
##FORMAT=<ID=XS,Number=1,Type=String,Description="String extension value">
##FORMAT=<ID=VX,Number=.,Type=Integer,Description="Variable-width fallback values">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
chr22 10610000 . A T 50 PASS . GT:AD 0/1:4,5 0/0:9,0 ./.:0,0
chr22 10610010 . A T 50 PASS . GT:AD:DP:XX:PL 0/1:4,5:9:7,8:90,0,120 0/0:9,0:9:1,2:0,30,200 ./.:0,0:0:.,.:.
chr22 10610020 . A C,G 50 PASS . DP:XS:GT:XX:AD:GQ:PL 12:alpha:1/2:3,4:1,5,6:60:100,90,80,70,0,20 8:beta:0/2:5,6:4,0,4:35:80,70,60,50,40,0 0:.:./.:.,.:0,0,0:.:.
chr22 10610030 . G C 50 PASS . GT:AD:DP:GQ:PL 0/1:3,4:7:50:70,0 0/0:6,0:6:35:0,50 ./.:0,0:0:.:.
chr22 10610040 . G C 50 PASS . GT:AD:DP:VX:PL 0/1:3,4:7:1,2,3:70,0,90 0/0:6,0:6:5:0,50,120 ./.:0,0:0:.:.
38 changes: 38 additions & 0 deletions test/format-plan-edge.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
##fileformat=VCFv4.3
##contig=<ID=chr22,length=50818468>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=AB,Number=1,Type=Float,Description="Allele balance">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods">
##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype quality">
##FORMAT=<ID=FT,Number=1,Type=String,Description="Filter">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods">
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth">
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Strand bias table">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
chr22 10510061 . A T 64.12 PASS . GT:AB:AD:DP:GQ:PL 0/0:.:3,0:3:9:0,9,104 0/1:0.5:5,4:9:99:99,0,123 ./.:.:0,0:0:.:.
chr22 10510352 . AT A 50 PASS . GT:AD:DP:GQ:PGT:PID:PL 1/1:0,5:5:15:1|1:10510352_AT_A:225,15,0 0/1:3,2:5:20:0|1:10510352_AT_A:20,0,200 ./.:0,0:0:.:.:.:.
chr22 10520000 . A C,G 50 PASS . GT:AD:DP:GQ:PL 1/2:1,4,5:10:60:100,80,70,60,0,20 0/2:3,0,2:5:30:80,70,60,50,40,0 ./.:0,0,0:0:.:.
chr22 10530000 . G A 50 PASS . GT:DP:AD:GQ:PL 0/1:7:3,4:42:99,0,120 0/0:5:5,0:15:0,15,200 ./.:0:0,0:.:.
chr22 10540000 . C T 50 PASS . GT:HQ:DP:GQ 0/1:10,20:7:40 0/0:.,.:5:50 ./.:.:0:.
chr22 10550000 . C T 50 PASS . GT:FT:DP:GQ 0/1:PASS:7:40 0/0:LowQual:5:50 ./.:.:0:.
chr22 10560000 . A C,G 50 PASS . GT:GL:DP:GQ 0/1:-0.1,-1.2,-9.9,-2.0,-3.0,-4.0:7:40 1/2:-9.9,-8.8,-7.7,-6.6,-5.5,-4.4:5:50 ./.:.:0:.
chr22 10570000 . A T 50 PASS . GT:AD:DP:GQ:PL 0:3,0:3:10:0,10,100 1:0,3:3:20:100,10,0 .:0,0:0:.:.
chr22 10580000 . A C,G,T,AA,AC,AG,AT,CA,CC,CG 50 PASS . GT:AD:DP:GQ:PL 10/10:0,0,0,0,0,0,0,0,0,0,7:7:20:200,190,180,170,160,150,140,130,120,110,100,90,80,70,60,50,40,30,20,10,0,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290,300,310,320,330,340,350,360,370,380,390,400,410,420,430,440,450,460 0/10:3,0,0,0,0,0,0,0,0,0,2:5:30:0,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290,300,310,320,330,340,350,360,370,380,390,400,410,420,430,440,450,460,470,480,490,500,510,520,530,540,550,560,570,580,590,600,610,620,630,640,650 ./.:0,0,0,0,0,0,0,0,0,0,0:0:.:.
chr22 10585000 . A T 50 PASS . GT:AD:DP:GQ:PL 0/0:.:3:10:. 0/1:.:5:20:. ./.:.:0:.:.
chr22 10586000 . A T 50 PASS . GT:AD:DP:GQ:PL 0/1:3,4:7:50:90,0,120 0/1:3:3:20:80,0 ./.:0,0:0:.:.
chr22 10587000 . A C,G 50 PASS . GT:AD:DP:GQ:PL 1/2:1,4,5:10:60:100,80,70,60,0,20 0/2:.:5:30:. ./.:0,0,0:0:.:.
chr22 10590000 . A T 50 PASS . DP:GQ:GT:AD:PL 11:50:0/1:6,5:80,0,90 8:45:0/0:8,0:0,45,100 0:.:./.:0,0:.
chr22 10591000 . A T 50 PASS . AD:PL:GT:DP:GQ 4,3:70,0,80:0/1:7:60 9,0:0,70,120:0/0:9:50 0,0:.:./.:0:.
chr22 10592000 . A T 50 PASS . GT:DP:AB:GQ:AD:PL 0/1:12:0.42:70:7,5:90,0,100 0/0:10:0.01:60:10,0:0,60,120 ./.:0:.:.:0,0:.
chr22 10593000 . A T 50 PASS . GT:HQ:MIN_DP:SB 0/1:127,128:12:3,4,5,6 0/0:-129,20:8:8,0,0,0 ./.:.,.:0:.,.,.,.
chr22 10593500 . A T 50 PASS . GT:HQ:MIN_DP:SB 0/1:127,128:32767:3,4,5,6 0/0:-32760,32768:-32761:8,0,0,0 ./.:32767,32768:0:127,128,32767,32768
chr22 10594000 . A T 50 PASS . GT:AD:DP:GQ:PGT:PID:PL 0|1:4,5:9:50:0|1:P1:90,0,90 0/1:3,2:5:20:0|1:10594000_A_T_LONG_PHASE_SET:20,0,200 ./.:0,0:0:.:.:.:.
chr22 10595000 . A T 50 PASS . GT 0/1 1|1 ./.
chr22 10595500 . A T 50 PASS . GT .|. 0|. .|0
chr22 10596000 . A T 50 PASS . GT 0 1 .
chr22 10597000 . A C,G,T,AA,AC,AG,AT,CA,CC,CG 50 PASS . GT 10/10 0/10 ./.
6 changes: 6 additions & 0 deletions test/format-plan-empty-format-tag.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
##fileformat=VCFv4.3
##contig=<ID=1>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1
1 1 . A C . PASS . GT::DP 0/1::5
10 changes: 10 additions & 0 deletions test/format-plan-fallback.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
##fileformat=VCFv4.3
##contig=<ID=1>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID">
##FORMAT=<ID=QS,Number=1,Type=Float,Description="Quality score">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
1 1 . A C . PASS . GT:PID:DP 0/1:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA:12 0/0:P2:8 ./.:.:0
1 2 . A C . PASS . GT:DP 0/1 0/0:8 ./.:0
1 3 . A C . PASS . GT:QS:DP 0/1:1.5:999999999999999999999999999999 0/0:2.5:8 ./.:.:0
8 changes: 8 additions & 0 deletions test/format-plan-float-string.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.3
##contig=<ID=1>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods">
##FORMAT=<ID=FT,Number=1,Type=String,Description="Sample filter">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2
1 1 . A C,G . PASS . GT:GL:FT:DP 0/1:-0.25,-0.50,-0.75,-1.00,-1.25,-1.50:PASS:12 1/2:-0.50,-0.75,-1.00,-1.25,-1.50,-1.75:LowQual:8
13 changes: 13 additions & 0 deletions test/format-plan-float-vector.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
##fileformat=VCFv4.3
##contig=<ID=1>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods">
##FORMAT=<ID=QS,Number=.,Type=Float,Description="Variable-width quality scores">
##FORMAT=<ID=AB,Number=1,Type=Float,Description="Allele balance">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
1 1 . A C . PASS . GT:GL:QS:AB:DP 0/1:-0.1,-1.2,-3:0.5,1.5:0.50:7 0/0:0,-5,-9:0.1,0.2,0.3,0.4:.:8 ./.:.:1.25,2.5:.:0
1 2 . A C,G . PASS . GT:GL:QS:AB:DP 1/2:-9,-8,-7,-6,-5,-4:0.1:0.25:12 0/2:-2,-3,-4,-5,-6,-7:0.2,0.3:0.75:9 ./.:.:.:.:0
1 3 . G T . PASS . QS:GT:GL:AB:DP 0.1,0.2:0/1:-1,-2,-3:0.5:5 .:0/0:0,-10,-20:0.0:4 0.3,0.4,0.5:./.:.:.:0
1 4 . T G . PASS . GT:GL:QS:AB:DP 0/1:.:.:.:6 0/0:.:.:.:4 ./.:.:.:.:0
1 5 . C T . PASS . GT:GL:QS:AB:DP 0/1:-0.4,-0.8:0.1:0.5:3 0/0:0,-2:0.2:0.1:2 ./.:.:.:.:0
6 changes: 6 additions & 0 deletions test/format-plan-gt-header-shape.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
##fileformat=VCFv4.3
##contig=<ID=chr22,length=50818468>
##FORMAT=<ID=GT,Number=2,Type=String,Description="Malformed genotype header">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3
chr22 10620000 . A T 50 PASS . GT:DP 0/1:7 0/0:5 ./.:0
Loading