diff --git a/docs/advanced/debugging-results.md b/docs/advanced/debugging-results.md
index 256439b..23f2a04 100644
--- a/docs/advanced/debugging-results.md
+++ b/docs/advanced/debugging-results.md
@@ -34,13 +34,13 @@ A variant you expect to find is not in the database:
 | Check | Details |
 |-------|---------|
 | Was it in the source VCFs? | AFQuery only stores variants present in ingested VCFs |
-| Was it FILTER=PASS? | Default ingestion skips non-PASS variants. Check with `afquery info` for `pass_only_filter` |
+| Was the alt allele ever called? | A variant is stored only if at least one sample carries the alt allele or has a failed call there. A site seen only as `0/0` in every sample is genuinely absent. Note that non-PASS calls *are* kept (in `fail_bitmap`) — such a variant is not missing, it just shows `AC=0`. |
 | Multiallelic sites | AFQuery stores each ALT separately. Query the specific ALT allele, not just position |
 | Chromosome naming | Ensure consistent `chr` prefix usage |
 
 ### 4. Unexpected N_FAIL > 0
 
-`N_FAIL > 0` means some eligible samples had the alt allele called but with `FILTER≠PASS`. These samples are excluded from AC/AN. This is usually benign (1–2 samples), but a high N_FAIL warrants investigation:
+`N_FAIL > 0` means some eligible samples had a call with `FILTER≠PASS` at this position. These samples are excluded from AC, but they remain eligible and still count in AN. This is usually benign (1–2 samples), but a high N_FAIL warrants investigation:
 
 | N_FAIL relative to n_eligible | Likely cause |
 |---|---|
diff --git a/docs/advanced/filter-pass-tracking.md b/docs/advanced/filter-pass-tracking.md
index cc18c61..11d03ca 100644
--- a/docs/advanced/filter-pass-tracking.md
+++ b/docs/advanced/filter-pass-tracking.md
@@ -11,9 +11,10 @@ In VCF format, the FILTER column indicates whether a variant call passed quality
 - `PASS` or `.` (missing) — the variant passed all filters
 - Any other value (e.g., `LowQual`, `VQSRTrancheSNP99.90to100.00`) — the variant failed one or more filters
 
-AFQuery default behavior:
+AFQuery's default behavior:
 
-- **PASS-only**: only `FILTER=PASS` variants are counted in AC/AN. This is always enforced.
+- Only `FILTER=PASS` calls contribute to **AC** (and therefore to **AF**). This is always enforced.
+- **AN** is not affected by the filter — it counts every eligible sample at the position, failed calls included.
 
 ---
 
@@ -21,19 +22,19 @@ AFQuery default behavior:
 
 AFQuery stores a third bitmap per variant alongside `het_bitmap` and `hom_bitmap`:
 
-- **`fail_bitmap`** — bit set for each sample that has a non-ref genotype (AC>0) AND `FILTER≠PASS`
+- **`fail_bitmap`** — bit set for each sample whose call at this site has `FILTER≠PASS`. In practice this is two cases: a sample that carries the alt allele but whose call failed a filter, and a sample with a missing genotype (`./.`) at a site that itself failed a filter.
 
-This means:
+What this means for a sample in `fail_bitmap`:
 
-- A sample in `fail_bitmap` was genotyped with the alt allele but the call failed QC
-- Such samples are **not** counted in AC/AN (they don't affect AF)
-- Their count is exposed as `N_FAIL`
+- Its alt alleles are **not** counted in AC, so it does not raise AF.
+- It is still an eligible sample, so it **is** counted in AN. The failed call lowers AF by sitting in the denominator without contributing to the numerator.
+- Its count is exposed separately as `N_FAIL`.
 
 ---
 
 ## Database Creation
 
-The `fail_bitmap` is always written and always tracks PASS-only ingestion:
+The `fail_bitmap` is always written, regardless of build options:
 
 ```bash
 afquery create-db --manifest manifest.tsv --output-dir ./db/ --genome-build GRCh38
@@ -52,10 +53,10 @@ afquery query --db ./db/ --locus chr1:925952
 ```
 
 ```
-chr1:925952 G>A  AC=142  AN=2742  AF=0.0518  n_eligible=1371  N_HET=138  N_HOM_ALT=2  N_HOM_REF=1231  N_FAIL=7
+chr1:925952 G>A  AC=142  AN=2742  AF=0.0518  n_eligible=1371  N_HET=138  N_HOM_ALT=2  N_HOM_REF=1224  N_FAIL=7
 ```
 
-`N_FAIL=7` means 7 eligible samples had the alt allele called but with FILTER≠PASS.
+`N_FAIL=7` means 7 eligible samples had a call with FILTER≠PASS at this site. They are part of `n_eligible` (and of AN), so the genotype counts still add up: 138 + 2 + 1224 + 7 = 1371.
 
 ### Python API
 
@@ -83,21 +84,23 @@ Each carrier row shows its `filter` column as `PASS` or `FAIL`, along with sampl
 
 ## VCF Annotation
 
-AFQuery adds an additional INFO field to annotated VCFs:
+Among the INFO fields `afquery annotate` writes, one reports the failed-call count:
 
 | Field | Type | Description |
 |-------|------|-------------|
-| `AFQUERY_N_FAIL` | Integer | Eligible samples with FILTER≠PASS at this variant |
+| `AFQUERY_N_FAIL` | Integer | Eligible samples with `FILTER≠PASS` at this variant |
 
 ```bash
 afquery annotate --db ./db/ --input variants.vcf --output annotated.vcf
 ```
 
+See [Understanding Output](../getting-started/understanding-output.md#vcf-annotation-fields) for the full set of `AFQUERY_*` fields.
+
 ---
 
 ## PASS-Only Enforcement
 
-AF reflects the quality-filtered allele frequency — the frequency of the alt allele among high-quality calls. This is appropriate for most clinical and research use cases. PASS-only ingestion is always enforced.
+PASS-only counting is always enforced and cannot be turned off. It applies to the numerator only: a failed call never adds an alt allele to AC, but the sample stays in the eligible set and so remains in AN. The result is a deliberately conservative AF — a failed carrier weighs on the denominator without lifting the numerator, which is the safe direction for clinical and research use.
 
 ---
 
diff --git a/docs/advanced/multi-cohort-strategies.md b/docs/advanced/multi-cohort-strategies.md
index b87578a..6ea03fd 100644
--- a/docs/advanced/multi-cohort-strategies.md
+++ b/docs/advanced/multi-cohort-strategies.md
@@ -61,7 +61,7 @@ All samples in one database, with phenotype codes distinguishing cohorts.
 ### Manifest Design
 
 ```tsv
-sample_id	vcf_path	sex	technology	phenotype_codes
+sample_name	vcf_path	sex	tech_name	phenotype_codes
 CARD_001	/data/card/001.vcf.gz	female	wgs	cardiology,EUR,control
 CARD_002	/data/card/002.vcf.gz	male	wgs	cardiology,EUR,case_HCM
 NEURO_001	/data/neuro/001.vcf.gz	female	wes	neurology,AFR,case_epilepsy
@@ -99,7 +99,7 @@ afquery query --db ./merged/ --locus chr1:12345678 --ref C --alt T \
 |-----------|-------------|
 | Single database to maintain | Rebuilding requires all VCFs accessible |
 | Cross-cohort queries via phenotype filters | Phenotype code design must be planned upfront |
-| One annotation pass covers all cohorts | Adding a new cohort requires `afquery update` |
+| One annotation pass covers all cohorts | Adding a new cohort requires `afquery update-db` |
 | Flexible ad-hoc stratification | Larger database, longer rebuild time |
 
 ---
@@ -134,7 +134,11 @@ Maintain both per-cohort and merged databases.
 afquery annotate --db /databases/institutional/cardiology/ \
   --input patient.vcf.gz --output step1.vcf.gz
 
-# Annotate against shared controls (use a different INFO prefix via Python API)
+# Annotate against shared controls in a second pass
+# Note: the AFQUERY_* INFO fields are overwritten on each pass — if you need
+# both sets of frequencies, extract the values from step1.vcf.gz first.
+afquery annotate --db /databases/shared/combined_controls/ \
+  --input step1.vcf.gz --output step2.vcf.gz
 ```
 
 ---
diff --git a/docs/advanced/performance.md b/docs/advanced/performance.md
index 43657cf..ace0553 100644
--- a/docs/advanced/performance.md
+++ b/docs/advanced/performance.md
@@ -50,7 +50,7 @@ Expected scaling (50K samples, 2,500 buckets, GRCh38):
 | 32 | ~18 min | ~24× |
 | 52 | ~13 min | ~38× |
 
-Scaling is near-linear up to ~32 cores; beyond that, I/O and SQLite contention limit further gains.
+Scaling is near-linear up to ~32 cores; beyond that, disk I/O contention limits further gains.
 
 Total RAM required: `build_threads × build_memory`
 
@@ -80,7 +80,7 @@ afquery create-db ... --threads 32 --build-threads 16 --build-memory 4GB
 graph TD
     A["Query Request<br/>chr1:925952"]
     B["Open DuckDB<br/>connection"]
-    C["Locate Parquet<br/>bucket_0/data.parquet"]
+    C["Locate Parquet<br/>bucket_0.parquet"]
     D["Read rows<br/>matching pos"]
     E["Deserialize<br/>bitmaps"]
     F["Bitmap AND<br/>with eligible<br/>sample set"]
diff --git a/docs/advanced/ploidy-and-sex-chroms.md b/docs/advanced/ploidy-and-sex-chroms.md
index 030693a..987b991 100644
--- a/docs/advanced/ploidy-and-sex-chroms.md
+++ b/docs/advanced/ploidy-and-sex-chroms.md
@@ -103,12 +103,12 @@ afquery query --db ./db/ --locus chrM:3243
 
 For every query result, the following identity holds:
 
-**N_HET + N_HOM_ALT + N_HOM_REF + N_FAIL = n_eligible**
+**N_HET + N_HOM_ALT + N_HOM_REF + N_FAIL + N_NO_COVERAGE = n_eligible**
 
-This can be used to validate results. N_HOM_REF is the number of eligible samples that are homozygous reference (i.e., do not carry the alt allele and passed quality filters).
+This can be used to validate results. N_HOM_REF is the number of eligible samples that are homozygous reference (i.e., do not carry the alt allele and passed quality filters). N_NO_COVERAGE is 0 unless a coverage-evidence filter is active — see [Coverage Evidence](coverage-evidence.md).
 
 !!! note "Mutual exclusivity"
-    N_HET, N_HOM_ALT, N_HOM_REF, and N_FAIL are mutually exclusive. A sample with a non-ref allele but FILTER≠PASS is counted in N_FAIL only — it does not appear in N_HET or N_HOM_ALT. Likewise, N_HOM_REF counts only PASS-filtered samples.
+    N_HET, N_HOM_ALT, N_HOM_REF, N_FAIL, and N_NO_COVERAGE are mutually exclusive. A sample with a non-ref allele but FILTER≠PASS is counted in N_FAIL only — it does not appear in N_HET or N_HOM_ALT. Likewise, N_HOM_REF counts only PASS-filtered samples.
 
 ### chrX non-PAR
 
diff --git a/docs/faq.md b/docs/faq.md
index 33fd8ed..1d7acff 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -149,7 +149,7 @@ On chrX non-PAR positions, males contribute AN=1 and females contribute AN=2. Wh
 
 ## What does N_FAIL mean in query output?
 
-`N_FAIL` is the count of eligible samples that had the alt allele called but with `FILTER≠PASS`. Shown as `N_FAIL=N` in text output. These samples are not counted in AC/AN. See [FILTER=PASS Tracking](advanced/filter-pass-tracking.md).
+`N_FAIL` is the count of eligible samples whose call had `FILTER≠PASS` at the position. Shown as `N_FAIL=N` in text output. These samples are excluded from AC, but they stay eligible and still count toward AN. See [FILTER=PASS Tracking](advanced/filter-pass-tracking.md).
 
 ---
 
diff --git a/docs/getting-started/concepts.md b/docs/getting-started/concepts.md
index 41a324b..fb1d5be 100644
--- a/docs/getting-started/concepts.md
+++ b/docs/getting-started/concepts.md
@@ -126,13 +126,13 @@ graph TB
 
 ### Per-Variant Bitmaps
 
-For each variant row, AFQuery stores three [Roaring Bitmaps](https://roaringbitmap.org/):
+For each variant row, AFQuery stores per-sample genotype information as [Roaring Bitmaps](https://roaringbitmap.org/). Three of them drive every query:
 
-- **`het_bitmap`** — bit set for each sample that is heterozygous (GT=0/1 or 1/0)
-- **`hom_bitmap`** — bit set for each sample that is homozygous alt (GT=1/1 or GT=1)
-- **`fail_bitmap`** bit set for each sample called with FILTER≠PASS
+- **`het_bitmap`** — sample is heterozygous (`GT=0/1`) with `FILTER=PASS`
+- **`hom_bitmap`** — sample is homozygous alt (`GT=1/1`, or `GT=1` on haploid regions) with `FILTER=PASS`
+- **`fail_bitmap`** — the sample's call at this site has `FILTER≠PASS`
 
-Each sample has a stable integer ID (0-indexed). The bit position in the bitmap equals the sample ID.
+Each sample has a stable integer ID (0-indexed). The bit position in the bitmap equals the sample ID. Databases built with coverage-quality filters carry two additional bitmaps — see [Data Model](../reference/data-model.md#parquet-schema).
 
 ### Parquet Storage
 
@@ -141,8 +141,8 @@ Bitmaps are serialized and stored in Parquet files, partitioned by chromosome an
 ```
 variants/
   chr1/
-    bucket_0/   ← positions 0–999,999
-    bucket_1/   ← positions 1,000,000–1,999,999
+    bucket_0.parquet   ← positions 0–999,999
+    bucket_1.parquet   ← positions 1,000,000–1,999,999
     ...
   chr2/
     ...
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
index cb9b1bd..1a3ace0 100644
--- a/docs/getting-started/quickstart.md
+++ b/docs/getting-started/quickstart.md
@@ -135,6 +135,7 @@ The output VCF gains INFO fields (see [Annotate a VCF](../guides/annotate-vcf.md
 | `AFQUERY_N_HOM_ALT` | A (per ALT) | Homozygous alt sample count |
 | `AFQUERY_N_HOM_REF` | A (per ALT) | Homozygous ref sample count |
 | `AFQUERY_N_FAIL` | 1 (per site) | Samples with FILTER≠PASS |
+| `AFQUERY_N_NO_COVERAGE` | A (per ALT) | Eligible samples lacking coverage evidence (0 unless a coverage-evidence filter is active) |
 
 ---
 
diff --git a/docs/getting-started/tutorial.md b/docs/getting-started/tutorial.md
index 92f6431..df85551 100644
--- a/docs/getting-started/tutorial.md
+++ b/docs/getting-started/tutorial.md
@@ -86,7 +86,7 @@ Every query result includes the following fields:
 | **AC** | Allele count — alt allele copies in eligible samples |
 | **AN** | Allele number — total alleles examined (adjusted for coverage and ploidy) |
 | **AF** | Allele frequency — `AC / AN` |
-| **n_eligible** | Samples in the filtered set (before coverage or ploidy adjustments). Here, 10 = no filter applied. |
+| **n_eligible** | Eligible samples at this position — those passing the metadata filters *and* covered here. Ploidy is applied afterwards, to AN. Here, 10 = no filter applied and every sample is covered. |
 | **N_HET** | Eligible samples heterozygous for the alt allele |
 | **N_HOM_ALT** | Eligible samples homozygous for the alt allele |
 | **N_HOM_REF** | Eligible samples homozygous reference |
@@ -94,7 +94,7 @@ Every query result includes the following fields:
 
 For `chr1:925952`: AN=20 = 10 diploid samples × 2 alleles; AC=6 = 4 het samples (1 copy each) + 1 hom-alt (2 copies).
 
-**Accounting identity:** `n_eligible = N_HET + N_HOM_ALT + N_HOM_REF + N_FAIL` always holds (10 = 4+1+5+0 ✓).
+**Accounting identity:** `n_eligible = N_HET + N_HOM_ALT + N_HOM_REF + N_FAIL + N_NO_COVERAGE` always holds. `N_NO_COVERAGE` is 0 here (it only becomes non-zero with a coverage-evidence filter), so 10 = 4+1+5+0+0 ✓.
 
 ### N_FAIL in practice
 
diff --git a/docs/getting-started/understanding-output.md b/docs/getting-started/understanding-output.md
index ecf6c7f..e1528b9 100644
--- a/docs/getting-started/understanding-output.md
+++ b/docs/getting-started/understanding-output.md
@@ -14,8 +14,8 @@ This page explains what each field in AFQuery output means and how to interpret
 | **N_HET** | int | Number of eligible samples heterozygous for the alt allele (GT=0/1) |
 | **N_HOM_ALT** | int | Number of eligible samples homozygous for the alt allele (GT=1/1 or GT=1). Includes haploid carriers on sex chromosomes and chrM. See [Ploidy](../advanced/ploidy-and-sex-chroms.md#genotype-counting). |
 | **N_HOM_REF** | int | Number of eligible samples homozygous reference (GT=0/0 or GT=0) |
-| **n_eligible** | int | Number of samples in the eligible set (after sex/phenotype/tech filters) |
-| **N_FAIL** | int | Number of eligible samples with a non-ref allele called but FILTER≠PASS at this position. These samples are counted *only* in N_FAIL — not in N_HET, N_HOM_ALT, or N_HOM_REF. |
+| **n_eligible** | int | Number of eligible samples — those passing the sex/phenotype/tech filters *and* covered at this position |
+| **N_FAIL** | int | Number of eligible samples whose call at this position had FILTER≠PASS. These samples are counted *only* in N_FAIL — not in N_HET, N_HOM_ALT, or N_HOM_REF — but they stay eligible and still count toward AN. |
 | **N_NO_COVERAGE** | int | Number of eligible samples whose tech lacks coverage evidence at this position. Excluded from `N_HOM_REF` to keep AC/AN conservative. Always `0` unless a coverage-evidence filter is active. See [Coverage Evidence](../advanced/coverage-evidence.md). |
 
 
diff --git a/docs/guides/annotate-vcf.md b/docs/guides/annotate-vcf.md
index 758916f..01b6df5 100644
--- a/docs/guides/annotate-vcf.md
+++ b/docs/guides/annotate-vcf.md
@@ -26,7 +26,7 @@ afquery annotate \
 | `AFQUERY_N_HET` | Integer | A (per ALT) | Heterozygous sample count |
 | `AFQUERY_N_HOM_ALT` | Integer | A (per ALT) | Homozygous alt sample count |
 | `AFQUERY_N_HOM_REF` | Integer | A (per ALT) | Homozygous ref sample count |
-| `AFQUERY_N_FAIL` | Integer | 1 (per site) | Samples with FILTER≠PASS and alt allele called. Mutually exclusive with N_HET/N_HOM_ALT/N_HOM_REF. |
+| `AFQUERY_N_FAIL` | Integer | 1 (per site) | Eligible samples whose call had FILTER≠PASS. Excluded from AC, but still counted in AN. Mutually exclusive with N_HET/N_HOM_ALT/N_HOM_REF. |
 | `AFQUERY_N_NO_COVERAGE` | Integer | A (per ALT) | Eligible samples whose tech lacks coverage evidence at this position. Excluded from `N_HOM_REF` to keep AC/AN conservative. Always `0` unless a coverage-evidence filter is active. See [Coverage Evidence](../advanced/coverage-evidence.md). |
 
 !!! note "Multi-allelic sites"
diff --git a/docs/guides/create-database.md b/docs/guides/create-database.md
index 9e2c3c4..6e48a08 100644
--- a/docs/guides/create-database.md
+++ b/docs/guides/create-database.md
@@ -27,8 +27,8 @@ afquery create-db \
 
 ## What Happens
 
-1. **Ingest phase** — Each VCF is parsed with cyvcf2. Genotypes and INFO fields are written to a SQLite temporary database, one row per variant per sample.
-2. **Build phase** — DuckDB reads the SQLite data, aggregates per 1-Mbp bucket, and writes Roaring Bitmap Parquet files partitioned by chromosome/bucket.
+1. **Ingest phase** — Each VCF is parsed with cyvcf2. Genotypes and quality fields are written to a temporary per-sample Parquet file, one row per variant per sample.
+2. **Build phase** — DuckDB reads the temporary Parquet files, aggregates per 1-Mbp bucket, and writes Roaring Bitmap Parquet files partitioned by chromosome and bucket.
 3. **Finalize** — `manifest.json` and `metadata.sqlite` are written to the output directory.
 
 ---
@@ -39,12 +39,10 @@ afquery create-db \
 ./db/
 ├── manifest.json          # Build configuration (genome build, schema version, etc.)
 ├── metadata.sqlite        # Sample/phenotype/technology/changelog metadata
-├── variants/              # Hive-partitioned Parquet files
+├── variants/              # Parquet files partitioned by chromosome and bucket
 │   ├── chr1/
-│   │   ├── bucket_0/      # Positions 0–999,999
-│   │   │   └── data.parquet
-│   │   ├── bucket_1/      # Positions 1,000,000–1,999,999
-│   │   │   └── data.parquet
+│   │   ├── bucket_0.parquet      # Positions 0–999,999
+│   │   ├── bucket_1.parquet      # Positions 1,000,000–1,999,999
 │   │   └── ...
 │   ├── chr2/
 │   └── ...
@@ -95,7 +93,7 @@ afquery create-db --manifest manifest.tsv --output-dir ./db/ --genome-build GRCh
 
 ## FILTER=PASS Behavior
 
-By default, only variants with `FILTER=PASS` (or no FILTER field) are counted in AC/AN. Variants that fail filters are tracked in `fail_bitmap`. PASS-only ingestion is always enforced — there is currently no CLI option to change this behaviour.
+Only `FILTER=PASS` calls (or calls with no FILTER field) contribute to AC, and therefore to AF. AN is not affected — it counts every eligible sample, failed calls included. Calls that fail a filter are tracked in `fail_bitmap` and surfaced as `N_FAIL`. PASS-only counting is always enforced — there is currently no CLI option to change this behaviour.
 
 See [FILTER=PASS Tracking](../advanced/filter-pass-tracking.md) for details.
 
diff --git a/docs/guides/sample-filtering.md b/docs/guides/sample-filtering.md
index e7657b2..10c32cf 100644
--- a/docs/guides/sample-filtering.md
+++ b/docs/guides/sample-filtering.md
@@ -1,6 +1,6 @@
 # Sample Filtering
 
-AFQuery supports flexible metadata-based selection of samples for AF computation. Filters are available on all query commands (`query`, `annotate`, `dump`).
+AFQuery supports flexible metadata-based selection of samples for AF computation. Filters are available on all query commands (`query`, `annotate`, `dump`, `variant-info`).
 
 **Phenotype codes are arbitrary string labels** — you define them in your manifest. They can be ICD-10 codes, HPO terms, project tags (`control`, `pilot`), or any strings meaningful to your cohort. The filtering system does not interpret the codes — it matches them exactly as stored.
 
diff --git a/docs/guides/update-database.md b/docs/guides/update-database.md
index 7df1dfb..b0b2717 100644
--- a/docs/guides/update-database.md
+++ b/docs/guides/update-database.md
@@ -1,6 +1,6 @@
 # Update a Database
 
-`afquery update-db` supports three operations: adding new samples, removing existing samples, and compacting the database to reclaim space.
+`afquery update-db` adds new samples, removes existing samples, updates sample metadata, and compacts the database to reclaim space.
 
 ### Update Timeline
 
diff --git a/docs/index.md b/docs/index.md
index a634a4d..1ab7a88 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -31,7 +31,7 @@ AFQuery pre-indexes genotypes as [Roaring Bitmaps](https://roaringbitmap.org/) i
 - **Ploidy-aware** — correct AN on chrX PAR/non-PAR, chrY, and chrM.
 - **Technology-aware AN** — per-position capture BED intersection across WGS, WES kits, and panels.
 - **Carrier lookup** — list samples carrying any variant with full metadata (sex, tech, phenotypes, genotype, FILTER status).
-- **VCF annotation** — add `AFQUERY_AC/AN/AF/N_HET/N_HOM_ALT/N_HOM_REF/N_FAIL` INFO fields from any sample subset.
+- **VCF annotation** — add `AFQUERY_AC/AN/AF/N_HET/N_HOM_ALT/N_HOM_REF/N_FAIL/N_NO_COVERAGE` INFO fields from any sample subset.
 - **Audit changelog** — every database operation is recorded for reproducibility.
 
 ---
@@ -43,7 +43,7 @@ graph TD
     A["🔍 Input VCFs<br/>single-sample"]
     B["📥 Ingest<br/>cyvcf2 reads VCFs →<br/>per-sample Parquet files emitted"]
     C["🏗️ Build<br/>DuckDB aggregates per bucket →<br/>Roaring Bitmaps → Parquet"]
-    D["💾 Database on Disk<br/>variants/chr*/bucket_*/<br/>capture/*.pkl<br/>metadata.sqlite<br/>manifest.json"]
+    D["💾 Database on Disk<br/>variants/chr*/bucket_*.parquet<br/>capture/*.pkl<br/>metadata.sqlite<br/>manifest.json"]
     E["⚡ Query Engine<br/>Load bitmap → filter samples →<br/>compute AC/AN/AF<br/>~10-100ms"]
     F["📊 Annotate"]
     G["🔄 Update"]
diff --git a/docs/reference/data-model.md b/docs/reference/data-model.md
index cb8c6f1..de1ce99 100644
--- a/docs/reference/data-model.md
+++ b/docs/reference/data-model.md
@@ -10,12 +10,10 @@ This page documents the on-disk layout of an AFQuery database, including file fo
 <db_dir>/
 ├── manifest.json          # Build configuration
 ├── metadata.sqlite        # Sample/phenotype/technology/changelog metadata
-├── variants/              # Hive-partitioned Parquet variant data
+├── variants/              # Parquet variant data, partitioned by chromosome and bucket
 │   ├── chr1/
-│   │   ├── bucket_0/      # Positions 0–999,999
-│   │   │   └── data.parquet
-│   │   ├── bucket_1/      # Positions 1,000,000–1,999,999
-│   │   │   └── data.parquet
+│   │   ├── bucket_0.parquet      # Positions 0–999,999
+│   │   ├── bucket_1.parquet      # Positions 1,000,000–1,999,999
 │   │   └── ...
 │   ├── chr2/
 │   └── ...
@@ -33,11 +31,13 @@ Stores the build configuration used during `create-db`.
 | Field | Type | Description |
 |-------|------|-------------|
 | `genome_build` | string | `"GRCh37"` or `"GRCh38"` |
-| `schema_version` | string | `"2.0"` |
-| `pass_only_filter` | bool | Whether FILTER=PASS was enforced during ingest |
+| `version` | string | manifest.json format version |
 | `db_version` | string | User-specified version label |
+| `sample_count` | int | Number of samples at build time |
+| `schema_version` | string | `"2.0"`, or `"3.0"` when the database was built with coverage-quality filters |
+| `pass_only_filter` | bool | Always `true` — PASS-only counting is always enforced |
+| `coverage_filter` | object | The `min_dp` / `min_gq` / `min_qual` / `min_covered` thresholds used at build time (all `0` when unused) |
 | `created_at` | string | ISO 8601 timestamp |
-| `manifest_path` | string | Path to original manifest TSV |
 
 ---
 
@@ -102,12 +102,16 @@ Each bucket Parquet file has this schema:
 | `pos` | `uint32` | 1-based genomic position |
 | `ref` | `large_utf8` | Reference allele |
 | `alt` | `large_utf8` | Alternate allele |
-| `het_bitmap` | `large_binary` | Serialized Roaring Bitmap of heterozygous sample IDs |
-| `hom_bitmap` | `large_binary` | Serialized Roaring Bitmap of homozygous alt sample IDs |
-| `fail_bitmap` | `large_binary` | Serialized Roaring Bitmap of FILTER≠PASS sample IDs |
+| `het_bitmap` | `large_binary` | Roaring Bitmap of heterozygous, `FILTER=PASS` sample IDs |
+| `hom_bitmap` | `large_binary` | Roaring Bitmap of homozygous-alt, `FILTER=PASS` sample IDs |
+| `fail_bitmap` | `large_binary` | Roaring Bitmap of sample IDs whose call has `FILTER≠PASS` at this site |
+| `filtered_bitmap` | `large_binary` | Roaring Bitmap of WES non-carriers with uncertain coverage |
+| `quality_pass_bitmap` | `large_binary` | Roaring Bitmap of carriers meeting the DP/GQ/QUAL thresholds |
 
 Rows are sorted by `(pos, alt)` within each bucket.
 
+`filtered_bitmap` and `quality_pass_bitmap` back the [coverage-evidence](../advanced/coverage-evidence.md) feature. The columns are always present, but they are empty unless the database was built with coverage-quality filters (schema version `3.0`).
+
 !!! important "large_utf8 / large_binary"
     AFQuery uses `large_utf8` and `large_binary` (64-bit offsets) rather than `utf8` / `binary` (32-bit). This is required for compatibility with DuckDB's Parquet reader on large chromosomes.
 
@@ -118,9 +122,10 @@ Rows are sorted by `(pos, alt)` within each bucket.
 Bitmaps use the [Roaring Bitmap](https://roaringbitmap.org/) format, serialized by pyroaring's portable serialization.
 
 - Bit position = sample ID (0-indexed integer)
-- `het_bitmap`: bit set iff sample is heterozygous at this variant
-- `hom_bitmap`: bit set iff sample is homozygous alt at this variant
-- `fail_bitmap`: bit set iff sample has genotype AC>0 AND FILTER≠PASS
+- `het_bitmap`: bit set iff the sample is heterozygous with `FILTER=PASS`
+- `hom_bitmap`: bit set iff the sample is homozygous alt with `FILTER=PASS`
+- `fail_bitmap`: bit set iff the sample's call has `FILTER≠PASS` — either a carrier whose call failed a filter, or a missing genotype (`./.`) at a failed site
+- `filtered_bitmap`, `quality_pass_bitmap`: see the Parquet schema above; both are empty unless the database was built with coverage-quality filters
 
 To deserialize in Python:
 ```python
@@ -152,11 +157,11 @@ bucket_id = pos // 1_000_000
 ```
 
 !!! warning "DuckDB integer arithmetic"
-    When computing bucket IDs in DuckDB SQL, always use:
+    When computing bucket IDs in DuckDB SQL, always use the integer-division operator:
     ```sql
-    CAST(pos AS BIGINT) / 1000000
+    CAST(pos AS BIGINT) // 1000000
     ```
-    Not `CAST(pos / 1000000 AS BIGINT)` — DuckDB performs float division first and rounds, producing wrong bucket IDs.
+    Not `CAST(pos / 1000000 AS BIGINT)` — the `/` operator does float division, and casting the rounded result afterwards produces wrong bucket IDs.
 
 ---
 
diff --git a/docs/reference/glossary.md b/docs/reference/glossary.md
index e9f8547..0db3816 100644
--- a/docs/reference/glossary.md
+++ b/docs/reference/glossary.md
@@ -34,7 +34,7 @@ A sample that passes all query filters (sex, phenotype, technology) and has cove
 
 ## N_FAIL
 
-The count of eligible samples at a position where the genotype call had FILTER≠PASS in the source VCF. These samples are not counted in AC/AN. See [FILTER=PASS Tracking](../advanced/filter-pass-tracking.md).
+The count of eligible samples whose call at a position had FILTER≠PASS in the source VCF. These samples are excluded from AC, but they remain eligible and so still count toward AN. See [FILTER=PASS Tracking](../advanced/filter-pass-tracking.md).
 
 ## N_NO_COVERAGE
 
@@ -66,7 +66,7 @@ A compressed bitset data structure that efficiently stores sets of integers. AFQ
 
 ## Schema Version
 
-The AFQuery database format version stored in `manifest.json`. Each variant stores `het_bitmap`, `hom_bitmap`, and `fail_bitmap`. See [Data Model](data-model.md).
+The AFQuery database format version stored in `manifest.json` — `2.0`, or `3.0` when the database was built with coverage-quality filters. See [Data Model](data-model.md).
 
 ## Technology
 
diff --git a/docs/reference/python-api.md b/docs/reference/python-api.md
index 114ce4b..3937e12 100644
--- a/docs/reference/python-api.md
+++ b/docs/reference/python-api.md
@@ -453,7 +453,7 @@ class QueryResult:
     N_HET: int               # Heterozygous count
     N_HOM_ALT: int           # Homozygous alt count
     N_HOM_REF: int           # Homozygous ref count
-    N_FAIL: int              # Samples with alt allele called but FILTER≠PASS
+    N_FAIL: int              # Eligible samples whose call had FILTER≠PASS (excluded from AC, kept in AN)
     N_NO_COVERAGE: int       # Eligible samples whose tech lacks evidence (excluded from N_HOM_REF)
 ```
 
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index af9d921..6eee06f 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -77,7 +77,7 @@ AFQuery requires DuckDB to use Parquet for all temporary files. Arrow IPC is not
 
 **Cause:** A DuckDB float-division rounding bug was present in older AFQuery versions. When computing bucket IDs, `CAST(pos / 1000000 AS BIGINT)` rounds floats incorrectly (e.g., position 1,500,000 → bucket 2 instead of 1).
 
-**Fix:** Upgrade to the latest AFQuery version. The fix uses `CAST(pos AS BIGINT) / 1000000` for correct integer division. Rebuild the database after upgrading.
+**Fix:** Upgrade to the latest AFQuery version. The fix uses `CAST(pos AS BIGINT) // 1000000` — the integer-division operator — for correct bucket IDs. Rebuild the database after upgrading.
 
 ---
 
diff --git a/docs/use-cases/acmg-use-cases.md b/docs/use-cases/acmg-use-cases.md
index 66132a9..5ebb3b7 100644
--- a/docs/use-cases/acmg-use-cases.md
+++ b/docs/use-cases/acmg-use-cases.md
@@ -207,7 +207,7 @@ On chrX non-PAR regions, males contribute AN=1 and females AN=2. A cohort with 8
 
 ### High N_FAIL at the Variant Site
 
-`N_FAIL` counts eligible samples that were genotyped with the alt allele but failed quality filters (`FILTER≠PASS`). These samples are excluded from AC/AN, so AF is not directly inflated. However, a high N_FAIL indicates the site has systematic QC problems — and warrants caution before applying ACMG criteria.
+`N_FAIL` counts eligible samples whose call at the site failed quality filters (`FILTER≠PASS`). These samples are excluded from AC but stay in AN, so they cannot inflate AF — if anything they make it slightly more conservative. A high N_FAIL still indicates the site has systematic QC problems, and warrants caution before applying ACMG criteria.
 
 | N_FAIL / n_eligible | Interpretation |
 |---|---|
diff --git a/docs/use-cases/sex-specific-af.md b/docs/use-cases/sex-specific-af.md
index b8a6dfd..b649303 100644
--- a/docs/use-cases/sex-specific-af.md
+++ b/docs/use-cases/sex-specific-af.md
@@ -34,10 +34,10 @@ afquery query --db ./db/ --locus chrX:153296777
 ```
 
 ```
-chrX:153296777 C>T  AC=15  AN=2500  AF=0.0060  n_eligible=1500  N_HET=5  N_HOM_ALT=0  N_HOM_REF=1495  N_FAIL=0
+chrX:153296777 C>T  AC=15  AN=2500  AF=0.0060  n_eligible=1500  N_HET=5  N_HOM_ALT=10  N_HOM_REF=1485  N_FAIL=0
 ```
 
-AN=2500: 500 males × 1 + 1000 females × 2 = 2500 (mixed ploidy)
+AN=2500: 500 males × 1 + 1000 females × 2 = 2500 (mixed ploidy). The 10 hemizygous male carriers land in N_HOM_ALT, not N_HET — on haploid positions there is no heterozygous state.
 
 ### 2. Query males only (hemizygous frequency)
 
@@ -46,10 +46,10 @@ afquery query --db ./db/ --locus chrX:153296777 --sex male
 ```
 
 ```
-chrX:153296777 C>T  AC=10  AN=500  AF=0.0200  n_eligible=500  N_HET=10  N_HOM_ALT=0  N_HOM_REF=490  N_FAIL=0
+chrX:153296777 C>T  AC=10  AN=500  AF=0.0200  n_eligible=500  N_HET=0  N_HOM_ALT=10  N_HOM_REF=490  N_FAIL=0
 ```
 
-Hemizygous rate: 2% (N_HET here represents hemizygous males, GT=1)
+Hemizygous rate: 2%. The carriers appear in N_HOM_ALT, not N_HET — males are haploid at chrX non-PAR, so `GT=1` carriers are counted as homozygous-alt.
 
 ### 3. Query females only (carrier frequency)
 
diff --git a/src/afquery/cli.py b/src/afquery/cli.py
index 0beea90..dda602e 100644
--- a/src/afquery/cli.py
+++ b/src/afquery/cli.py
@@ -74,26 +74,23 @@ def _print_results(results, fmt: str) -> None:
                 "ref": r.variant.ref, "alt": r.variant.alt,
                 "AC": r.AC, "AN": r.AN, "AF": r.AF, "n_eligible": r.n_samples_eligible,
                 "N_HET": r.N_HET, "N_HOM_ALT": r.N_HOM_ALT, "N_HOM_REF": r.N_HOM_REF,
+                "N_FAIL": r.N_FAIL, "N_NO_COVERAGE": r.N_NO_COVERAGE,
             }
-            if r.N_FAIL is not None:
-                entry["N_FAIL"] = r.N_FAIL
             out.append(entry)
         click.echo(json.dumps(out, indent=2))
     elif fmt == "tsv":
-        has_fail = any(r.N_FAIL is not None for r in results)
-        header = "chrom\tpos\tref\talt\tAC\tAN\tAF\tn_eligible\tN_HET\tN_HOM_ALT\tN_HOM_REF"
-        if has_fail:
-            header += "\tN_FAIL"
+        header = (
+            "chrom\tpos\tref\talt\tAC\tAN\tAF\tn_eligible\t"
+            "N_HET\tN_HOM_ALT\tN_HOM_REF\tN_FAIL\tN_NO_COVERAGE"
+        )
         click.echo(header)
         for r in results:
             af = f"{r.AF:.6f}" if r.AF is not None else "NA"
             line = (
                 f"{r.variant.chrom}\t{r.variant.pos}\t{r.variant.ref}\t"
                 f"{r.variant.alt}\t{r.AC}\t{r.AN}\t{af}\t{r.n_samples_eligible}\t"
-                f"{r.N_HET}\t{r.N_HOM_ALT}\t{r.N_HOM_REF}"
+                f"{r.N_HET}\t{r.N_HOM_ALT}\t{r.N_HOM_REF}\t{r.N_FAIL}\t{r.N_NO_COVERAGE}"
             )
-            if has_fail:
-                line += f"\t{r.N_FAIL if r.N_FAIL is not None else '?'}"
             click.echo(line)
     else:  # text
         if not results:
@@ -101,11 +98,11 @@ def _print_results(results, fmt: str) -> None:
             return
         for r in results:
             af = f"{r.AF:.4f}" if r.AF is not None else "NA"
-            fail_str = f"  N_FAIL={r.N_FAIL}" if r.N_FAIL is not None else ""
             click.echo(
                 f"{r.variant.chrom}:{r.variant.pos} {r.variant.ref}>{r.variant.alt}  "
                 f"AC={r.AC}  AN={r.AN}  AF={af}  n_eligible={r.n_samples_eligible}  "
-                f"N_HET={r.N_HET}  N_HOM_ALT={r.N_HOM_ALT}  N_HOM_REF={r.N_HOM_REF}{fail_str}"
+                f"N_HET={r.N_HET}  N_HOM_ALT={r.N_HOM_ALT}  N_HOM_REF={r.N_HOM_REF}  "
+                f"N_FAIL={r.N_FAIL}  N_NO_COVERAGE={r.N_NO_COVERAGE}"
             )
 
 
@@ -324,7 +321,8 @@ def annotate(db, input_vcf, output_vcf, phenotype, sex, tech, threads, verbose,
       AFQUERY_N_HET       heterozygous carrier count (per ALT)
       AFQUERY_N_HOM_ALT   homozygous alt count (per ALT)
       AFQUERY_N_HOM_REF   homozygous ref count (per ALT)
-      AFQUERY_N_FAIL      samples with FILTER!=PASS
+      AFQUERY_N_FAIL      samples with FILTER!=PASS (per site)
+      AFQUERY_N_NO_COVERAGE  eligible samples lacking coverage evidence (per ALT)
     """
     if no_warn:
         import warnings
diff --git a/tests/test_cli.py b/tests/test_cli.py
index b8c4526..4251286 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -435,6 +435,34 @@ def test_query_tsv_with_fail_col(runner, test_db):
     assert "N_FAIL" in lines[0]
 
 
+def test_query_output_includes_n_no_coverage_all_formats(runner, test_db):
+    """N_FAIL and N_NO_COVERAGE appear in text/tsv/json output, and TSV columns line up."""
+    base = ["query", "--db", test_db, "--locus", "chr1:1500"]
+
+    text = runner.invoke(cli, base)
+    assert text.exit_code == 0
+    assert "N_FAIL=" in text.output
+    assert "N_NO_COVERAGE=" in text.output
+
+    tsv = runner.invoke(cli, base + ["--format", "tsv"])
+    assert tsv.exit_code == 0
+    tsv_lines = tsv.output.strip().splitlines()
+    assert len(tsv_lines) >= 2
+    header = tsv_lines[0].split("\t")
+    assert "N_FAIL" in header
+    assert header[-1] == "N_NO_COVERAGE"
+    for row in tsv_lines[1:]:
+        assert len(row.split("\t")) == len(header)
+
+    js = runner.invoke(cli, base + ["--format", "json"])
+    assert js.exit_code == 0
+    data = json.loads(js.output)
+    assert len(data) >= 1
+    for entry in data:
+        assert isinstance(entry["N_FAIL"], int)
+        assert isinstance(entry["N_NO_COVERAGE"], int)
+
+
 # --- afquery annotate: verbose flag ---
 
 def test_annotate_verbose(runner, test_db, tmp_path):