From 018ddd0242e4f0fd4cee087fd2f2425c4707e8d9 Mon Sep 17 00:00:00 2001
From: BubbleCal <BubbleCal@users.noreply.github.com>
Date: Wed, 1 Jul 2026 16:26:28 +0800
Subject: [PATCH] docs(indexing): add rabitq performance guide

---
 docs/docs.json                                |   1 +
 docs/indexing/index.mdx                       |   2 +-
 docs/indexing/quantization.mdx                |   8 +-
 docs/indexing/rabitq.mdx                      | 113 ++++++++++++++++++
 docs/snippets/indexing.mdx                    |   2 +
 .../indexing/rabitq-approx-mode-latency.svg   |  35 ++++++
 .../indexing/rabitq-approx-mode-qps.svg       |  35 ++++++
 .../images/indexing/rabitq-hot-memory.svg     |  39 ++++++
 .../images/indexing/rabitq-p99-latency.svg    |  39 ++++++
 .../images/indexing/rabitq-qps-per-core.svg   |  39 ++++++
 .../assets/images/indexing/rabitq-recall.svg  |  39 ++++++
 tests/py/test_indexing.py                     |  23 ++++
 12 files changed, 370 insertions(+), 5 deletions(-)
 create mode 100644 docs/indexing/rabitq.mdx
 create mode 100644 docs/static/assets/images/indexing/rabitq-approx-mode-latency.svg
 create mode 100644 docs/static/assets/images/indexing/rabitq-approx-mode-qps.svg
 create mode 100644 docs/static/assets/images/indexing/rabitq-hot-memory.svg
 create mode 100644 docs/static/assets/images/indexing/rabitq-p99-latency.svg
 create mode 100644 docs/static/assets/images/indexing/rabitq-qps-per-core.svg
 create mode 100644 docs/static/assets/images/indexing/rabitq-recall.svg

diff --git a/docs/docs.json b/docs/docs.json
index 87db70bc..0f624475 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -110,6 +110,7 @@
                   "indexing/scalar-index",
                   "indexing/gpu-indexing",
                   "indexing/quantization",
+                  "indexing/rabitq",
                   "indexing/reindexing"
                 ]
               },
diff --git a/docs/indexing/index.mdx b/docs/indexing/index.mdx
index 815cf5f2..246287ac 100644
--- a/docs/indexing/index.mdx
+++ b/docs/indexing/index.mdx
@@ -53,7 +53,7 @@ Vector indexes can use different quantization methods to compress vectors and im
 | :----------- | :------- | :---------- |
 | `PQ` (Product Quantization) | Default choice for most vector search scenarios. Use when you need to balance index size and recall. | Divides vectors into subvectors and quantizes each subvector independently. Provides a good balance between compression ratio and search accuracy. |
 | `SQ` (Scalar Quantization) | Use when you need faster indexing or when vector dimensions have consistent value ranges. | Quantizes each dimension independently. Simpler than PQ but typically provides less compression. |
-| `RQ` (RabitQ Quantization) | Use when you need maximum compression or have specific per-dimension requirements. | Per-dimension quantization using a RabitQ codebook. Provides fine-grained control over compression per dimension. For `IVF_RQ`, vector dimensions must be divisible by `8`. |
+| `RQ` (RaBitQ Quantization) | Use when you need compact high-dimensional vector search with strong recall and low serving memory. | RaBitQ supports a classic 1-bit path and multi-bit quantization for higher recall. For `IVF_RQ`, vector dimensions must be divisible by `8`. See the [RaBitQ indexing guide](/indexing/rabitq) for performance and memory tradeoffs. |
 | `None/Flat` | Use for binary vectors (with `hamming` distance) or when you need maximum recall and have sufficient storage. | No quantization—stores raw vectors. Provides the highest accuracy but requires more storage and memory. |
 
 ## Understanding the IVF-PQ Index
diff --git a/docs/indexing/quantization.mdx b/docs/indexing/quantization.mdx
index 85d277cb..2da67fb0 100644
--- a/docs/indexing/quantization.mdx
+++ b/docs/indexing/quantization.mdx
@@ -16,7 +16,7 @@ Use quantization when:
 LanceDB currently exposes multiple quantized vector index types, including:
 - `IVF_PQ` -- Inverted File index with Product Quantization (default). See the [vector indexing guide](/indexing/vector-index) for `IVF_PQ` examples.
 - `IVF_SQ` -- Inverted File index with Scalar Quantization. This is available in Python and Rust; TypeScript does not currently expose `IvfSq`.
-- `IVF_RQ` -- Inverted File index with **RaBitQ** quantization (binary, 1 bit per dimension). Requires vector dimensions divisible by `8`. See [below](#rabitq-quantization) for details.
+- `IVF_RQ` -- Inverted File index with **RaBitQ** quantization. It supports the classic 1-bit representation and multi-bit quantization for higher recall. Requires vector dimensions divisible by `8`. See [below](#rabitq-quantization) for details.
 - `IVF_HNSW_SQ` -- IVF partitions with an **HNSW graph per partition** plus **Scalar Quantization**. Strong recall/latency/size trade-off for most workloads.
 - `IVF_HNSW_PQ` -- IVF partitions with an **HNSW graph per partition** plus **Product Quantization**. Prefer when PQ-level compression matters and you still want HNSW-style in-partition search.
 
@@ -26,7 +26,7 @@ Use the same distance metric when training the index and running queries against
 
 ## RaBitQ quantization
 
-RaBitQ is a binary quantization method that represents each normalized embedding using **1 bit per dimension**, plus a couple of small corrective scalars. In practice, a 1,024-dimensional `float32` vector that would normally take 4 KB can be compressed to roughly a few hundred bytes with RaBitQ, while still maintaining reasonable recall.
+RaBitQ is a quantization method that can represent each normalized embedding using the classic **1 bit per dimension** layout, plus a couple of small corrective scalars. LanceDB also supports multi-bit RaBitQ through `num_bits`, which stores extra quantized signal for higher recall. In practice, a 1,024-dimensional `float32` vector that would normally take 4 KB can be compressed to roughly a few hundred bytes with RaBitQ, while still maintaining reasonable recall.
 
 ### How RaBitQ works
 
@@ -42,7 +42,7 @@ Compared to `IVF_PQ`, RaBitQ:
 - Builds indexes faster and handles updates more easily
 - Maintains or improves recall at high dimensionality under the same storage budget
 
-For a deeper dive into the theory and some benchmark results, see the blog post: [LanceDB's RaBitQ Quantization for Blazing Fast Vector Search](https://lancedb.com/blog/feature-rabitq-quantization/).
+For a deeper dive into current performance, memory, multi-bit recall, and `approx_mode` tradeoffs, see the [RaBitQ indexing guide](/indexing/rabitq). For the original theory and early benchmark results, see the blog post: [LanceDB's RaBitQ Quantization for Blazing Fast Vector Search](https://lancedb.com/blog/feature-rabitq-quantization/).
 
 ### Using RaBitQ
 
@@ -54,7 +54,7 @@ When using `IVF_RQ`, vector dimensions must be divisible by `8`.
 
 `num_bits` controls how many bits per dimension are used:
 
-1 bit is the classic RaBitQ setting. You can set it to 2, 4, or 8 bits to improve fidelity for better precision or recall — the main trade-off is additional storage for the extra bits per dimension, with only a modest increase in query-time compute.
+1 bit is the classic RaBitQ setting. Set `num_bits` to a higher value to improve fidelity for better precision or recall. The main trade-off is additional storage for the extra bits per dimension, with only a modest increase in query-time compute.
 It's also possible to tune the number of IVF partitions in `IVF_RQ`, similar to how you would do in `IVF_PQ`.
 
 <Warning title="Reading multi-bit indexes across versions">
diff --git a/docs/indexing/rabitq.mdx b/docs/indexing/rabitq.mdx
new file mode 100644
index 00000000..6bc64179
--- /dev/null
+++ b/docs/indexing/rabitq.mdx
@@ -0,0 +1,113 @@
+---
+title: "RaBitQ Indexing"
+sidebarTitle: "RaBitQ"
+description: "Use IVF_RQ and multi-bit RaBitQ to improve recall, tail latency, and serving memory for high-dimensional vector search in LanceDB."
+icon: "gauge-high"
+keywords: ["rabitq", "ivf_rq", "approx_mode", "vector search", "quantization", "recall"]
+---
+import { PyRabitqCreateIndex as RabitqCreateIndex } from '/snippets/indexing.mdx';
+
+RaBitQ (`IVF_RQ`) is LanceDB's high-compression vector index for large embedding workloads. It is built for the cases where you want strong recall and low latency without keeping full-precision vectors in the hot serving path.
+
+<Info>
+The improvements described on this page are available in LanceDB Cloud.
+</Info>
+
+## Start with the result
+
+On a DBpedia 1M benchmark with 1,536-dimensional vectors, `top_k=10`, `nprobes=24`, and no raw-vector refine, multi-bit `IVF_RQ` improves both quality and speed compared with `IVF_PQ`.
+
+`QPS/core` is single-core throughput. It is not the total throughput of a LanceDB deployment.
+
+![p99 latency comparison for IVF_PQ and multi-bit IVF_RQ](/static/assets/images/indexing/rabitq-p99-latency.svg)
+
+![QPS per core comparison for IVF_PQ and multi-bit IVF_RQ](/static/assets/images/indexing/rabitq-qps-per-core.svg)
+
+| Index | Recall@10 | Avg latency | p99 latency | QPS/core |
+| :--- | ---: | ---: | ---: | ---: |
+| `IVF_PQ`, no refine | 74.83% | 12.80 ms | 14.72 ms | 78.0 |
+| `IVF_RQ`, 3-bit | 93.52% | 3.82 ms | 4.74 ms | 261.0 |
+| `IVF_RQ`, 5-bit | 96.24% | 4.56 ms | 5.57 ms | 218.1 |
+| `IVF_RQ`, 7-bit | 96.83% | 4.96 ms | 5.94 ms | 200.7 |
+
+The 5-bit `IVF_RQ` index reaches 96.24% recall while keeping p99 latency about 2.6x lower than `IVF_PQ` in this benchmark. If you prioritize throughput, 3-bit `IVF_RQ` reaches 261 QPS/core with much higher recall than `IVF_PQ`.
+
+## Higher recall without raw-vector refine
+
+Classic RaBitQ stores one bit per dimension. Multi-bit RaBitQ keeps the same compact search structure and adds extra bits that preserve more of the original vector signal.
+
+![Recall comparison for IVF_PQ and multi-bit IVF_RQ](/static/assets/images/indexing/rabitq-recall.svg)
+
+This matters because high recall with `IVF_PQ` often depends on `refine_factor`: LanceDB first searches the compressed index, then fetches and reranks extra candidates using the original full-precision vectors. That can work well, but it increases the memory and I/O pressure of the serving path.
+
+With multi-bit `IVF_RQ`, LanceDB can recover much of that quality directly from the quantized index. You can get high recall without making raw-vector refine the default path for every query.
+
+## Serving memory tradeoff
+
+The memory difference is easiest to see if you separate index storage from hot serving memory.
+
+![Hot serving memory comparison for IVF_PQ refine and IVF_RQ modes](/static/assets/images/indexing/rabitq-hot-memory.svg)
+
+| Serving path | Hot vector memory intuition |
+| :--- | :--- |
+| `IVF_PQ`, no refine | Very small, but recall can be limited. |
+| 5-bit `IVF_RQ` with `approx_mode="fast"` | Can use the 1-bit RaBitQ search path, so hot search memory can match the 1-bit budget. |
+| 5-bit `IVF_RQ` with multi-bit scoring | Uses more quantized bits, but still avoids full raw vectors in the hot path. |
+| `IVF_PQ` with raw-vector refine | Requires the compressed index plus full-precision vectors for reranking. |
+
+<Note>
+A 5-bit `IVF_RQ` index stores more quantized code bytes than a 1-bit `IVF_RQ` index, and can be larger on disk than a default `IVF_PQ` index. The advantage is in high-recall serving: multi-bit `IVF_RQ` can use compact quantized codes for reranking, while `IVF_PQ` typically needs full-precision vectors to recover similar recall.
+</Note>
+
+This is also where `approx_mode` becomes useful. A 5-bit `IVF_RQ` index can run a fast query that only uses the 1-bit path, or a higher-recall query that uses more of the stored multi-bit signal. You do not need to rebuild the index to move between those points.
+
+## How LanceDB makes it fast
+
+The recent RaBitQ work improves the full query path:
+
+- **Fast rotation**: RaBitQ uses a randomized rotation before quantization. The optimized rotation path reduces the cost of preparing vectors for compact binary-style scoring.
+- **Multi-bit reranking**: Extra bits give LanceDB more signal during candidate scoring, so recall improves without always falling back to raw-vector refine.
+- **SIMD distance kernels**: The inner scoring loop runs over packed quantized data and uses CPU vector instructions to evaluate many dimensions at once.
+- **A one-bit fast path**: Even if the index stores extra bits, `approx_mode="fast"` can search with the 1-bit representation when you want the lowest-latency path.
+
+The result is not just a smaller index format. It is a faster high-recall serving path for large, high-dimensional vectors.
+
+## Tune the tradeoff at query time
+
+`approx_mode` lets you tune recall and performance per query instead of rebuilding separate indexes for different product needs. The option applies to RQ-quantized indexes such as `IVF_RQ`; other index types ignore it.
+
+| `approx_mode` | When to use it |
+| :--- | :--- |
+| `fast` | Lowest-latency path. Useful for autocomplete, exploration, high-fanout retrieval, or queries with recall headroom. |
+| `normal` | Default balance. Good starting point for most production traffic. |
+| `accurate` | Higher-recall path. Useful for quality-sensitive retrieval, evaluation, and requests where a few extra milliseconds are acceptable. |
+
+With a single 5-bit `IVF_RQ` index shared by all three modes, the query mode alone controls the speed/quality point:
+
+![Average latency comparison across approx_mode values](/static/assets/images/indexing/rabitq-approx-mode-latency.svg)
+
+![QPS per core comparison across approx_mode values](/static/assets/images/indexing/rabitq-approx-mode-qps.svg)
+
+| `approx_mode` | Recall@10 | Avg latency | Approx QPS/core |
+| :--- | ---: | ---: | ---: |
+| `fast` | 81.42% | 3.175 ms | 315 |
+| `normal` | 96.11% | 3.802 ms | 263 |
+| `accurate` | 96.57% | 4.508 ms | 222 |
+
+Approx QPS/core is derived from the single-core mean latency in this benchmark as `1000 / avg_latency_ms`. Use it for relative comparison, not as a cluster-level throughput estimate.
+
+See the [vector search guide](/search/vector-search) for the current query API behavior and the full interaction between `approx_mode`, `nprobes`, and `refine_factor`.
+
+## Create an IVF_RQ index
+
+To switch a table to RaBitQ, create an `IVF_RQ` index with the `IvfRq` config object. Start with `num_bits=5` when recall matters, and lower it if you want a smaller index.
+
+<CodeGroup>
+    <CodeBlock filename="Python" language="Python" icon="python">
+    {RabitqCreateIndex}
+    </CodeBlock>
+</CodeGroup>
+
+All of these RaBitQ updates are available in LanceDB Cloud. Build an `IVF_RQ` index once, then tune recall and performance at query time with `approx_mode` as your workload changes.
+
+Use `IVF_RQ` when your workload has high-dimensional embeddings, needs strong recall, and cannot afford to keep full-precision vectors hot just to rerank every query. For low-dimensional vectors, especially those with 256 dimensions or fewer, benchmark `IVF_PQ` as well because it can still be a better fit.
diff --git a/docs/snippets/indexing.mdx b/docs/snippets/indexing.mdx
index 278a097e..e1cd7c09 100644
--- a/docs/snippets/indexing.mdx
+++ b/docs/snippets/indexing.mdx
@@ -12,6 +12,8 @@ export const PyGpuIndexCuda = "table.create_index(\n    num_partitions=256,\n
 
 export const PyGpuIndexMps = "table.create_index(\n    num_partitions=256,\n    num_sub_vectors=96,\n    accelerator=\"mps\",\n)\n";
 
+export const PyRabitqCreateIndex = "from lancedb.index import IvfRq\n\ntable.create_index(\n    \"vector\",\n    config=IvfRq(\n        distance_type=\"cosine\",\n        num_bits=5,\n    ),\n    replace=True,\n)\n";
+
 export const PyReindexingIncremental = "table = db.open_table(\"reindexing_incremental\")\ntable.add([{\"vector\": [3.1, 4.1], \"text\": \"Frodo was a happy puppy\"}])\ntable.optimize()\n";
 
 export const PyScalarIndexBuild = "tbl = db.open_table(\"scalar_index_build\")\ntbl.create_scalar_index(\"book_id\")\ntbl.create_scalar_index(\"publisher\", index_type=\"BITMAP\")\n";
diff --git a/docs/static/assets/images/indexing/rabitq-approx-mode-latency.svg b/docs/static/assets/images/indexing/rabitq-approx-mode-latency.svg
new file mode 100644
index 00000000..da15235c
--- /dev/null
+++ b/docs/static/assets/images/indexing/rabitq-approx-mode-latency.svg
@@ -0,0 +1,35 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 720 380" role="img" aria-labelledby="title desc">
+  <title id="title">approx_mode latency comparison</title>
+  <desc id="desc">Bar chart comparing average latency for fast, normal, and accurate approx_mode settings on the same 5-bit IVF_RQ index.</desc>
+  <rect width="720" height="380" fill="#ffffff"/>
+  <text x="360" y="34" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="22" font-weight="700" fill="#111827">approx_mode latency</text>
+  <text x="360" y="58" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Same 5-bit IVF_RQ index, lower is better</text>
+  <line x1="82" y1="296" x2="650" y2="296" stroke="#111827" stroke-width="1.4"/>
+  <line x1="82" y1="86" x2="82" y2="296" stroke="#111827" stroke-width="1.4"/>
+  <g stroke="#e5e7eb" stroke-width="1">
+    <line x1="82" y1="243.5" x2="650" y2="243.5"/>
+    <line x1="82" y1="191" x2="650" y2="191"/>
+    <line x1="82" y1="138.5" x2="650" y2="138.5"/>
+    <line x1="82" y1="86" x2="650" y2="86"/>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">
+    <text x="72" y="300" text-anchor="end">0</text>
+    <text x="72" y="248" text-anchor="end">1.25</text>
+    <text x="72" y="195" text-anchor="end">2.50</text>
+    <text x="72" y="143" text-anchor="end">3.75</text>
+    <text x="72" y="90" text-anchor="end">5 ms</text>
+  </g>
+  <rect x="156" y="162.7" width="98" height="133.3" fill="#2563eb" rx="5"/>
+  <rect x="314" y="136.3" width="98" height="159.7" fill="#2563eb" rx="5"/>
+  <rect x="472" y="106.7" width="98" height="189.3" fill="#2563eb" rx="5"/>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" font-weight="700" fill="#1d4ed8">
+    <text x="205" y="154" text-anchor="middle">3.175 ms</text>
+    <text x="363" y="128" text-anchor="middle">3.802 ms</text>
+    <text x="521" y="98" text-anchor="middle">4.508 ms</text>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" fill="#111827">
+    <text x="205" y="322" text-anchor="middle">fast</text>
+    <text x="363" y="322" text-anchor="middle">normal</text>
+    <text x="521" y="322" text-anchor="middle">accurate</text>
+  </g>
+</svg>
diff --git a/docs/static/assets/images/indexing/rabitq-approx-mode-qps.svg b/docs/static/assets/images/indexing/rabitq-approx-mode-qps.svg
new file mode 100644
index 00000000..cf839b73
--- /dev/null
+++ b/docs/static/assets/images/indexing/rabitq-approx-mode-qps.svg
@@ -0,0 +1,35 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 720 380" role="img" aria-labelledby="title desc">
+  <title id="title">approx_mode QPS per core comparison</title>
+  <desc id="desc">Bar chart comparing approximate QPS per core for fast, normal, and accurate approx_mode settings on the same 5-bit IVF_RQ index.</desc>
+  <rect width="720" height="380" fill="#ffffff"/>
+  <text x="360" y="34" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="22" font-weight="700" fill="#111827">approx_mode QPS per core</text>
+  <text x="360" y="58" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Derived from average latency as 1000 / avg_latency_ms</text>
+  <line x1="82" y1="296" x2="650" y2="296" stroke="#111827" stroke-width="1.4"/>
+  <line x1="82" y1="86" x2="82" y2="296" stroke="#111827" stroke-width="1.4"/>
+  <g stroke="#e5e7eb" stroke-width="1">
+    <line x1="82" y1="243.5" x2="650" y2="243.5"/>
+    <line x1="82" y1="191" x2="650" y2="191"/>
+    <line x1="82" y1="138.5" x2="650" y2="138.5"/>
+    <line x1="82" y1="86" x2="650" y2="86"/>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">
+    <text x="72" y="300" text-anchor="end">0</text>
+    <text x="72" y="248" text-anchor="end">85</text>
+    <text x="72" y="195" text-anchor="end">170</text>
+    <text x="72" y="143" text-anchor="end">255</text>
+    <text x="72" y="90" text-anchor="end">340</text>
+  </g>
+  <rect x="156" y="101.5" width="98" height="194.5" fill="#2563eb" rx="5"/>
+  <rect x="314" y="133.6" width="98" height="162.4" fill="#2563eb" rx="5"/>
+  <rect x="472" y="158.9" width="98" height="137.1" fill="#2563eb" rx="5"/>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" font-weight="700" fill="#1d4ed8">
+    <text x="205" y="93" text-anchor="middle">315</text>
+    <text x="363" y="125" text-anchor="middle">263</text>
+    <text x="521" y="151" text-anchor="middle">222</text>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" fill="#111827">
+    <text x="205" y="322" text-anchor="middle">fast</text>
+    <text x="363" y="322" text-anchor="middle">normal</text>
+    <text x="521" y="322" text-anchor="middle">accurate</text>
+  </g>
+</svg>
diff --git a/docs/static/assets/images/indexing/rabitq-hot-memory.svg b/docs/static/assets/images/indexing/rabitq-hot-memory.svg
new file mode 100644
index 00000000..39c248b2
--- /dev/null
+++ b/docs/static/assets/images/indexing/rabitq-hot-memory.svg
@@ -0,0 +1,39 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 860 430" role="img" aria-labelledby="title desc">
+  <title id="title">Serving hot memory comparison</title>
+  <desc id="desc">Horizontal bar chart showing relative hot memory for PQ no refine, RQ fast, RQ full 5-bit, and PQ plus refine.</desc>
+  <rect width="860" height="430" fill="#ffffff"/>
+  <text x="430" y="34" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="24" font-weight="700" fill="#111827">Serving hot memory by query path</text>
+  <text x="430" y="58" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Relative to raw f32 vectors, lower is better</text>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" fill="#111827">
+    <text x="64" y="113">PQ no refine</text>
+    <text x="64" y="183">RQ fast</text>
+    <text x="64" y="253">RQ full 5-bit</text>
+    <text x="64" y="323">PQ + refine</text>
+  </g>
+  <line x1="220" y1="360" x2="800" y2="360" stroke="#111827" stroke-width="1.4"/>
+  <g stroke="#e5e7eb" stroke-width="1">
+    <line x1="220" y1="82" x2="220" y2="360"/>
+    <line x1="365" y1="82" x2="365" y2="360"/>
+    <line x1="510" y1="82" x2="510" y2="360"/>
+    <line x1="655" y1="82" x2="655" y2="360"/>
+    <line x1="800" y1="82" x2="800" y2="360"/>
+  </g>
+  <rect x="220" y="92" width="18" height="38" fill="#f97316" rx="5"/>
+  <rect x="220" y="162" width="18" height="38" fill="#2563eb" rx="5"/>
+  <rect x="220" y="232" width="89" height="38" fill="#2563eb" rx="5"/>
+  <rect x="220" y="302" width="589" height="38" fill="#f97316" rx="5"/>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" font-weight="700">
+    <text x="250" y="116" fill="#c2410c">~1/32 raw</text>
+    <text x="250" y="186" fill="#1d4ed8">~1/32 raw</text>
+    <text x="322" y="256" fill="#1d4ed8">~5/32 raw</text>
+    <text x="684" y="326" fill="#111827">~33/32 raw</text>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="11" fill="#374151">
+    <text x="220" y="381" text-anchor="middle">0</text>
+    <text x="365" y="381" text-anchor="middle">0.25x</text>
+    <text x="510" y="381" text-anchor="middle">0.50x</text>
+    <text x="655" y="381" text-anchor="middle">0.75x</text>
+    <text x="800" y="381" text-anchor="middle">1.0x raw</text>
+  </g>
+  <text x="430" y="410" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">A 5-bit RQ index can use the 1-bit fast path for PQ-like hot memory, or full bits for higher recall.</text>
+</svg>
diff --git a/docs/static/assets/images/indexing/rabitq-p99-latency.svg b/docs/static/assets/images/indexing/rabitq-p99-latency.svg
new file mode 100644
index 00000000..eea3dfa4
--- /dev/null
+++ b/docs/static/assets/images/indexing/rabitq-p99-latency.svg
@@ -0,0 +1,39 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 820 420" role="img" aria-labelledby="title desc">
+  <title id="title">DBpedia p99 latency comparison</title>
+  <desc id="desc">Bar chart comparing p99 latency for IVF_PQ and IVF_RQ 3-bit, 5-bit, and 7-bit indexes on DBpedia 1M.</desc>
+  <rect width="820" height="420" fill="#ffffff"/>
+  <text x="410" y="34" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="24" font-weight="700" fill="#111827">DBpedia 1M: p99 latency</text>
+  <text x="410" y="58" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Lower is better, nprobes=24, top-k=10, no raw-vector refine</text>
+  <line x1="86" y1="326" x2="744" y2="326" stroke="#111827" stroke-width="1.4"/>
+  <line x1="86" y1="86" x2="86" y2="326" stroke="#111827" stroke-width="1.4"/>
+  <g stroke="#e5e7eb" stroke-width="1">
+    <line x1="86" y1="266" x2="744" y2="266"/>
+    <line x1="86" y1="206" x2="744" y2="206"/>
+    <line x1="86" y1="146" x2="744" y2="146"/>
+    <line x1="86" y1="86" x2="744" y2="86"/>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">
+    <text x="76" y="330" text-anchor="end">0</text>
+    <text x="76" y="270" text-anchor="end">4</text>
+    <text x="76" y="210" text-anchor="end">8</text>
+    <text x="76" y="150" text-anchor="end">12</text>
+    <text x="76" y="90" text-anchor="end">16 ms</text>
+  </g>
+  <rect x="145" y="105.2" width="92" height="220.8" fill="#f97316" rx="5"/>
+  <rect x="302" y="254.9" width="92" height="71.1" fill="#2563eb" rx="5"/>
+  <rect x="459" y="242.4" width="92" height="83.6" fill="#2563eb" rx="5"/>
+  <rect x="616" y="236.9" width="92" height="89.1" fill="#2563eb" rx="5"/>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" font-weight="700">
+    <text x="191" y="96" text-anchor="middle" fill="#c2410c">14.72 ms</text>
+    <text x="348" y="246" text-anchor="middle" fill="#1d4ed8">4.74 ms</text>
+    <text x="505" y="234" text-anchor="middle" fill="#1d4ed8">5.57 ms</text>
+    <text x="662" y="228" text-anchor="middle" fill="#1d4ed8">5.94 ms</text>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" fill="#111827">
+    <text x="191" y="352" text-anchor="middle">IVF_PQ</text>
+    <text x="348" y="352" text-anchor="middle">RQ 3-bit</text>
+    <text x="505" y="352" text-anchor="middle">RQ 5-bit</text>
+    <text x="662" y="352" text-anchor="middle">RQ 7-bit</text>
+  </g>
+  <text x="410" y="388" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">5-bit IVF_RQ keeps p99 latency about 2.6x lower than IVF_PQ while reaching 96.24% recall.</text>
+</svg>
diff --git a/docs/static/assets/images/indexing/rabitq-qps-per-core.svg b/docs/static/assets/images/indexing/rabitq-qps-per-core.svg
new file mode 100644
index 00000000..d8b4e3be
--- /dev/null
+++ b/docs/static/assets/images/indexing/rabitq-qps-per-core.svg
@@ -0,0 +1,39 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 820 420" role="img" aria-labelledby="title desc">
+  <title id="title">DBpedia QPS per core comparison</title>
+  <desc id="desc">Bar chart comparing QPS per core for IVF_PQ and IVF_RQ 3-bit, 5-bit, and 7-bit indexes on DBpedia 1M.</desc>
+  <rect width="820" height="420" fill="#ffffff"/>
+  <text x="410" y="34" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="24" font-weight="700" fill="#111827">DBpedia 1M: QPS per core</text>
+  <text x="410" y="58" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Higher is better, nprobes=24, top-k=10, no raw-vector refine</text>
+  <line x1="86" y1="326" x2="744" y2="326" stroke="#111827" stroke-width="1.4"/>
+  <line x1="86" y1="86" x2="86" y2="326" stroke="#111827" stroke-width="1.4"/>
+  <g stroke="#e5e7eb" stroke-width="1">
+    <line x1="86" y1="266" x2="744" y2="266"/>
+    <line x1="86" y1="206" x2="744" y2="206"/>
+    <line x1="86" y1="146" x2="744" y2="146"/>
+    <line x1="86" y1="86" x2="744" y2="86"/>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">
+    <text x="76" y="330" text-anchor="end">0</text>
+    <text x="76" y="270" text-anchor="end">70</text>
+    <text x="76" y="210" text-anchor="end">140</text>
+    <text x="76" y="150" text-anchor="end">210</text>
+    <text x="76" y="90" text-anchor="end">280</text>
+  </g>
+  <rect x="145" y="259.1" width="92" height="66.9" fill="#f97316" rx="5"/>
+  <rect x="302" y="102.3" width="92" height="223.7" fill="#2563eb" rx="5"/>
+  <rect x="459" y="139.0" width="92" height="187.0" fill="#2563eb" rx="5"/>
+  <rect x="616" y="153.9" width="92" height="172.1" fill="#2563eb" rx="5"/>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" font-weight="700">
+    <text x="191" y="250" text-anchor="middle" fill="#c2410c">78.0</text>
+    <text x="348" y="94" text-anchor="middle" fill="#1d4ed8">261.0</text>
+    <text x="505" y="131" text-anchor="middle" fill="#1d4ed8">218.1</text>
+    <text x="662" y="146" text-anchor="middle" fill="#1d4ed8">200.7</text>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" fill="#111827">
+    <text x="191" y="352" text-anchor="middle">IVF_PQ</text>
+    <text x="348" y="352" text-anchor="middle">RQ 3-bit</text>
+    <text x="505" y="352" text-anchor="middle">RQ 5-bit</text>
+    <text x="662" y="352" text-anchor="middle">RQ 7-bit</text>
+  </g>
+  <text x="410" y="388" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">QPS is reported per core, not as total system or cluster throughput.</text>
+</svg>
diff --git a/docs/static/assets/images/indexing/rabitq-recall.svg b/docs/static/assets/images/indexing/rabitq-recall.svg
new file mode 100644
index 00000000..e68d7605
--- /dev/null
+++ b/docs/static/assets/images/indexing/rabitq-recall.svg
@@ -0,0 +1,39 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 820 420" role="img" aria-labelledby="title desc">
+  <title id="title">DBpedia recall comparison</title>
+  <desc id="desc">Bar chart comparing recall for IVF_PQ and IVF_RQ 3-bit, 5-bit, and 7-bit indexes on DBpedia 1M.</desc>
+  <rect width="820" height="420" fill="#ffffff"/>
+  <text x="410" y="34" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="24" font-weight="700" fill="#111827">DBpedia 1M: Recall@10</text>
+  <text x="410" y="58" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Higher is better, nprobes=24, top-k=10, no raw-vector refine</text>
+  <line x1="86" y1="326" x2="744" y2="326" stroke="#111827" stroke-width="1.4"/>
+  <line x1="86" y1="86" x2="86" y2="326" stroke="#111827" stroke-width="1.4"/>
+  <g stroke="#e5e7eb" stroke-width="1">
+    <line x1="86" y1="266" x2="744" y2="266"/>
+    <line x1="86" y1="206" x2="744" y2="206"/>
+    <line x1="86" y1="146" x2="744" y2="146"/>
+    <line x1="86" y1="86" x2="744" y2="86"/>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="12" fill="#374151">
+    <text x="76" y="330" text-anchor="end">70%</text>
+    <text x="76" y="270" text-anchor="end">77.5%</text>
+    <text x="76" y="210" text-anchor="end">85%</text>
+    <text x="76" y="150" text-anchor="end">92.5%</text>
+    <text x="76" y="90" text-anchor="end">100%</text>
+  </g>
+  <rect x="145" y="287.4" width="92" height="38.6" fill="#f97316" rx="5"/>
+  <rect x="302" y="137.8" width="92" height="188.2" fill="#2563eb" rx="5"/>
+  <rect x="459" y="116.1" width="92" height="209.9" fill="#2563eb" rx="5"/>
+  <rect x="616" y="111.4" width="92" height="214.6" fill="#2563eb" rx="5"/>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" font-weight="700">
+    <text x="191" y="279" text-anchor="middle" fill="#c2410c">74.83%</text>
+    <text x="348" y="130" text-anchor="middle" fill="#1d4ed8">93.52%</text>
+    <text x="505" y="108" text-anchor="middle" fill="#1d4ed8">96.24%</text>
+    <text x="662" y="103" text-anchor="middle" fill="#1d4ed8">96.83%</text>
+  </g>
+  <g font-family="Inter, Arial, sans-serif" font-size="13" fill="#111827">
+    <text x="191" y="352" text-anchor="middle">IVF_PQ</text>
+    <text x="348" y="352" text-anchor="middle">RQ 3-bit</text>
+    <text x="505" y="352" text-anchor="middle">RQ 5-bit</text>
+    <text x="662" y="352" text-anchor="middle">RQ 7-bit</text>
+  </g>
+  <text x="410" y="388" text-anchor="middle" font-family="Inter, Arial, sans-serif" font-size="13" fill="#4b5563">Multi-bit RaBitQ stores more signal in the index, reducing the need for raw-vector refine.</text>
+</svg>
diff --git a/tests/py/test_indexing.py b/tests/py/test_indexing.py
index 7f63e8f0..409a54cf 100644
--- a/tests/py/test_indexing.py
+++ b/tests/py/test_indexing.py
@@ -274,6 +274,29 @@ def test_vector_index_custom_name(tmp_db):
     assert table.index_stats("my_custom_index")
 
 
+def test_rabitq_create_index(tmp_db):
+    table = tmp_db.create_table(
+        "rabitq_index",
+        _make_vector_rows(512, 8),  # presumably (rows, dim) -- TODO confirm
+        mode="overwrite",
+    )
+    # Marked region mirrors PyRabitqCreateIndex in docs/snippets/indexing.mdx.
+    # --8<-- [start:rabitq_create_index]
+    from lancedb.index import IvfRq
+
+    table.create_index(
+        "vector",
+        config=IvfRq(
+            distance_type="cosine",
+            num_bits=5,
+        ),
+        replace=True,
+    )
+    # --8<-- [end:rabitq_create_index]
+    # Smoke check only: passes if any index is listed (type/name not verified).
+    assert table.list_indices()
+
+
 def test_vector_index_hnsw(tmp_db):
     table = tmp_db.create_table(
         "vector_index_hnsw",