diff --git a/ordvec-python/README.md b/ordvec-python/README.md
index 8d19666..d1ca54d 100644
--- a/ordvec-python/README.md
+++ b/ordvec-python/README.md
@@ -24,6 +24,22 @@ scores, ids = q.search_asymmetric(np.random.randn(8, 1024).astype(np.float32), k
 | `Bitmap` | Constant-weight top-bucket bitmap per document; `popcount(Q AND D)` candidate scoring. |
 | `SignBitmap` | Sign bitmap for sign-cosine candidate generation; separate from the constant-weight bitmap theorem. |
 
+## Two-stage retrieval (subset rerank)
+
+A `Bitmap` / `SignBitmap` probe yields a candidate shortlist that
+`RankQuant.search_asymmetric_subset(query, candidates, k)` reranks exactly:
+
+```python
+cands = bm.top_m_candidates(query, m=256)          # uint32 shortlist
+scores, ids = rq.search_asymmetric_subset(query, cands, k=10)
+```
+
+Both returned arrays have length **`min(k, len(candidates))`**, not `k`. When
+`k > len(candidates)` the result is silently capped to the candidate count — the
+subset path never pads with sentinel rows. If you assemble a fixed-width
+`(n_q, k)` result buffer, fill each row from the returned length (`len(ids)`)
+rather than assuming `k` entries come back.
+
 ## Theory and calibration
 
 `Bitmap` exposes the constant-weight top-bucket overlap statistic formalized in
@@ -46,6 +62,13 @@ Wheels target CPython 3.10+ (abi3) and require `numpy>=2.2`. Building from
 source needs a Rust toolchain (MSRV 1.89) and
 [maturin](https://www.maturin.rs/).
 
+## Type stubs
+
+The package ships hand-written type stubs (`_ordvec.pyi`) and a `py.typed`
+marker, so editors and `mypy` get full signatures for the four index classes,
+the module-level rank-math primitives, and the `MAX_*` constants — the abi3
+native module is otherwise opaque to static analysis.
+
 ## Provenance & license
 
 The `ordvec` Python bindings were developed within turbovec, factored out
diff --git a/ordvec-python/python/ordvec/__init__.py b/ordvec-python/python/ordvec/__init__.py
index 7bd55b5..18d6555 100644
--- a/ordvec-python/python/ordvec/__init__.py
+++ b/ordvec-python/python/ordvec/__init__.py
@@ -29,6 +29,12 @@
 rank-mode classes; they are kept only to ease script migration and are not part
 of the documented surface — new code should use the OrdVec ontology names above.
 
+Subset rerank result length: ``RankQuant.search_asymmetric_subset(query,
+candidates, k)`` returns ``(scores, ids)`` of length ``min(k, len(candidates))``,
+not ``k``. Passing ``k > len(candidates)`` yields arrays shorter than ``k`` (the
+subset path does not pad with sentinel rows), so a caller building a fixed-width
+``(n_q, k)`` buffer must fill each row from the returned length, not from ``k``.
+
 On-disk persistence: each class's ``write(path)`` / ``load(path)`` passes
 ``path`` straight to the filesystem with no normalisation or ``..`` / traversal
 checks. Treat ``path`` as trusted input — in a service that derives it from
diff --git a/ordvec-python/python/ordvec/_ordvec.pyi b/ordvec-python/python/ordvec/_ordvec.pyi
new file mode 100644
index 0000000..de4dc7f
--- /dev/null
+++ b/ordvec-python/python/ordvec/_ordvec.pyi
@@ -0,0 +1,191 @@
+"""Type stubs for the ``ordvec._ordvec`` native (abi3) extension module.
+
+Hand-written to mirror the PyO3 surface in ``ordvec-python/src/lib.rs`` exactly
+— the four index classes (``Rank``, ``RankQuant``, ``Bitmap``, ``SignBitmap``),
+the module-level rank-math primitives, the byte-LUT / eval scorers, and the
+``MAX_*`` loader limit constants. abi3 wheels carry no embedded type
+information, so without this stub (and the ``py.typed`` marker) editors and
+``mypy`` see ``Any`` for the whole package.
+
+Drift policy: kept in sync with ``lib.rs`` by hand (the issue #32 trade-off — no
+new build dependency in exchange for manual upkeep). When a binding signature
+changes, update the matching entry here.
+
+Array conventions (matching the binding's runtime coercion / dtype contract):
+
+* Float embedding inputs (``vectors`` / ``queries`` / ``query`` / ``v`` /
+  ``corpus``) accept any floating NumPy array (float16/32/64) — coerced to
+  float32 at the boundary — so they are typed loosely as ``NDArray[Any]``.
+* Candidate / doc-id inputs accept any integer dtype (coerced to ``uint32``),
+  also typed ``NDArray[Any]``.
+* Returned arrays carry the binding's fixed output dtype: float32 scores, int64
+  search indices, uint32 candidate ids / overlap scores, uint16 ranks, uint8
+  buckets / packed bytes, uint64 bitmap words.
+"""
+
+from typing import Any, final
+
+import numpy as np
+from numpy.typing import NDArray
+
+# ---------------------------------------------------------------------------
+# Loader / limit constants (parity with ``ordvec::rank_io::*``).
+# ---------------------------------------------------------------------------
+MAX_DIM: int
+MAX_SIGN_BITMAP_DIM: int
+MAX_VECTORS: int
+
+# ---------------------------------------------------------------------------
+# Index classes
+# ---------------------------------------------------------------------------
+
+@final
+class Rank:
+    """Full-precision rank vectors (u16 per coordinate)."""
+
+    def __new__(cls, dim: int) -> Rank: ...
+    def __repr__(self) -> str: ...
+    def __len__(self) -> int: ...
+    @property
+    def dim(self) -> int: ...
+    @property
+    def bytes_per_vec(self) -> int: ...
+    @property
+    def byte_size(self) -> int: ...
+    def is_empty(self) -> bool: ...
+    def add(self, vectors: NDArray[Any]) -> None: ...  # floats, coerced to float32
+    def search(
+        self, queries: NDArray[Any], k: int
+    ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...  # (scores, indices)
+    def search_asymmetric(
+        self, queries: NDArray[Any], k: int
+    ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...  # (scores, indices)
+    def swap_remove(self, idx: int) -> int: ...
+    def write(self, path: str) -> None: ...  # path used as-is; trusted input only
+    @classmethod
+    def load(cls, path: str) -> Rank: ...  # path used as-is; trusted input only
+
+@final
+class RankQuant:
+    """Bucketed ranks, ``bits`` in {1, 2, 4}; symmetric + asymmetric scoring."""
+
+    def __new__(cls, dim: int, bits: int) -> RankQuant: ...
+    def __repr__(self) -> str: ...
+    def __len__(self) -> int: ...
+    @property
+    def dim(self) -> int: ...
+    @property
+    def bits(self) -> int: ...
+    @property
+    def bytes_per_vec(self) -> int: ...
+    @property
+    def byte_size(self) -> int: ...
+    def is_empty(self) -> bool: ...
+    def add(self, vectors: NDArray[Any]) -> None: ...  # floats, coerced to float32
+    def search(
+        self, queries: NDArray[Any], k: int
+    ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...  # (scores, indices)
+    def search_asymmetric(
+        self, queries: NDArray[Any], k: int
+    ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...  # (scores, indices)
+    def search_asymmetric_subset(
+        self, query: NDArray[Any], candidates: NDArray[Any], k: int
+    ) -> tuple[NDArray[np.float32], NDArray[np.int64]]:
+        """Asymmetric rerank over a candidate subset.
+
+        Returns ``(scores, global_ids)`` of length ``min(k, len(candidates))``
+        — not ``k``. When ``k > len(candidates)`` the result is silently capped
+        to the candidate count; the subset path does not pad with sentinel rows
+        (see issue #14).
+        """
+        ...
+
+    def swap_remove(self, idx: int) -> int: ...
+    def write(self, path: str) -> None: ...  # path used as-is; trusted input only
+    @classmethod
+    def load(cls, path: str) -> RankQuant: ...  # path used as-is; trusted input only
+
+@final
+class Bitmap:
+    """Constant-weight top-bucket bitmap per document; ``popcount(Q AND D)``."""
+
+    def __new__(cls, dim: int, n_top: int) -> Bitmap: ...
+    def __repr__(self) -> str: ...
+    def __len__(self) -> int: ...
+    @property
+    def dim(self) -> int: ...
+    @property
+    def n_top(self) -> int: ...
+    @property
+    def bytes_per_vec(self) -> int: ...
+    @property
+    def byte_size(self) -> int: ...
+    def is_empty(self) -> bool: ...
+    def add(self, vectors: NDArray[Any]) -> None: ...  # floats, coerced to float32
+    def search(
+        self, queries: NDArray[Any], k: int
+    ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...  # (scores, indices)
+    def search_subset(
+        self, query: NDArray[Any], doc_ids: NDArray[Any], k: int
+    ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...
+    def top_m_candidates(self, query: NDArray[Any], m: int) -> NDArray[np.uint32]: ...
+    def top_m_candidates_batched(
+        self, queries: NDArray[Any], m: int
+    ) -> NDArray[np.uint32]: ...
+    def top_m_candidates_batched_chunked(
+        self, queries: NDArray[Any], m: int, batch_size: int
+    ) -> NDArray[np.uint32]: ...
+    def build_query_bitmap_fp32(self, query: NDArray[Any]) -> NDArray[np.uint64]: ...
+    def body_overlap_scores_subset(
+        self, q_bitmap: NDArray[np.uint64], doc_ids: NDArray[Any]
+    ) -> NDArray[np.uint32]: ...  # doc_ids: any integer dtype, coerced to uint32
+    def write(self, path: str) -> None: ...  # path used as-is; trusted input only
+    @classmethod
+    def load(cls, path: str) -> Bitmap: ...  # path used as-is; trusted input only
+
+@final
+class SignBitmap:
+    """1-bit-per-coord sign-cosine retrieval substrate (no ``n_top``)."""
+
+    def __new__(cls, dim: int) -> SignBitmap: ...
+    def __repr__(self) -> str: ...
+    def __len__(self) -> int: ...
+    @property
+    def dim(self) -> int: ...
+    @property
+    def bytes_per_vec(self) -> int: ...
+    @property
+    def byte_size(self) -> int: ...
+    def is_empty(self) -> bool: ...
+    def add(self, vectors: NDArray[Any]) -> None: ...  # floats, coerced to float32
+    def top_m_candidates(self, query: NDArray[Any], m: int) -> NDArray[np.uint32]: ...
+    def top_m_candidates_batched(
+        self, queries: NDArray[Any], m: int
+    ) -> NDArray[np.uint32]: ...
+    def score_all(self, query: NDArray[Any]) -> NDArray[np.uint32]: ...
+    def score_all_batched(self, queries: NDArray[Any]) -> NDArray[np.uint32]: ...
+    def build_query_bitmap(self, query: NDArray[Any]) -> NDArray[np.uint64]: ...
+    def write(self, path: str) -> None: ...  # path used as-is; trusted input only
+    @classmethod
+    def load(cls, path: str) -> SignBitmap: ...  # path used as-is; trusted input only
+
+# ---------------------------------------------------------------------------
+# Module-level rank-math primitives (parity with ``ordvec::rank::*``) and the
+# byte-LUT / eval scoring helpers.
+# ---------------------------------------------------------------------------
+
+def rank_transform(v: NDArray[Any]) -> NDArray[np.uint16]: ...
+def rank_to_bucket(rank: int, d: int, bits: int) -> int: ...
+def bucket_ranks(ranks: NDArray[np.uint16], bits: int) -> NDArray[np.uint8]: ...
+def pack_buckets(buckets: NDArray[np.uint8], bits: int) -> NDArray[np.uint8]: ...
+def unpack_buckets(packed: NDArray[np.uint8], d: int, bits: int) -> NDArray[np.uint8]: ...
+def rankquant_bytes_per_vec(d: int, bits: int) -> int: ...
+def bucket_centre(bucket: int, bits: int) -> float: ...
+def rank_norm(d: int) -> float: ...
+def rankquant_norm(d: int, bits: int) -> float: ...
+def search_asymmetric_byte_lut(
+    index: RankQuant, queries: NDArray[Any], k: int
+) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...  # (scores, indices)
+def rankquant_eval_search(
+    corpus: NDArray[Any], queries: NDArray[Any], bits: int, k: int
+) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...  # (scores, indices)
diff --git a/ordvec-python/python/ordvec/py.typed b/ordvec-python/python/ordvec/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs
index 984d8b1..6e75661 100644
--- a/ordvec-python/src/lib.rs
+++ b/ordvec-python/src/lib.rs
@@ -731,8 +731,14 @@ impl RankQuant {
     /// Asymmetric scoring restricted to a candidate subset (e.g. the top-M
     /// shortlist from a [`Bitmap`] or [`SignBitmap`] probe). Returns
     /// ``(scores, global_ids)`` where ``global_ids`` are the original doc
-    /// indices (mapped from the local candidate slot). ``k`` is capped to the
-    /// candidate-list length; the subset path does not add sentinel padding.
+    /// indices (mapped from the local candidate slot).
+    ///
+    /// Both returned arrays have length ``min(k, len(candidates))`` — **not**
+    /// ``k``. When ``k > len(candidates)`` the result is silently capped to the
+    /// candidate count; the subset path does not pad with sentinel rows. A
+    /// caller assembling a fixed-width ``(n_q, k)`` buffer must therefore fill
+    /// each row from the returned length, not from ``k``.
+    ///
     /// Uses the same AVX-512 → AVX2 → scalar dispatch as ``search_asymmetric``.
     ///
     /// ``candidates`` may be unsorted and may contain duplicates. Duplicate