diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt
index 22a25c24d0..980cb78f29 100644
--- a/c/CMakeLists.txt
+++ b/c/CMakeLists.txt
@@ -89,6 +89,7 @@ add_library(
   src/neighbors/brute_force.cpp
   src/neighbors/ivf_flat.cpp
   src/neighbors/ivf_pq.cpp
+  src/neighbors/ivf_sq.cpp
   src/neighbors/cagra.cpp
   $<$<BOOL:${BUILD_CAGRA_HNSWLIB}>:src/neighbors/hnsw.cpp>
   $<$<BOOL:${BUILD_MG_ALGOS}>:src/neighbors/mg_ivf_pq.cpp>
diff --git a/c/include/cuvs/core/all.h b/c/include/cuvs/core/all.h
index cc83684925..0847cc63da 100644
--- a/c/include/cuvs/core/all.h
+++ b/c/include/cuvs/core/all.h
@@ -22,6 +22,7 @@
 #include <cuvs/neighbors/common.h>
 #include <cuvs/neighbors/ivf_flat.h>
 #include <cuvs/neighbors/ivf_pq.h>
+#include <cuvs/neighbors/ivf_sq.h>
 #include <cuvs/neighbors/nn_descent.h>
 #include <cuvs/neighbors/refine.h>
 #include <cuvs/neighbors/tiered_index.h>
diff --git a/c/include/cuvs/neighbors/ivf_sq.h b/c/include/cuvs/neighbors/ivf_sq.h
new file mode 100644
index 0000000000..6b312443b0
--- /dev/null
+++ b/c/include/cuvs/neighbors/ivf_sq.h
@@ -0,0 +1,373 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <cuvs/core/c_api.h>
+#include <cuvs/distance/distance.h>
+#include <cuvs/neighbors/common.h>
+#include <dlpack/dlpack.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup ivf_sq_c_index_params IVF-SQ index build parameters
+ * @{
+ */
+/**
+ * @brief Supplemental parameters to build IVF-SQ Index
+ *
+ */
+struct cuvsIvfSqIndexParams {
+  /** Distance type. */
+  cuvsDistanceType metric;
+  /** The argument used by some distance metrics. */
+  float metric_arg;
+  /**
+   * Whether to add the dataset content to the index, i.e.:
+   *
+   *  - `true` means the index is filled with the dataset vectors and ready to search after calling
+   * `build`.
+   *  - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but
+   * the index is left empty; you'd need to call `extend` on the index afterwards to populate it.
+   */
+  bool add_data_on_build;
+  /** The number of inverted lists (clusters) */
+  uint32_t n_lists;
+  /** The number of iterations searching for kmeans centers (index building). */
+  uint32_t kmeans_n_iters;
+  /** The fraction of data to use during iterative kmeans building. */
+  double kmeans_trainset_fraction;
+  /**
+   * By default (adaptive_centers = false), the cluster centers are trained in `ivf_sq::build`,
+   * and never modified in `ivf_sq::extend`. As a result, you may need to retrain the index
+   * from scratch after invoking (`ivf_sq::extend`) a few times with new data, the distribution of
+   * which is no longer representative of the original training set.
+   *
+   * The alternative behavior (adaptive_centers = true) is to update the cluster centers for new
+   * data when it is added. In this case, `index.centers()` are always exactly the centroids of the
+   * data in the corresponding clusters. The drawback of this behavior is that the centroids depend
+   * on the order of adding new data (through the classification of the added data); that is,
+   * `index.centers()` "drift" together with the changing distribution of the newly added data.
+   */
+  bool adaptive_centers;
+  /**
+   * By default, the algorithm allocates more space than necessary for individual clusters
+   * (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of
+   * data copies during repeated calls to `extend` (extending the database).
+   *
+   * The alternative is the conservative allocation behavior; when enabled, the algorithm always
+   * allocates the minimum amount of memory required to store the given number of records. Set this
+   * flag to `true` if you prefer to use as little GPU memory for the database as possible.
+   */
+  bool conservative_memory_allocation;
+};
+
+typedef struct cuvsIvfSqIndexParams* cuvsIvfSqIndexParams_t;
+
+/**
+ * @brief Allocate IVF-SQ Index params, and populate with default values
+ *
+ * @param[in] index_params cuvsIvfSqIndexParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* index_params);
+
+/**
+ * @brief De-allocate IVF-SQ Index params
+ *
+ * @param[in] index_params
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index_params);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_c_search_params IVF-SQ index search parameters
+ * @{
+ */
+/**
+ * @brief Supplemental parameters to search IVF-SQ index
+ *
+ */
+struct cuvsIvfSqSearchParams {
+  /** The number of clusters to search. */
+  uint32_t n_probes;
+};
+
+typedef struct cuvsIvfSqSearchParams* cuvsIvfSqSearchParams_t;
+
+/**
+ * @brief Allocate IVF-SQ search params, and populate with default values
+ *
+ * @param[in] params cuvsIvfSqSearchParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params);
+
+/**
+ * @brief De-allocate IVF-SQ search params
+ *
+ * @param[in] params
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_c_index IVF-SQ index
+ * @{
+ */
+/**
+ * @brief Struct to hold address of cuvs::neighbors::ivf_sq::index and its active trained dtype
+ *
+ */
+typedef struct {
+  uintptr_t addr;
+  DLDataType dtype;
+} cuvsIvfSqIndex;
+
+typedef cuvsIvfSqIndex* cuvsIvfSqIndex_t;
+
+/**
+ * @brief Allocate IVF-SQ index
+ *
+ * @param[in] index cuvsIvfSqIndex_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index);
+
+/**
+ * @brief De-allocate IVF-SQ index
+ *
+ * @param[in] index cuvsIvfSqIndex_t to de-allocate
+ */
+cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index);
+
+/** Get the number of clusters/inverted lists */
+cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists);
+
+/** Get the dimensionality of the data */
+cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim);
+
+/** Get the size of the index */
+cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size);
+
+/**
+ * @brief Get the cluster centers corresponding to the lists [n_lists, dim]
+ *
+ * @param[in] index cuvsIvfSqIndex_t Built Ivf-SQ Index
+ * @param[out] centers Preallocated array on host or device memory to store output, [n_lists, dim]
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_c_index_build IVF-SQ index build
+ * @{
+ */
+/**
+ * @brief Build an IVF-SQ index with a `DLManagedTensor` which has underlying
+ *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
+ *        or `kDLCPU`. Also, acceptable underlying types are:
+ *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *
+ * @code {.c}
+ * #include <cuvs/core/c_api.h>
+ * #include <cuvs/neighbors/ivf_sq.h>
+ *
+ * // Create cuvsResources_t
+ * cuvsResources_t res;
+ * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+ *
+ * // Assume a populated `DLManagedTensor` type here
+ * DLManagedTensor dataset;
+ *
+ * // Create default index params
+ * cuvsIvfSqIndexParams_t index_params;
+ * cuvsError_t params_create_status = cuvsIvfSqIndexParamsCreate(&index_params);
+ *
+ * // Create IVF-SQ index
+ * cuvsIvfSqIndex_t index;
+ * cuvsError_t index_create_status = cuvsIvfSqIndexCreate(&index);
+ *
+ * // Build the IVF-SQ Index
+ * cuvsError_t build_status = cuvsIvfSqBuild(res, index_params, &dataset, index);
+ *
+ * // de-allocate `index_params`, `index` and `res`
+ * cuvsError_t params_destroy_status = cuvsIvfSqIndexParamsDestroy(index_params);
+ * cuvsError_t index_destroy_status = cuvsIvfSqIndexDestroy(index);
+ * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
+ * @endcode
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] index_params cuvsIvfSqIndexParams_t used to build IVF-SQ index
+ * @param[in] dataset DLManagedTensor* training dataset
+ * @param[out] index cuvsIvfSqIndex_t Newly built IVF-SQ index
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqBuild(cuvsResources_t res,
+                           cuvsIvfSqIndexParams_t index_params,
+                           DLManagedTensor* dataset,
+                           cuvsIvfSqIndex_t index);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_c_index_search IVF-SQ index search
+ * @{
+ */
+/**
+ * @brief Search an IVF-SQ index with a `DLManagedTensor` which has underlying
+ *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`.
+ *        Types for input are:
+ *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or 16
+ *        2. `neighbors`: `kDLDataType.code == kDLInt` and `kDLDataType.bits = 64`
+ *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *
+ * @code {.c}
+ * #include <cuvs/core/c_api.h>
+ * #include <cuvs/neighbors/ivf_sq.h>
+ *
+ * // Create cuvsResources_t
+ * cuvsResources_t res;
+ * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+ *
+ * // Assume a populated `DLManagedTensor` type here
+ * DLManagedTensor queries;
+ * DLManagedTensor neighbors;
+ * DLManagedTensor distances;
+ *
+ * // Create default search params
+ * cuvsIvfSqSearchParams_t search_params;
+ * cuvsError_t params_create_status = cuvsIvfSqSearchParamsCreate(&search_params);
+ *
+ * // Search the `index` built using `cuvsIvfSqBuild`
+ * cuvsError_t search_status = cuvsIvfSqSearch(res, search_params, index, &queries, &neighbors,
+ * &distances);
+ *
+ * // de-allocate `search_params` and `res`
+ * cuvsError_t params_destroy_status = cuvsIvfSqSearchParamsDestroy(search_params);
+ * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
+ * @endcode
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] search_params cuvsIvfSqSearchParams_t used to search IVF-SQ index
+ * @param[in] index ivfSqIndex which has been returned by `cuvsIvfSqBuild`
+ * @param[in] queries DLManagedTensor* queries dataset to search
+ * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries
+ * @param[out] distances DLManagedTensor* output `k` distances for queries
+ */
+cuvsError_t cuvsIvfSqSearch(cuvsResources_t res,
+                            cuvsIvfSqSearchParams_t search_params,
+                            cuvsIvfSqIndex_t index,
+                            DLManagedTensor* queries,
+                            DLManagedTensor* neighbors,
+                            DLManagedTensor* distances);
+
+/**
+ * @brief Search an IVF-SQ index with filtering.
+ *
+ * Same as cuvsIvfSqSearch, but applies a pre-filter to exclude vectors during search.
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] search_params cuvsIvfSqSearchParams_t used to search IVF-SQ index
+ * @param[in] index ivfSqIndex which has been returned by `cuvsIvfSqBuild`
+ * @param[in] queries DLManagedTensor* queries dataset to search
+ * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries
+ * @param[out] distances DLManagedTensor* output `k` distances for queries
+ * @param[in] filter cuvsFilter to filter neighbors based on the given bitset
+ */
+cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res,
+                                      cuvsIvfSqSearchParams_t search_params,
+                                      cuvsIvfSqIndex_t index,
+                                      DLManagedTensor* queries,
+                                      DLManagedTensor* neighbors,
+                                      DLManagedTensor* distances,
+                                      cuvsFilter filter);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_c_index_serialize IVF-SQ C-API serialize functions
+ * @{
+ */
+/**
+ * Save the index to file.
+ *
+ * Experimental, both the API and the serialization format are subject to change.
+ *
+ * @code{.c}
+ * #include <cuvs/neighbors/ivf_sq.h>
+ *
+ * // Create cuvsResources_t
+ * cuvsResources_t res;
+ * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+ *
+ * // create an index with `cuvsIvfSqBuild`
+ * cuvsIvfSqSerialize(res, "/path/to/index", index);
+ * @endcode
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] filename the file name for saving the index
+ * @param[in] index IVF-SQ index
+ */
+cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex_t index);
+
+/**
+ * Load index from file.
+ *
+ * Experimental, both the API and the serialization format are subject to change.
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] filename the name of the file that stores the index
+ * @param[out] index IVF-SQ index loaded from disk
+ */
+cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res,
+                                 const char* filename,
+                                 cuvsIvfSqIndex_t index);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_c_index_extend IVF-SQ index extend
+ * @{
+ */
+/**
+ * @brief Extend the index with the new data.
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] new_vectors DLManagedTensor* the new vectors to add to the index
+ * @param[in] new_indices DLManagedTensor* vector of new indices for the new vectors
+ * @param[inout] index IVF-SQ index to be extended
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsIvfSqExtend(cuvsResources_t res,
+                            DLManagedTensor* new_vectors,
+                            DLManagedTensor* new_indices,
+                            cuvsIvfSqIndex_t index);
+/**
+ * @}
+ */
+#ifdef __cplusplus
+}
+#endif
diff --git a/c/src/neighbors/ivf_sq.cpp b/c/src/neighbors/ivf_sq.cpp
new file mode 100644
index 0000000000..eadf84a299
--- /dev/null
+++ b/c/src/neighbors/ivf_sq.cpp
@@ -0,0 +1,363 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cstdint>
+#include <dlpack/dlpack.h>
+
+#include <raft/core/error.hpp>
+#include <raft/core/mdspan_types.hpp>
+#include <raft/core/resources.hpp>
+
+#include <cuvs/core/c_api.h>
+#include <cuvs/neighbors/ivf_sq.h>
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include "../core/exceptions.hpp"
+#include "../core/interop.hpp"
+
+namespace cuvs::neighbors::ivf_sq {
+void convert_c_index_params(cuvsIvfSqIndexParams params,
+                            cuvs::neighbors::ivf_sq::index_params* out)
+{
+  out->metric                        = static_cast<cuvs::distance::DistanceType>((int)params.metric);
+  out->metric_arg                    = params.metric_arg;
+  out->add_data_on_build             = params.add_data_on_build;
+  out->n_lists                       = params.n_lists;
+  out->kmeans_n_iters                = params.kmeans_n_iters;
+  out->kmeans_trainset_fraction      = params.kmeans_trainset_fraction;
+  out->adaptive_centers              = params.adaptive_centers;
+  out->conservative_memory_allocation = params.conservative_memory_allocation;
+}
+void convert_c_search_params(cuvsIvfSqSearchParams params,
+                             cuvs::neighbors::ivf_sq::search_params* out)
+{
+  out->n_probes = params.n_probes;
+}
+}  // namespace cuvs::neighbors::ivf_sq
+
+namespace {
+
+template <typename T>
+void* _build(cuvsResources_t res, cuvsIvfSqIndexParams params, DLManagedTensor* dataset_tensor)
+{
+  auto res_ptr = reinterpret_cast<raft::resources*>(res);
+
+  auto build_params = cuvs::neighbors::ivf_sq::index_params();
+  cuvs::neighbors::ivf_sq::convert_c_index_params(params, &build_params);
+
+  auto dataset = dataset_tensor->dl_tensor;
+  auto dim     = dataset.shape[1];
+
+  auto index = new cuvs::neighbors::ivf_sq::index<uint8_t>(*res_ptr, build_params, dim);
+
+  if (cuvs::core::is_dlpack_device_compatible(dataset)) {
+    using mdspan_type = raft::device_matrix_view<const T, int64_t, raft::row_major>;
+    auto mds          = cuvs::core::from_dlpack<mdspan_type>(dataset_tensor);
+    cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds, *index);
+  } else {
+    using mdspan_type = raft::host_matrix_view<T const, int64_t, raft::row_major>;
+    auto mds          = cuvs::core::from_dlpack<mdspan_type>(dataset_tensor);
+    cuvs::neighbors::ivf_sq::build(*res_ptr, build_params, mds, *index);
+  }
+
+  return index;
+}
+
+template <typename T>
+void _search(cuvsResources_t res,
+             cuvsIvfSqSearchParams params,
+             cuvsIvfSqIndex index,
+             DLManagedTensor* queries_tensor,
+             DLManagedTensor* neighbors_tensor,
+             DLManagedTensor* distances_tensor,
+             cuvsFilter* filter)
+{
+  auto res_ptr   = reinterpret_cast<raft::resources*>(res);
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index.addr);
+
+  auto search_params = cuvs::neighbors::ivf_sq::search_params();
+  cuvs::neighbors::ivf_sq::convert_c_search_params(params, &search_params);
+
+  using queries_mdspan_type   = raft::device_matrix_view<const T, int64_t, raft::row_major>;
+  using neighbors_mdspan_type = raft::device_matrix_view<int64_t, int64_t, raft::row_major>;
+  using distances_mdspan_type = raft::device_matrix_view<float, int64_t, raft::row_major>;
+  auto queries_mds            = cuvs::core::from_dlpack<queries_mdspan_type>(queries_tensor);
+  auto neighbors_mds          = cuvs::core::from_dlpack<neighbors_mdspan_type>(neighbors_tensor);
+  auto distances_mds          = cuvs::core::from_dlpack<distances_mdspan_type>(distances_tensor);
+
+  if (filter == nullptr || filter->type == NO_FILTER) {
+    cuvs::neighbors::ivf_sq::search(
+      *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds);
+  } else if (filter->type == BITSET) {
+    using filter_mdspan_type    = raft::device_vector_view<std::uint32_t, int64_t, raft::row_major>;
+    auto removed_indices_tensor = reinterpret_cast<DLManagedTensor*>(filter->addr);
+    auto removed_indices = cuvs::core::from_dlpack<filter_mdspan_type>(removed_indices_tensor);
+    cuvs::core::bitset_view<std::uint32_t, int64_t> removed_indices_bitset(removed_indices,
+                                                                           index_ptr->size());
+    auto bitset_filter_obj = cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset);
+    cuvs::neighbors::ivf_sq::search(*res_ptr,
+                                    search_params,
+                                    *index_ptr,
+                                    queries_mds,
+                                    neighbors_mds,
+                                    distances_mds,
+                                    bitset_filter_obj);
+  } else {
+    RAFT_FAIL("Unsupported filter type: BITMAP");
+  }
+}
+
+void _serialize(cuvsResources_t res, const char* filename, cuvsIvfSqIndex index)
+{
+  auto res_ptr   = reinterpret_cast<raft::resources*>(res);
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index.addr);
+  cuvs::neighbors::ivf_sq::serialize(*res_ptr, std::string(filename), *index_ptr);
+}
+
+void* _deserialize(cuvsResources_t res, const char* filename)
+{
+  auto res_ptr = reinterpret_cast<raft::resources*>(res);
+  auto index   = new cuvs::neighbors::ivf_sq::index<uint8_t>(*res_ptr);
+  cuvs::neighbors::ivf_sq::deserialize(*res_ptr, std::string(filename), index);
+  return index;
+}
+
+template <typename T>
+void _extend(cuvsResources_t res,
+             DLManagedTensor* new_vectors,
+             DLManagedTensor* new_indices,
+             cuvsIvfSqIndex index)
+{
+  auto res_ptr   = reinterpret_cast<raft::resources*>(res);
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index.addr);
+
+  bool on_device = cuvs::core::is_dlpack_device_compatible(new_vectors->dl_tensor);
+  if (on_device != cuvs::core::is_dlpack_device_compatible(new_indices->dl_tensor)) {
+    RAFT_FAIL("extend inputs must both either be on device memory or host memory");
+  }
+
+  if (on_device) {
+    using vectors_mdspan_type = raft::device_matrix_view<const T, int64_t, raft::row_major>;
+    using indices_mdspan_type = raft::device_vector_view<int64_t, int64_t>;
+    auto vectors_mds          = cuvs::core::from_dlpack<vectors_mdspan_type>(new_vectors);
+    auto indices_mds          = cuvs::core::from_dlpack<indices_mdspan_type>(new_indices);
+    cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr);
+  } else {
+    using vectors_mdspan_type = raft::host_matrix_view<const T, int64_t, raft::row_major>;
+    using indices_mdspan_type = raft::host_vector_view<int64_t, int64_t>;
+    auto vectors_mds          = cuvs::core::from_dlpack<vectors_mdspan_type>(new_vectors);
+    auto indices_mds          = cuvs::core::from_dlpack<indices_mdspan_type>(new_indices);
+    cuvs::neighbors::ivf_sq::extend(*res_ptr, vectors_mds, indices_mds, index_ptr);
+  }
+}
+
+void _get_centers(cuvsIvfSqIndex index, DLManagedTensor* centers)
+{
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index.addr);
+  cuvs::core::to_dlpack(index_ptr->centers(), centers);
+}
+}  // namespace
+
+extern "C" cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index)
+{
+  return cuvs::core::translate_exceptions([=] { *index = new cuvsIvfSqIndex{}; });
+}
+
+extern "C" cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index_c_ptr)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index     = *index_c_ptr;
+    auto index_ptr = reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index.addr);
+    delete index_ptr;
+    delete index_c_ptr;
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfSqBuild(cuvsResources_t res,
+                                      cuvsIvfSqIndexParams_t params,
+                                      DLManagedTensor* dataset_tensor,
+                                      cuvsIvfSqIndex_t index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto dataset = dataset_tensor->dl_tensor;
+
+    index->dtype.code = dataset.dtype.code;
+    index->dtype.bits = dataset.dtype.bits;
+
+    if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
+      index->addr = reinterpret_cast<uintptr_t>(_build<float>(res, *params, dataset_tensor));
+    } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) {
+      index->addr = reinterpret_cast<uintptr_t>(_build<half>(res, *params, dataset_tensor));
+    } else {
+      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
+                dataset.dtype.code,
+                dataset.dtype.bits);
+    }
+  });
+}
+
+static cuvsError_t _cuvsIvfSqSearchImpl(cuvsResources_t res,
+                                        cuvsIvfSqSearchParams_t params,
+                                        cuvsIvfSqIndex_t index_c_ptr,
+                                        DLManagedTensor* queries_tensor,
+                                        DLManagedTensor* neighbors_tensor,
+                                        DLManagedTensor* distances_tensor,
+                                        cuvsFilter* filter)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto queries   = queries_tensor->dl_tensor;
+    auto neighbors = neighbors_tensor->dl_tensor;
+    auto distances = distances_tensor->dl_tensor;
+
+    RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(queries),
+                 "queries should have device compatible memory");
+    RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(neighbors),
+                 "neighbors should have device compatible memory");
+    RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(distances),
+                 "distances should have device compatible memory");
+
+    RAFT_EXPECTS(neighbors.dtype.code == kDLInt && neighbors.dtype.bits == 64,
+                 "neighbors should be of type int64_t");
+    RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32,
+                 "distances should be of type float32");
+
+    auto index = *index_c_ptr;
+    if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) {
+      _search<float>(
+        res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
+    } else if (queries.dtype.code == kDLFloat && queries.dtype.bits == 16) {
+      _search<half>(
+        res, *params, index, queries_tensor, neighbors_tensor, distances_tensor, filter);
+    } else {
+      RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d",
+                queries.dtype.code,
+                queries.dtype.bits);
+    }
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfSqSearch(cuvsResources_t res,
+                                       cuvsIvfSqSearchParams_t params,
+                                       cuvsIvfSqIndex_t index_c_ptr,
+                                       DLManagedTensor* queries_tensor,
+                                       DLManagedTensor* neighbors_tensor,
+                                       DLManagedTensor* distances_tensor)
+{
+  return _cuvsIvfSqSearchImpl(
+    res, params, index_c_ptr, queries_tensor, neighbors_tensor, distances_tensor, nullptr);
+}
+
+extern "C" cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res,
+                                                  cuvsIvfSqSearchParams_t params,
+                                                  cuvsIvfSqIndex_t index_c_ptr,
+                                                  DLManagedTensor* queries_tensor,
+                                                  DLManagedTensor* neighbors_tensor,
+                                                  DLManagedTensor* distances_tensor,
+                                                  cuvsFilter filter)
+{
+  return _cuvsIvfSqSearchImpl(
+    res, params, index_c_ptr, queries_tensor, neighbors_tensor, distances_tensor, &filter);
+}
+
+extern "C" cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* params)
+{
+  return cuvs::core::translate_exceptions([=] {
+    *params = new cuvsIvfSqIndexParams{.metric                         = L2Expanded,
+                                       .metric_arg                     = 2.0f,
+                                       .add_data_on_build              = true,
+                                       .n_lists                        = 1024,
+                                       .kmeans_n_iters                 = 20,
+                                       .kmeans_trainset_fraction       = 0.5,
+                                       .adaptive_centers               = false,
+                                       .conservative_memory_allocation = false};
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
+extern "C" cuvsError_t cuvsIvfSqSearchParamsCreate(cuvsIvfSqSearchParams_t* params)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { *params = new cuvsIvfSqSearchParams{.n_probes = 20}; });
+}
+
+extern "C" cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
+extern "C" cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res,
+                                            const char* filename,
+                                            cuvsIvfSqIndex_t index)
+{
+  return cuvs::core::translate_exceptions(
+    [=] { index->addr = reinterpret_cast<uintptr_t>(_deserialize(res, filename)); });
+}
+
+extern "C" cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res,
+                                          const char* filename,
+                                          cuvsIvfSqIndex_t index)
+{
+  return cuvs::core::translate_exceptions([=] { _serialize(res, filename, *index); });
+}
+
+extern "C" cuvsError_t cuvsIvfSqExtend(cuvsResources_t res,
+                                       DLManagedTensor* new_vectors,
+                                       DLManagedTensor* new_indices,
+                                       cuvsIvfSqIndex_t index)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto vectors = new_vectors->dl_tensor;
+
+    if (index->dtype.code == 0 && index->dtype.bits == 0) {
+      index->dtype.code = vectors.dtype.code;
+      index->dtype.bits = vectors.dtype.bits;
+    }
+
+    if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 32) {
+      _extend<float>(res, new_vectors, new_indices, *index);
+    } else if (vectors.dtype.code == kDLFloat && vectors.dtype.bits == 16) {
+      _extend<half>(res, new_vectors, new_indices, *index);
+    } else {
+      RAFT_FAIL(
+        "Unsupported vectors DLtensor dtype: %d and bits: %d", vectors.dtype.code, vectors.dtype.bits);
+    }
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index_ptr =
+      reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index->addr);
+    *n_lists = index_ptr->n_lists();
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index_ptr =
+      reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index->addr);
+    *dim = index_ptr->dim();
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size)
+{
+  return cuvs::core::translate_exceptions([=] {
+    auto index_ptr =
+      reinterpret_cast<cuvs::neighbors::ivf_sq::index<uint8_t>*>(index->addr);
+    *size = index_ptr->size();
+  });
+}
+
+extern "C" cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, DLManagedTensor* centers)
+{
+  return cuvs::core::translate_exceptions([=] { _get_centers(*index, centers); });
+}
diff --git a/c/src/neighbors/ivf_sq.hpp b/c/src/neighbors/ivf_sq.hpp
new file mode 100644
index 0000000000..3a08bc689a
--- /dev/null
+++ b/c/src/neighbors/ivf_sq.hpp
@@ -0,0 +1,14 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <cuvs/neighbors/ivf_sq.h>
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+namespace cuvs::neighbors::ivf_sq {
+/// Converts a cuvsIvfSqIndexParams struct (c) to a ivf_sq::index_params (C++) struct
+void convert_c_index_params(cuvsIvfSqIndexParams params,
+                            cuvs::neighbors::ivf_sq::index_params* out);
+void convert_c_search_params(cuvsIvfSqSearchParams params,
+                             cuvs::neighbors::ivf_sq::search_params* out);
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/c/tests/CMakeLists.txt b/c/tests/CMakeLists.txt
index 6d52e5b174..9c96fc4120 100644
--- a/c/tests/CMakeLists.txt
+++ b/c/tests/CMakeLists.txt
@@ -1,6 +1,6 @@
 # =============================================================================
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 # =============================================================================
@@ -77,6 +77,7 @@ ConfigureTest(
 ConfigureTest(NAME BRUTEFORCE_C_TEST PATH neighbors/run_brute_force_c.c neighbors/brute_force_c.cu)
 ConfigureTest(NAME IVF_FLAT_C_TEST PATH neighbors/run_ivf_flat_c.c neighbors/ann_ivf_flat_c.cu)
 ConfigureTest(NAME IVF_PQ_C_TEST PATH neighbors/run_ivf_pq_c.c neighbors/ann_ivf_pq_c.cu)
+ConfigureTest(NAME IVF_SQ_C_TEST PATH neighbors/run_ivf_sq_c.c neighbors/ann_ivf_sq_c.cu)
 ConfigureTest(NAME CAGRA_C_TEST PATH neighbors/ann_cagra_c.cu)
 ConfigureTest(NAME MG_C_TEST PATH neighbors/run_mg_c.c neighbors/ann_mg_c.cu)
 ConfigureTest(
diff --git a/c/tests/neighbors/ann_ivf_sq_c.cu b/c/tests/neighbors/ann_ivf_sq_c.cu
new file mode 100644
index 0000000000..c36786e45a
--- /dev/null
+++ b/c/tests/neighbors/ann_ivf_sq_c.cu
@@ -0,0 +1,130 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuda.h>
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/handle.hpp>
+#include <raft/random/rng.cuh>
+
+#include "neighbors/ann_utils.cuh"
+#include <cuvs/neighbors/ivf_sq.h>
+
+extern "C" void run_ivf_sq(int64_t n_rows,
+                           int64_t n_queries,
+                           int64_t n_dim,
+                           uint32_t n_neighbors,
+                           float* index_data,
+                           float* query_data,
+                           float* distances_data,
+                           int64_t* neighbors_data,
+                           cuvsDistanceType metric,
+                           size_t n_probes,
+                           size_t n_lists);
+
+template <typename T>
+void generate_random_data(T* devPtr, size_t size)
+{
+  raft::handle_t handle;
+  raft::random::RngState r(1234ULL);
+  raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0));
+};
+
+template <typename T, typename IdxT>
+void recall_eval(T* query_data,
+                 T* index_data,
+                 IdxT* neighbors,
+                 T* distances,
+                 size_t n_queries,
+                 size_t n_rows,
+                 size_t n_dim,
+                 size_t n_neighbors,
+                 cuvsDistanceType metric,
+                 size_t n_probes,
+                 size_t n_lists)
+{
+  raft::handle_t handle;
+  auto distances_ref = raft::make_device_matrix<T, IdxT>(handle, n_queries, n_neighbors);
+  auto neighbors_ref = raft::make_device_matrix<IdxT, IdxT>(handle, n_queries, n_neighbors);
+  cuvs::neighbors::naive_knn<T, T, IdxT>(
+    handle,
+    distances_ref.data_handle(),
+    neighbors_ref.data_handle(),
+    query_data,
+    index_data,
+    n_queries,
+    n_rows,
+    n_dim,
+    n_neighbors,
+    static_cast<cuvs::distance::DistanceType>((uint16_t)metric));
+
+  size_t size = n_queries * n_neighbors;
+  std::vector<IdxT> neighbors_h(size);
+  std::vector<T> distances_h(size);
+  std::vector<IdxT> neighbors_ref_h(size);
+  std::vector<T> distances_ref_h(size);
+
+  auto stream = raft::resource::get_cuda_stream(handle);
+  raft::copy(neighbors_h.data(), neighbors, size, stream);
+  raft::copy(distances_h.data(), distances, size, stream);
+  raft::copy(neighbors_ref_h.data(), neighbors_ref.data_handle(), size, stream);
+  raft::copy(distances_ref_h.data(), distances_ref.data_handle(), size, stream);
+
+  double min_recall = static_cast<double>(n_probes) / static_cast<double>(n_lists);
+  ASSERT_TRUE(cuvs::neighbors::eval_neighbours(neighbors_ref_h,
+                                               neighbors_h,
+                                               distances_ref_h,
+                                               distances_h,
+                                               n_queries,
+                                               n_neighbors,
+                                               0.001,
+                                               min_recall));
+};
+
+TEST(IvfSqC, BuildSearch)
+{
+  int64_t n_rows       = 8096;
+  int64_t n_queries    = 128;
+  int64_t n_dim        = 32;
+  uint32_t n_neighbors = 8;
+
+  raft::handle_t handle;
+  auto stream = raft::resource::get_cuda_stream(handle);
+
+  cuvsDistanceType metric = L2Expanded;
+  size_t n_probes         = 20;
+  size_t n_lists          = 1024;
+
+  rmm::device_uvector<float> index_data(n_rows * n_dim, stream);
+  rmm::device_uvector<float> query_data(n_queries * n_dim, stream);
+  rmm::device_uvector<int64_t> neighbors_data(n_queries * n_neighbors, stream);
+  rmm::device_uvector<float> distances_data(n_queries * n_neighbors, stream);
+
+  generate_random_data(index_data.data(), n_rows * n_dim);
+  generate_random_data(query_data.data(), n_queries * n_dim);
+
+  run_ivf_sq(n_rows,
+             n_queries,
+             n_dim,
+             n_neighbors,
+             index_data.data(),
+             query_data.data(),
+             distances_data.data(),
+             neighbors_data.data(),
+             metric,
+             n_probes,
+             n_lists);
+
+  recall_eval(query_data.data(),
+              index_data.data(),
+              neighbors_data.data(),
+              distances_data.data(),
+              n_queries,
+              n_rows,
+              n_dim,
+              n_neighbors,
+              metric,
+              n_probes,
+              n_lists);
+}
diff --git a/c/tests/neighbors/c_api.c b/c/tests/neighbors/c_api.c
index 6988aaf618..86108ea703 100644
--- a/c/tests/neighbors/c_api.c
+++ b/c/tests/neighbors/c_api.c
@@ -1,11 +1,12 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
 #include <cuvs/core/c_api.h>
 #include <cuvs/neighbors/all_neighbors.h>
 #include <cuvs/neighbors/cagra.h>
+#include <cuvs/neighbors/ivf_sq.h>
 #include <cuvs/neighbors/tiered_index.h>
 
 #include <dlpack/dlpack.h>
@@ -47,6 +48,15 @@ void test_compile_tiered_index()
   cuvsTieredIndexExtend(resources, &dataset, tiered_index);
 }
 
+void test_compile_ivf_sq()
+{
+  assert(!"test_compile_ivf_sq is not meant to be run");
+
+  cuvsIvfSqIndex_t index;
+  cuvsIvfSqIndexCreate(&index);
+  cuvsIvfSqIndexDestroy(index);
+}
+
 void test_compile_all_neighbors()
 {
   // Smoke test to ensure that the all_neighbors.h API compiles correctly
@@ -66,6 +76,7 @@ int main()
   // These are smoke tests that check that the C-APIs compile with a C compiler.
   // These are not meant to be run.
   test_compile_cagra();
+  test_compile_ivf_sq();
   test_compile_tiered_index();
   test_compile_all_neighbors();
 
diff --git a/c/tests/neighbors/run_ivf_sq_c.c b/c/tests/neighbors/run_ivf_sq_c.c
new file mode 100644
index 0000000000..d07502abd6
--- /dev/null
+++ b/c/tests/neighbors/run_ivf_sq_c.c
@@ -0,0 +1,86 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuvs/neighbors/ivf_sq.h>
+
+void run_ivf_sq(int64_t n_rows,
+                int64_t n_queries,
+                int64_t n_dim,
+                uint32_t n_neighbors,
+                float* index_data,
+                float* query_data,
+                float* distances_data,
+                int64_t* neighbors_data,
+                cuvsDistanceType metric,
+                size_t n_probes,
+                size_t n_lists)
+{
+  cuvsResources_t res;
+  cuvsResourcesCreate(&res);
+
+  DLManagedTensor dataset_tensor;
+  dataset_tensor.dl_tensor.data              = index_data;
+  dataset_tensor.dl_tensor.device.device_type = kDLCUDA;
+  dataset_tensor.dl_tensor.ndim              = 2;
+  dataset_tensor.dl_tensor.dtype.code        = kDLFloat;
+  dataset_tensor.dl_tensor.dtype.bits        = 32;
+  dataset_tensor.dl_tensor.dtype.lanes       = 1;
+  int64_t dataset_shape[2]                   = {n_rows, n_dim};
+  dataset_tensor.dl_tensor.shape             = dataset_shape;
+  dataset_tensor.dl_tensor.strides           = NULL;
+
+  cuvsIvfSqIndex_t index;
+  cuvsIvfSqIndexCreate(&index);
+
+  cuvsIvfSqIndexParams_t build_params;
+  cuvsIvfSqIndexParamsCreate(&build_params);
+  build_params->metric  = metric;
+  build_params->n_lists = n_lists;
+  cuvsIvfSqBuild(res, build_params, &dataset_tensor, index);
+
+  DLManagedTensor queries_tensor;
+  queries_tensor.dl_tensor.data              = (void*)query_data;
+  queries_tensor.dl_tensor.device.device_type = kDLCUDA;
+  queries_tensor.dl_tensor.ndim              = 2;
+  queries_tensor.dl_tensor.dtype.code        = kDLFloat;
+  queries_tensor.dl_tensor.dtype.bits        = 32;
+  queries_tensor.dl_tensor.dtype.lanes       = 1;
+  int64_t queries_shape[2]                   = {n_queries, n_dim};
+  queries_tensor.dl_tensor.shape             = queries_shape;
+  queries_tensor.dl_tensor.strides           = NULL;
+
+  DLManagedTensor neighbors_tensor;
+  neighbors_tensor.dl_tensor.data              = (void*)neighbors_data;
+  neighbors_tensor.dl_tensor.device.device_type = kDLCUDA;
+  neighbors_tensor.dl_tensor.ndim              = 2;
+  neighbors_tensor.dl_tensor.dtype.code        = kDLInt;
+  neighbors_tensor.dl_tensor.dtype.bits        = 64;
+  neighbors_tensor.dl_tensor.dtype.lanes       = 1;
+  int64_t neighbors_shape[2]                   = {n_queries, n_neighbors};
+  neighbors_tensor.dl_tensor.shape             = neighbors_shape;
+  neighbors_tensor.dl_tensor.strides           = NULL;
+
+  DLManagedTensor distances_tensor;
+  distances_tensor.dl_tensor.data              = (void*)distances_data;
+  distances_tensor.dl_tensor.device.device_type = kDLCUDA;
+  distances_tensor.dl_tensor.ndim              = 2;
+  distances_tensor.dl_tensor.dtype.code        = kDLFloat;
+  distances_tensor.dl_tensor.dtype.bits        = 32;
+  distances_tensor.dl_tensor.dtype.lanes       = 1;
+  int64_t distances_shape[2]                   = {n_queries, n_neighbors};
+  distances_tensor.dl_tensor.shape             = distances_shape;
+  distances_tensor.dl_tensor.strides           = NULL;
+
+  cuvsIvfSqSearchParams_t search_params;
+  cuvsIvfSqSearchParamsCreate(&search_params);
+  search_params->n_probes = n_probes;
+  cuvsIvfSqSearch(
+    res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor);
+
+  cuvsIvfSqSearchParamsDestroy(search_params);
+  cuvsIvfSqIndexParamsDestroy(build_params);
+  cuvsIvfSqIndexDestroy(index);
+  cuvsResourcesDestroy(res);
+}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d4e9f2c6b4..78fd606491 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -633,6 +633,12 @@ if(NOT BUILD_CPU_ONLY)
     src/neighbors/ivf_pq/detail/ivf_pq_transform_half_int64_t.cu
     src/neighbors/ivf_pq/detail/ivf_pq_transform_int8_t_int64_t.cu
     src/neighbors/ivf_pq/detail/ivf_pq_transform_uint8_t_int64_t.cu
+    src/neighbors/ivf_sq_index.cpp
+    src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu
+    src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu
+    src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu
+    src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu
+    src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu
     src/neighbors/knn_merge_parts.cu
     src/neighbors/nn_descent.cu
     src/neighbors/nn_descent_float.cu
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index 4e4527267c..c377b64ec6 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -24,6 +24,7 @@ option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algori
 option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT "Include faiss' hnsw algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT "Include cuVS ivf flat algorithm in benchmark" ON)
+option(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ "Include cuVS ivf sq algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ "Include cuVS ivf pq algorithm in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_CUVS_CAGRA "Include cuVS CAGRA in benchmark" ON)
 option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benchmark" ON)
@@ -80,6 +81,7 @@ set(CUVS_USE_FAISS_STATIC ON)
 if(BUILD_CPU_ONLY)
   set(CUVS_FAISS_ENABLE_GPU OFF)
   set(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT OFF)
+  set(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ OFF)
   set(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OFF)
   set(CUVS_ANN_BENCH_USE_CUVS_CAGRA OFF)
   set(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OFF)
@@ -97,6 +99,7 @@ set(CUVS_ANN_BENCH_USE_CUVS OFF)
 if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
    OR CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE
    OR CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT
+   OR CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
    OR CUVS_ANN_BENCH_USE_CUVS_CAGRA
    OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB
    OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE
@@ -244,6 +247,12 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT)
   )
 endif()
 
+if(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ)
+  ConfigureAnnBench(
+    NAME CUVS_IVF_SQ PATH src/cuvs/cuvs_benchmark.cu src/cuvs/cuvs_ivf_sq.cu LINKS cuvs
+  )
+endif()
+
 if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE)
   ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH src/cuvs/cuvs_benchmark.cu LINKS cuvs)
 endif()
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
index 2eaf3123a0..6e7a6e2179 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
@@ -35,6 +35,11 @@ extern template class cuvs::bench::cuvs_cagra<uint8_t, uint32_t>;
 extern template class cuvs::bench::cuvs_cagra<int8_t, uint32_t>;
 #endif
 
+#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
+#include "cuvs_ivf_sq_wrapper.h"
+extern template class cuvs::bench::cuvs_ivf_sq<float>;
+extern template class cuvs::bench::cuvs_ivf_sq<half>;
+#endif
 #ifdef CUVS_ANN_BENCH_USE_CUVS_MG
 #include "cuvs_ivf_flat_wrapper.h"
 #include "cuvs_mg_ivf_flat_wrapper.h"
@@ -86,6 +91,26 @@ void parse_search_param(const nlohmann::json& conf,
 }
 #endif
 
+#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
+template <typename T>
+void parse_build_param(const nlohmann::json& conf,
+                       typename cuvs::bench::cuvs_ivf_sq<T>::build_param& param)
+{
+  param.n_lists = conf.at("nlist");
+  if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); }
+  if (conf.contains("ratio")) {
+    param.kmeans_trainset_fraction = 1.0 / static_cast<double>(conf.at("ratio"));
+  }
+}
+
+template <typename T>
+void parse_search_param(const nlohmann::json& conf,
+                        typename cuvs::bench::cuvs_ivf_sq<T>::search_param& param)
+{
+  param.ivf_sq_params.n_probes = conf.at("nprobe");
+}
+#endif
+
 #if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) ||   \
   defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) || \
   defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_DISKANN)
diff --git a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
index aebac654c2..22aeb31c38 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -84,6 +84,15 @@ auto create_algo(const std::string& algo_name,
     }
   }
 #endif
+#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
+  if constexpr (std::is_same_v<T, float> || std::is_same_v<T, half>) {
+    if (algo_name == "cuvs_ivf_sq") {
+      typename cuvs::bench::cuvs_ivf_sq<T>::build_param param;
+      parse_build_param<T>(conf, param);
+      a = std::make_unique<cuvs::bench::cuvs_ivf_sq<T>>(metric, dim, param);
+    }
+  }
+#endif
 #ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
   if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") {
     typename cuvs::bench::cuvs_ivf_pq<T, int64_t>::build_param param;
@@ -151,6 +160,15 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con
     }
   }
 #endif
+#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
+  if constexpr (std::is_same_v<T, float> || std::is_same_v<T, half>) {
+    if (algo_name == "cuvs_ivf_sq") {
+      auto param = std::make_unique<typename cuvs::bench::cuvs_ivf_sq<T>::search_param>();
+      parse_search_param<T>(conf, *param);
+      return param;
+    }
+  }
+#endif
 #ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
   if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") {
     auto param = std::make_unique<typename cuvs::bench::cuvs_ivf_pq<T, int64_t>::search_param>();
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu
new file mode 100644
index 0000000000..ec41324c8d
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu
@@ -0,0 +1,10 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "cuvs_ivf_sq_wrapper.h"
+
+namespace cuvs::bench {
+template class cuvs_ivf_sq<float>;
+template class cuvs_ivf_sq<half>;
+}  // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h
new file mode 100644
index 0000000000..1503e6bb84
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_sq_wrapper.h
@@ -0,0 +1,141 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#pragma once
+
+#include "../common/ann_types.hpp"
+#include "cuvs_ann_bench_utils.h"
+
+#include <cuvs/distance/distance.hpp>
+#include <cuvs/neighbors/ivf_sq.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
+#include <raft/core/logger.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/util/cudart_utils.hpp>
+#include <rmm/cuda_stream_pool.hpp>
+
+#include <cassert>
+#include <memory>
+#include <string>
+#include <type_traits>
+
+namespace cuvs::bench {
+
+template <typename T>
+class cuvs_ivf_sq : public algo<T>, public algo_gpu {
+ public:
+  using search_param_base = typename algo<T>::search_param;
+
+  struct search_param : public search_param_base {
+    cuvs::neighbors::ivf_sq::search_params ivf_sq_params;
+  };
+
+  using build_param = cuvs::neighbors::ivf_sq::index_params;
+
+  cuvs_ivf_sq(Metric metric, int dim, const build_param& param)
+    : algo<T>(metric, dim), index_params_(param), dimension_(dim)
+  {
+    index_params_.metric                         = parse_metric_type(metric);
+    index_params_.conservative_memory_allocation = true;
+    RAFT_CUDA_TRY(cudaGetDevice(&device_));
+  }
+
+  void build(const T* dataset, size_t nrow) final;
+
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
+
+  void search(const T* queries,
+              int batch_size,
+              int k,
+              algo_base::index_type* neighbors,
+              float* distances) const override;
+
+  [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override
+  {
+    return handle_.get_sync_stream();
+  }
+
+  [[nodiscard]] auto get_preference() const -> algo_property override
+  {
+    algo_property property;
+    property.dataset_memory_type = MemoryType::kHostMmap;
+    property.query_memory_type   = MemoryType::kDevice;
+    return property;
+  }
+
+  void save(const std::string& file) const override;
+  void load(const std::string&) override;
+  std::unique_ptr<algo<T>> copy() override;
+
+ private:
+  configured_raft_resources handle_{};
+  build_param index_params_;
+  cuvs::neighbors::ivf_sq::search_params search_params_;
+  std::shared_ptr<cuvs::neighbors::ivf_sq::index<uint8_t>> index_;
+  int device_;
+  int dimension_;
+
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
+};
+
+template <typename T>
+void cuvs_ivf_sq<T>::build(const T* dataset, size_t nrow)
+{
+  size_t n_streams = 1;
+  raft::resource::set_cuda_stream_pool(handle_, std::make_shared<rmm::cuda_stream_pool>(n_streams));
+  index_ = std::make_shared<cuvs::neighbors::ivf_sq::index<uint8_t>>(
+    std::move(cuvs::neighbors::ivf_sq::build(
+      handle_,
+      index_params_,
+      raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, dimension_))));
+}
+
+template <typename T>
+void cuvs_ivf_sq<T>::set_search_param(const search_param_base& param, const void* filter_bitset)
+{
+  filter_        = make_cuvs_filter(filter_bitset, index_->size());
+  auto sp        = dynamic_cast<const search_param&>(param);
+  search_params_ = sp.ivf_sq_params;
+  assert(search_params_.n_probes <= index_params_.n_lists);
+}
+
+template <typename T>
+void cuvs_ivf_sq<T>::save(const std::string& file) const
+{
+  cuvs::neighbors::ivf_sq::serialize(handle_, file, *index_);
+}
+
+template <typename T>
+void cuvs_ivf_sq<T>::load(const std::string& file)
+{
+  index_ =
+    std::make_shared<cuvs::neighbors::ivf_sq::index<uint8_t>>(handle_, index_params_, this->dim_);
+  cuvs::neighbors::ivf_sq::deserialize(handle_, file, index_.get());
+}
+
+template <typename T>
+std::unique_ptr<algo<T>> cuvs_ivf_sq<T>::copy()
+{
+  return std::make_unique<cuvs_ivf_sq<T>>(*this);
+}
+
+template <typename T>
+void cuvs_ivf_sq<T>::search(
+  const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
+{
+  static_assert(sizeof(algo_base::index_type) == sizeof(int64_t));
+
+  cuvs::neighbors::ivf_sq::search(
+    handle_,
+    search_params_,
+    *index_,
+    raft::make_device_matrix_view<const T, int64_t>(queries, batch_size, index_->dim()),
+    raft::make_device_matrix_view<int64_t, int64_t>(
+      reinterpret_cast<int64_t*>(neighbors), batch_size, k),
+    raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k),
+    *filter_);
+}
+
+}  // namespace cuvs::bench
diff --git a/cpp/include/cuvs/neighbors/ivf_sq.hpp b/cpp/include/cuvs/neighbors/ivf_sq.hpp
new file mode 100644
index 0000000000..2f09751e95
--- /dev/null
+++ b/cpp/include/cuvs/neighbors/ivf_sq.hpp
@@ -0,0 +1,336 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "common.hpp"
+#include <cstdint>
+#include <cuvs/distance/distance.hpp>
+#include <cuvs/neighbors/common.hpp>
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/host_mdarray.hpp>
+#include <raft/core/host_mdspan.hpp>
+#include <raft/core/mdspan.hpp>
+
+namespace cuvs::neighbors::ivf_sq {
+
+/**
+ * @defgroup ivf_sq_cpp_index_params IVF-SQ index build parameters
+ * @{
+ */
+
+constexpr static uint32_t kIndexGroupSize = 32;
+
+struct index_params : cuvs::neighbors::index_params {
+  uint32_t n_lists                    = 1024;
+  uint32_t kmeans_n_iters             = 20;
+  double kmeans_trainset_fraction     = 0.5;
+  bool adaptive_centers               = false;
+  bool conservative_memory_allocation = false;
+  bool add_data_on_build              = true;
+};
+
+struct search_params : cuvs::neighbors::search_params {
+  uint32_t n_probes = 20;
+};
+
+static_assert(std::is_aggregate_v<index_params>);
+static_assert(std::is_aggregate_v<search_params>);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_cpp_list_spec IVF-SQ list storage spec
+ * @{
+ */
+
+template <typename SizeT, typename IdxT, typename ExtT>
+struct list_spec {
+  static_assert(std::is_same_v<IdxT, uint8_t>, "IVF-SQ code type IdxT must be uint8_t");
+
+  using value_type   = IdxT;
+  using list_extents = raft::matrix_extent<SizeT>;
+  using index_type   = ExtT;
+
+  SizeT align_max;
+  SizeT align_min;
+  uint32_t dim;
+
+  constexpr list_spec(uint32_t dim, bool conservative_memory_allocation)
+    : dim(dim),
+      align_min(kIndexGroupSize),
+      align_max(conservative_memory_allocation ? kIndexGroupSize : 1024)
+  {
+  }
+
+  template <typename OtherSizeT>
+  constexpr explicit list_spec(const list_spec<OtherSizeT, IdxT, ExtT>& other_spec)
+    : dim{other_spec.dim}, align_min{other_spec.align_min}, align_max{other_spec.align_max}
+  {
+  }
+
+  static constexpr uint32_t kVecLen = 16;
+
+  constexpr auto make_list_extents(SizeT n_rows) const -> list_extents
+  {
+    uint32_t padded = ((dim + kVecLen - 1) / kVecLen) * kVecLen;
+    return raft::make_extents<SizeT>(n_rows, padded);
+  }
+};
+
+template <typename IdxT, typename ExtT, typename SizeT = uint32_t>
+using list_data = ivf::list<list_spec, SizeT, IdxT, ExtT>;
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_cpp_index IVF-SQ index
+ * @{
+ */
+
+/**
+ * @brief IVF-SQ index.
+ *
+ * @tparam IdxT  SQ code type. Only uint8_t (8-bit, codes in [0,255]) for now.
+ *
+ * No member depends on the raw data type T (float, half). T appears only
+ * in the free-function signatures (build, search, extend) where input data
+ * is consumed, following the IVF-PQ pattern.
+ */
+template <typename IdxT>
+struct index : cuvs::neighbors::index {
+  static_assert(std::is_same_v<IdxT, uint8_t>, "IVF-SQ code type IdxT must be uint8_t for now.");
+
+  using index_params_type  = ivf_sq::index_params;
+  using search_params_type = ivf_sq::search_params;
+  using code_type          = IdxT;
+
+  static constexpr uint32_t sq_bits = sizeof(IdxT) * 8;
+
+ public:
+  index(const index&)            = delete;
+  index(index&&)                 = default;
+  index& operator=(const index&) = delete;
+  index& operator=(index&&)      = default;
+  ~index()                       = default;
+
+  index(raft::resources const& res);
+  index(raft::resources const& res, const index_params& params, uint32_t dim);
+  index(raft::resources const& res,
+        cuvs::distance::DistanceType metric,
+        uint32_t n_lists,
+        uint32_t dim,
+        bool adaptive_centers,
+        bool conservative_memory_allocation);
+
+  cuvs::distance::DistanceType metric() const noexcept;
+  bool adaptive_centers() const noexcept;
+  int64_t size() const noexcept;
+  uint32_t dim() const noexcept;
+  uint32_t n_lists() const noexcept;
+  bool conservative_memory_allocation() const noexcept;
+
+  raft::device_vector_view<uint32_t, uint32_t> list_sizes() noexcept;
+  raft::device_vector_view<const uint32_t, uint32_t> list_sizes() const noexcept;
+
+  raft::device_matrix_view<float, uint32_t, raft::row_major> centers() noexcept;
+  raft::device_matrix_view<const float, uint32_t, raft::row_major> centers() const noexcept;
+
+  std::optional<raft::device_vector_view<float, uint32_t>> center_norms() noexcept;
+  std::optional<raft::device_vector_view<const float, uint32_t>> center_norms() const noexcept;
+  void allocate_center_norms(raft::resources const& res);
+
+  raft::device_vector_view<float, uint32_t> sq_vmin() noexcept;
+  raft::device_vector_view<const float, uint32_t> sq_vmin() const noexcept;
+
+  raft::device_vector_view<float, uint32_t> sq_delta() noexcept;
+  raft::device_vector_view<const float, uint32_t> sq_delta() const noexcept;
+
+  raft::host_vector_view<int64_t, uint32_t> accum_sorted_sizes() noexcept;
+  [[nodiscard]] raft::host_vector_view<const int64_t, uint32_t> accum_sorted_sizes() const noexcept;
+
+  raft::device_vector_view<IdxT*, uint32_t> data_ptrs() noexcept;
+  raft::device_vector_view<IdxT* const, uint32_t> data_ptrs() const noexcept;
+
+  raft::device_vector_view<int64_t*, uint32_t> inds_ptrs() noexcept;
+  raft::device_vector_view<int64_t* const, uint32_t> inds_ptrs() const noexcept;
+
+  std::vector<std::shared_ptr<list_data<IdxT, int64_t>>>& lists() noexcept;
+  const std::vector<std::shared_ptr<list_data<IdxT, int64_t>>>& lists() const noexcept;
+
+  void check_consistency();
+
+ private:
+  cuvs::distance::DistanceType metric_;
+  bool adaptive_centers_;
+  bool conservative_memory_allocation_;
+
+  std::vector<std::shared_ptr<list_data<IdxT, int64_t>>> lists_;
+  raft::device_vector<uint32_t, uint32_t> list_sizes_;
+  raft::device_matrix<float, uint32_t, raft::row_major> centers_;
+  std::optional<raft::device_vector<float, uint32_t>> center_norms_;
+  raft::device_vector<float, uint32_t> sq_vmin_;
+  raft::device_vector<float, uint32_t> sq_delta_;
+
+  raft::device_vector<IdxT*, uint32_t> data_ptrs_;
+  raft::device_vector<int64_t*, uint32_t> inds_ptrs_;
+  raft::host_vector<int64_t, uint32_t> accum_sorted_sizes_;
+};
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_cpp_index_build IVF-SQ index build
+ * @{
+ */
+
+auto build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::device_matrix_view<const float, int64_t, raft::row_major> dataset)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::device_matrix_view<const float, int64_t, raft::row_major> dataset,
+           cuvs::neighbors::ivf_sq::index<uint8_t>& idx);
+
+auto build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::device_matrix_view<const half, int64_t, raft::row_major> dataset)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::device_matrix_view<const half, int64_t, raft::row_major> dataset,
+           cuvs::neighbors::ivf_sq::index<uint8_t>& idx);
+
+auto build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::host_matrix_view<const float, int64_t, raft::row_major> dataset)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::host_matrix_view<const float, int64_t, raft::row_major> dataset,
+           cuvs::neighbors::ivf_sq::index<uint8_t>& idx);
+
+auto build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::host_matrix_view<const half, int64_t, raft::row_major> dataset)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void build(raft::resources const& handle,
+           const cuvs::neighbors::ivf_sq::index_params& index_params,
+           raft::host_matrix_view<const half, int64_t, raft::row_major> dataset,
+           cuvs::neighbors::ivf_sq::index<uint8_t>& idx);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_cpp_index_extend IVF-SQ index extend
+ * @{
+ */
+
+auto extend(raft::resources const& handle,
+            raft::device_matrix_view<const float, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,
+            const cuvs::neighbors::ivf_sq::index<uint8_t>& orig_index)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void extend(raft::resources const& handle,
+            raft::device_matrix_view<const float, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,
+            cuvs::neighbors::ivf_sq::index<uint8_t>* idx);
+
+auto extend(raft::resources const& handle,
+            raft::device_matrix_view<const half, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,
+            const cuvs::neighbors::ivf_sq::index<uint8_t>& orig_index)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void extend(raft::resources const& handle,
+            raft::device_matrix_view<const half, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,
+            cuvs::neighbors::ivf_sq::index<uint8_t>* idx);
+
+auto extend(raft::resources const& handle,
+            raft::host_matrix_view<const float, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
+            const cuvs::neighbors::ivf_sq::index<uint8_t>& orig_index)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void extend(raft::resources const& handle,
+            raft::host_matrix_view<const float, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
+            cuvs::neighbors::ivf_sq::index<uint8_t>* idx);
+
+auto extend(raft::resources const& handle,
+            raft::host_matrix_view<const half, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
+            const cuvs::neighbors::ivf_sq::index<uint8_t>& orig_index)
+  -> cuvs::neighbors::ivf_sq::index<uint8_t>;
+
+void extend(raft::resources const& handle,
+            raft::host_matrix_view<const half, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
+            cuvs::neighbors::ivf_sq::index<uint8_t>* idx);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_cpp_index_search IVF-SQ index search
+ * @{
+ */
+
+void search(raft::resources const& handle,
+            const cuvs::neighbors::ivf_sq::search_params& params,
+            const cuvs::neighbors::ivf_sq::index<uint8_t>& index,
+            raft::device_matrix_view<const float, int64_t, raft::row_major> queries,
+            raft::device_matrix_view<int64_t, int64_t, raft::row_major> neighbors,
+            raft::device_matrix_view<float, int64_t, raft::row_major> distances,
+            const cuvs::neighbors::filtering::base_filter& sample_filter =
+              cuvs::neighbors::filtering::none_sample_filter{});
+
+void search(raft::resources const& handle,
+            const cuvs::neighbors::ivf_sq::search_params& params,
+            const cuvs::neighbors::ivf_sq::index<uint8_t>& index,
+            raft::device_matrix_view<const half, int64_t, raft::row_major> queries,
+            raft::device_matrix_view<int64_t, int64_t, raft::row_major> neighbors,
+            raft::device_matrix_view<float, int64_t, raft::row_major> distances,
+            const cuvs::neighbors::filtering::base_filter& sample_filter =
+              cuvs::neighbors::filtering::none_sample_filter{});
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ivf_sq_cpp_index_serialize IVF-SQ index serialize
+ * @{
+ */
+
+void serialize(raft::resources const& handle,
+               const std::string& filename,
+               const cuvs::neighbors::ivf_sq::index<uint8_t>& index);
+
+void deserialize(raft::resources const& handle,
+                 const std::string& filename,
+                 cuvs::neighbors::ivf_sq::index<uint8_t>* index);
+
+/**
+ * @}
+ */
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh
new file mode 100644
index 0000000000..6c46a20e65
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build.cuh
@@ -0,0 +1,664 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "../../core/nvtx.hpp"
+#include "../ivf_common.cuh"
+#include "../ivf_list.cuh"
+
+#include <cuvs/cluster/kmeans.hpp>
+#include <cuvs/neighbors/common.hpp>
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include "../../cluster/kmeans_balanced.cuh"
+#include "../detail/ann_utils.cuh"
+#include <cuvs/distance/distance.hpp>
+#include <raft/core/logger.hpp>
+#include <raft/core/mdarray.hpp>
+#include <raft/core/operators.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/cuda_stream_pool.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/linalg/add.cuh>
+#include <raft/linalg/map.cuh>
+#include <raft/linalg/norm.cuh>
+#include <raft/stats/histogram.cuh>
+#include <raft/util/pow2_utils.cuh>
+
+#include <cub/block/block_reduce.cuh>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <cstdint>
+#include <limits>
+
+namespace cuvs::neighbors::ivf_sq {
+using namespace cuvs::spatial::knn::detail;  // NOLINT
+
+namespace detail {
+
+struct ColMinMaxPair {
+  float min_val;
+  float max_val;
+};
+
+struct ColMinMaxOp {
+  __device__ __forceinline__ ColMinMaxPair operator()(const ColMinMaxPair& a,
+                                                      const ColMinMaxPair& b) const
+  {
+    return {fminf(a.min_val, b.min_val), fmaxf(a.max_val, b.max_val)};
+  }
+};
+
+/**
+ * Fused per-column min+max in a single pass (2x less DRAM traffic than two
+ * separate reductions).  One thread block per column; threads stride over
+ * rows and feed CUB BlockReduce with a combined min/max pair.
+ *
+ * Row-loop is manually 4x-unrolled so the compiler can overlap four
+ * independent __ldg requests in the memory pipeline.
+ */
+template <int BlockSize>
+__launch_bounds__(BlockSize) RAFT_KERNEL fused_column_minmax_kernel(const float* __restrict__ data,
+                                                                    float* __restrict__ col_min,
+                                                                    float* __restrict__ col_max,
+                                                                    int64_t n_rows,
+                                                                    uint32_t dim)
+{
+  using BlockReduce = cub::BlockReduce<ColMinMaxPair, BlockSize>;
+  __shared__ typename BlockReduce::TempStorage temp_storage;
+
+  const uint32_t col = blockIdx.x;
+  if (col >= dim) return;
+
+  ColMinMaxPair agg = {std::numeric_limits<float>::max(), std::numeric_limits<float>::lowest()};
+
+  const int64_t stride = static_cast<int64_t>(BlockSize);
+  int64_t row          = static_cast<int64_t>(threadIdx.x);
+
+  for (; row + 3 * stride < n_rows; row += 4 * stride) {
+    float v0    = __ldg(&data[row * dim + col]);
+    float v1    = __ldg(&data[(row + stride) * dim + col]);
+    float v2    = __ldg(&data[(row + 2 * stride) * dim + col]);
+    float v3    = __ldg(&data[(row + 3 * stride) * dim + col]);
+    agg.min_val = fminf(agg.min_val, fminf(fminf(v0, v1), fminf(v2, v3)));
+    agg.max_val = fmaxf(agg.max_val, fmaxf(fmaxf(v0, v1), fmaxf(v2, v3)));
+  }
+  for (; row < n_rows; row += stride) {
+    float val   = __ldg(&data[row * dim + col]);
+    agg.min_val = fminf(agg.min_val, val);
+    agg.max_val = fmaxf(agg.max_val, val);
+  }
+
+  agg = BlockReduce(temp_storage).Reduce(agg, ColMinMaxOp());
+
+  if (threadIdx.x == 0) {
+    col_min[col] = agg.min_val;
+    col_max[col] = agg.max_val;
+  }
+}
+
+template <typename IdxT>
+auto clone(const raft::resources& res, const index<IdxT>& source) -> index<IdxT>
+{
+  auto stream = raft::resource::get_cuda_stream(res);
+
+  index<IdxT> target(res,
+                     source.metric(),
+                     source.n_lists(),
+                     source.dim(),
+                     source.adaptive_centers(),
+                     source.conservative_memory_allocation());
+
+  raft::copy(target.list_sizes().data_handle(),
+             source.list_sizes().data_handle(),
+             source.list_sizes().size(),
+             stream);
+  raft::copy(target.centers().data_handle(),
+             source.centers().data_handle(),
+             source.centers().size(),
+             stream);
+  if (source.center_norms().has_value()) {
+    target.allocate_center_norms(res);
+    raft::copy(target.center_norms()->data_handle(),
+               source.center_norms()->data_handle(),
+               source.center_norms()->size(),
+               stream);
+  }
+  raft::copy(target.sq_vmin().data_handle(),
+             source.sq_vmin().data_handle(),
+             source.sq_vmin().size(),
+             stream);
+  raft::copy(target.sq_delta().data_handle(),
+             source.sq_delta().data_handle(),
+             source.sq_delta().size(),
+             stream);
+  target.lists() = source.lists();
+  ivf::detail::recompute_internal_state(res, target);
+  return target;
+}
+
+/**
+ * Kernel to encode float residuals to uint8_t SQ codes and write them interleaved.
+ *
+ * Uses warp-per-vector parallelism: each warp cooperatively encodes one vector
+ * so that reads from residuals/vmin/delta are coalesced across the 32 lanes.
+ * Lane 0 handles the atomic position assignment and the index write.
+ */
+template <int BlockSize>
+__launch_bounds__(BlockSize) RAFT_KERNEL encode_and_fill_kernel(const uint32_t* labels,
+                                                                const float* residuals,
+                                                                const int64_t* source_ixs,
+                                                                uint8_t** list_data_ptrs,
+                                                                int64_t** list_index_ptrs,
+                                                                uint32_t* list_sizes_ptr,
+                                                                const float* vmin,
+                                                                const float* delta,
+                                                                int64_t n_rows,
+                                                                uint32_t dim,
+                                                                int64_t batch_offset)
+{
+  constexpr uint32_t kWarpSize      = kIndexGroupSize;
+  constexpr uint32_t kWarpsPerBlock = BlockSize / kWarpSize;
+
+  const uint32_t lane_id = threadIdx.x % kWarpSize;
+  const int64_t row_id =
+    int64_t(threadIdx.x / kWarpSize) + int64_t(blockIdx.x) * int64_t(kWarpsPerBlock);
+  if (row_id >= n_rows) return;
+
+  uint32_t list_id   = 0;
+  uint32_t inlist_id = 0;
+  if (lane_id == 0) {
+    auto source_ix = source_ixs == nullptr ? row_id + batch_offset : source_ixs[row_id];
+    list_id        = labels[row_id];
+    inlist_id      = atomicAdd(list_sizes_ptr + list_id, 1);
+    list_index_ptrs[list_id][inlist_id] = source_ix;
+  }
+  list_id   = __shfl_sync(0xFFFFFFFF, list_id, 0);
+  inlist_id = __shfl_sync(0xFFFFFFFF, inlist_id, 0);
+
+  using interleaved_group = raft::Pow2<kIndexGroupSize>;
+  auto group_offset       = interleaved_group::roundDown(inlist_id);
+  auto ingroup_id         = interleaved_group::mod(inlist_id);
+
+  constexpr uint32_t veclen = list_spec<uint32_t, uint8_t, int64_t>::kVecLen;
+  uint32_t padded_dim       = ((dim + veclen - 1) / veclen) * veclen;
+  auto* list_dat   = list_data_ptrs[list_id] + static_cast<size_t>(group_offset) * padded_dim;
+  const float* src = residuals + row_id * dim;
+
+  for (uint32_t d = lane_id; d < padded_dim; d += kWarpSize) {
+    uint8_t out;
+    if (d < dim) {
+      float val  = src[d];
+      float dv   = delta[d];
+      float v    = vmin[d];
+      float code = (dv > 0.0f) ? roundf((val - v) / dv) : 0.0f;
+      out        = static_cast<uint8_t>(fminf(fmaxf(code, 0.0f), 255.0f));
+    } else {
+      out = 0;
+    }
+    uint32_t l                                              = (d / veclen) * veclen;
+    uint32_t j                                              = d % veclen;
+    list_dat[l * kIndexGroupSize + ingroup_id * veclen + j] = out;
+  }
+}
+
+/**
+ * Compute residuals: residual[i] = cast<float>(x_i) - centers[labels[i]]
+ */
+template <typename T>
+RAFT_KERNEL compute_residuals_kernel(const T* dataset,
+                                     const float* centers,
+                                     const uint32_t* labels,
+                                     float* residuals,
+                                     int64_t n_rows,
+                                     uint32_t dim)
+{
+  int64_t i  = int64_t(blockIdx.x) * blockDim.x + threadIdx.x;
+  uint32_t j = blockIdx.y * blockDim.y + threadIdx.y;
+  if (i >= n_rows || j >= dim) return;
+
+  float val              = utils::mapping<float>{}(dataset[i * dim + j]);
+  uint32_t c             = labels[i];
+  residuals[i * dim + j] = val - centers[c * dim + j];
+}
+
+template <typename T, typename IdxT>
+void extend(raft::resources const& handle,
+            index<IdxT>* index,
+            const T* new_vectors,
+            const int64_t* new_indices,
+            int64_t n_rows)
+{
+  using LabelT = uint32_t;
+  RAFT_EXPECTS(index != nullptr, "index cannot be empty.");
+  if (n_rows == 0) return;
+
+  auto stream  = raft::resource::get_cuda_stream(handle);
+  auto n_lists = index->n_lists();
+  auto dim     = index->dim();
+  list_spec<uint32_t, IdxT, int64_t> list_device_spec{index->dim(),
+                                                      index->conservative_memory_allocation()};
+  cuvs::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
+    "ivf_sq::extend(%zu, %u)", size_t(n_rows), dim);
+
+  RAFT_EXPECTS(new_indices != nullptr || index->size() == 0,
+               "You must pass data indices when the index is non-empty.");
+
+  auto new_labels =
+    raft::make_device_mdarray<LabelT>(handle,
+                                      raft::resource::get_large_workspace_resource(handle),
+                                      raft::make_extents<int64_t>(n_rows));
+  cuvs::cluster::kmeans::balanced_params kmeans_params;
+  kmeans_params.metric     = index->metric();
+  auto orig_centroids_view = raft::make_device_matrix_view<const float, int64_t>(
+    index->centers().data_handle(), n_lists, dim);
+
+  constexpr size_t kReasonableMaxBatchSize = 65536;
+  size_t max_batch_size                    = std::min<size_t>(n_rows, kReasonableMaxBatchSize);
+
+  auto copy_stream     = raft::resource::get_cuda_stream(handle);
+  bool enable_prefetch = false;
+  if (handle.has_resource_factory(raft::resource::resource_type::CUDA_STREAM_POOL)) {
+    if (raft::resource::get_stream_pool_size(handle) >= 1) {
+      enable_prefetch = true;
+      copy_stream     = raft::resource::get_stream_from_stream_pool(handle);
+    }
+  }
+
+  utils::batch_load_iterator<T> vec_batches(new_vectors,
+                                            n_rows,
+                                            index->dim(),
+                                            max_batch_size,
+                                            copy_stream,
+                                            raft::resource::get_workspace_resource(handle),
+                                            enable_prefetch);
+  vec_batches.prefetch_next_batch();
+
+  for (const auto& batch : vec_batches) {
+    auto batch_data_view =
+      raft::make_device_matrix_view<const T, int64_t>(batch.data(), batch.size(), index->dim());
+    auto batch_labels_view = raft::make_device_vector_view<LabelT, int64_t>(
+      new_labels.data_handle() + batch.offset(), batch.size());
+    cuvs::cluster::kmeans::predict(
+      handle, kmeans_params, batch_data_view, orig_centroids_view, batch_labels_view);
+    vec_batches.prefetch_next_batch();
+    raft::resource::sync_stream(handle);
+  }
+
+  auto* list_sizes_ptr    = index->list_sizes().data_handle();
+  auto old_list_sizes_dev = raft::make_device_mdarray<uint32_t>(
+    handle, raft::resource::get_workspace_resource(handle), raft::make_extents<int64_t>(n_lists));
+  raft::copy(old_list_sizes_dev.data_handle(), list_sizes_ptr, n_lists, stream);
+
+  if (index->adaptive_centers()) {
+    auto centroids_view = raft::make_device_matrix_view<float, int64_t>(
+      index->centers().data_handle(), index->centers().extent(0), index->centers().extent(1));
+    auto list_sizes_view =
+      raft::make_device_vector_view<std::remove_pointer_t<decltype(list_sizes_ptr)>, int64_t>(
+        list_sizes_ptr, n_lists);
+    for (const auto& batch : vec_batches) {
+      auto batch_data_view =
+        raft::make_device_matrix_view<const T, int64_t>(batch.data(), batch.size(), index->dim());
+      auto batch_labels_view = raft::make_device_vector_view<const LabelT, int64_t>(
+        new_labels.data_handle() + batch.offset(), batch.size());
+      cuvs::cluster::kmeans_balanced::helpers::calc_centers_and_sizes(handle,
+                                                                      batch_data_view,
+                                                                      batch_labels_view,
+                                                                      centroids_view,
+                                                                      list_sizes_view,
+                                                                      false,
+                                                                      utils::mapping<float>{});
+    }
+  } else {
+    raft::stats::histogram<uint32_t, int64_t>(raft::stats::HistTypeAuto,
+                                              reinterpret_cast<int32_t*>(list_sizes_ptr),
+                                              int64_t(n_lists),
+                                              new_labels.data_handle(),
+                                              n_rows,
+                                              1,
+                                              stream);
+    raft::linalg::add(
+      list_sizes_ptr, list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream);
+  }
+
+  std::vector<uint32_t> new_list_sizes(n_lists);
+  std::vector<uint32_t> old_list_sizes(n_lists);
+  {
+    raft::copy(old_list_sizes.data(), old_list_sizes_dev.data_handle(), n_lists, stream);
+    raft::copy(new_list_sizes.data(), list_sizes_ptr, n_lists, stream);
+    raft::resource::sync_stream(handle);
+    auto& lists = index->lists();
+    for (uint32_t label = 0; label < n_lists; label++) {
+      ivf::resize_list(handle,
+                       lists[label],
+                       list_device_spec,
+                       new_list_sizes[label],
+                       raft::Pow2<kIndexGroupSize>::roundUp(old_list_sizes[label]));
+    }
+  }
+  ivf::detail::recompute_internal_state(handle, *index);
+  raft::copy(list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream);
+
+  utils::batch_load_iterator<int64_t> vec_indices(
+    new_indices, n_rows, 1, max_batch_size, stream, raft::resource::get_workspace_resource(handle));
+  vec_batches.reset();
+  vec_batches.prefetch_next_batch();
+  utils::batch_load_iterator<int64_t> idx_batch = vec_indices.begin();
+
+  auto residuals_buf = raft::make_device_vector<float>(handle, max_batch_size * dim);
+
+  size_t next_report_offset = 0;
+  size_t d_report_offset    = n_rows * 5 / 100;
+
+  for (const auto& batch : vec_batches) {
+    int64_t bs = batch.size();
+
+    {
+      dim3 threads(32, 8);
+      dim3 blocks(raft::ceildiv<int64_t>(bs, threads.x), raft::ceildiv<uint32_t>(dim, threads.y));
+      compute_residuals_kernel<T>
+        <<<blocks, threads, 0, stream>>>(batch.data(),
+                                         index->centers().data_handle(),
+                                         new_labels.data_handle() + batch.offset(),
+                                         residuals_buf.data_handle(),
+                                         bs,
+                                         dim);
+      RAFT_CUDA_TRY(cudaPeekAtLastError());
+    }
+
+    {
+      constexpr int kEncodeBlockSize   = 256;
+      constexpr int kEncodeWarpsPerBlk = kEncodeBlockSize / kIndexGroupSize;
+      const dim3 block_dim(kEncodeBlockSize);
+      const dim3 grid_dim(raft::ceildiv<int64_t>(bs, int64_t(kEncodeWarpsPerBlk)));
+      encode_and_fill_kernel<kEncodeBlockSize>
+        <<<grid_dim, block_dim, 0, stream>>>(new_labels.data_handle() + batch.offset(),
+                                             residuals_buf.data_handle(),
+                                             idx_batch->data(),
+                                             index->data_ptrs().data_handle(),
+                                             index->inds_ptrs().data_handle(),
+                                             list_sizes_ptr,
+                                             index->sq_vmin().data_handle(),
+                                             index->sq_delta().data_handle(),
+                                             bs,
+                                             dim,
+                                             batch.offset());
+      RAFT_CUDA_TRY(cudaPeekAtLastError());
+    }
+
+    vec_batches.prefetch_next_batch();
+    raft::resource::sync_stream(handle);
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
+
+    if (batch.offset() > next_report_offset) {
+      float progress = batch.offset() * 100.0f / n_rows;
+      RAFT_LOG_DEBUG("ivf_sq::extend added vectors %zu, %6.1f%% complete",
+                     static_cast<size_t>(batch.offset()),
+                     progress);
+      next_report_offset += d_report_offset;
+    }
+    ++idx_batch;
+  }
+
+  auto compute_center_norms = [&]() {
+    if (index->metric() == cuvs::distance::DistanceType::CosineExpanded) {
+      raft::linalg::rowNorm<raft::linalg::L2Norm, true>(index->center_norms()->data_handle(),
+                                                        index->centers().data_handle(),
+                                                        dim,
+                                                        n_lists,
+                                                        stream,
+                                                        raft::sqrt_op{});
+    } else {
+      raft::linalg::rowNorm<raft::linalg::L2Norm, true>(
+        index->center_norms()->data_handle(), index->centers().data_handle(), dim, n_lists, stream);
+    }
+  };
+
+  if (!index->center_norms().has_value()) {
+    index->allocate_center_norms(handle);
+    if (index->center_norms().has_value()) { compute_center_norms(); }
+  } else if (index->adaptive_centers()) {
+    compute_center_norms();
+  }
+}
+
+template <typename T, typename IdxT>
+auto extend(raft::resources const& handle,
+            const index<IdxT>& orig_index,
+            const T* new_vectors,
+            const int64_t* new_indices,
+            int64_t n_rows) -> index<IdxT>
+{
+  auto ext_index = clone(handle, orig_index);
+  detail::extend(handle, &ext_index, new_vectors, new_indices, n_rows);
+  return ext_index;
+}
+
+template <typename T, typename IdxT>
+inline auto build(raft::resources const& handle,
+                  const index_params& params,
+                  const T* dataset,
+                  int64_t n_rows,
+                  uint32_t dim) -> index<IdxT>
+{
+  auto stream = raft::resource::get_cuda_stream(handle);
+  cuvs::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
+    "ivf_sq::build(%zu, %u)", size_t(n_rows), dim);
+  static_assert(std::is_same_v<T, float> || std::is_same_v<T, half>, "unsupported data type");
+  RAFT_EXPECTS(n_rows > 0 && dim > 0, "empty dataset");
+  RAFT_EXPECTS(n_rows >= params.n_lists, "number of rows can't be less than n_lists");
+  RAFT_EXPECTS(params.metric != cuvs::distance::DistanceType::CosineExpanded || dim > 1,
+               "Cosine metric requires more than one dim");
+
+  index<IdxT> idx(handle, params, dim);
+  utils::memzero(idx.accum_sorted_sizes().data_handle(), idx.accum_sorted_sizes().size(), stream);
+  utils::memzero(idx.list_sizes().data_handle(), idx.list_sizes().size(), stream);
+  utils::memzero(idx.data_ptrs().data_handle(), idx.data_ptrs().size(), stream);
+  utils::memzero(idx.inds_ptrs().data_handle(), idx.inds_ptrs().size(), stream);
+
+  // Train k-means centroids and SQ parameters on the same training subset.
+  // This mirrors IVF-PQ, which also trains its codebook on a subset of the data.
+  {
+    auto trainset_ratio = std::max<size_t>(
+      1, n_rows / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, idx.n_lists()));
+    auto n_rows_train = n_rows / trainset_ratio;
+    rmm::device_uvector<T> trainset(
+      n_rows_train * idx.dim(), stream, raft::resource::get_large_workspace_resource(handle));
+    RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(),
+                                    sizeof(T) * idx.dim(),
+                                    dataset,
+                                    sizeof(T) * idx.dim() * trainset_ratio,
+                                    sizeof(T) * idx.dim(),
+                                    n_rows_train,
+                                    cudaMemcpyDefault,
+                                    stream));
+    auto trainset_const_view =
+      raft::make_device_matrix_view<const T, int64_t>(trainset.data(), n_rows_train, idx.dim());
+    auto centers_view = raft::make_device_matrix_view<float, int64_t>(
+      idx.centers().data_handle(), idx.n_lists(), idx.dim());
+    cuvs::cluster::kmeans::balanced_params kmeans_params;
+    kmeans_params.n_iters = params.kmeans_n_iters;
+    kmeans_params.metric  = idx.metric();
+    cuvs::cluster::kmeans::fit(handle, kmeans_params, trainset_const_view, centers_view);
+    raft::resource::sync_stream(handle);
+
+    // Train SQ: predict labels for the training subset, compute its residuals,
+    // and derive per-dimension vmin/delta from them.
+    auto train_labels = raft::make_device_vector<uint32_t, int64_t>(handle, n_rows_train);
+    {
+      cuvs::cluster::kmeans::balanced_params pred_params;
+      pred_params.metric      = idx.metric();
+      auto centers_const_view = raft::make_device_matrix_view<const float, int64_t>(
+        idx.centers().data_handle(), idx.n_lists(), dim);
+      cuvs::cluster::kmeans::predict(
+        handle, pred_params, trainset_const_view, centers_const_view, train_labels.view());
+      raft::resource::sync_stream(handle);
+    }
+
+    rmm::device_uvector<float> residuals(
+      n_rows_train * dim, stream, raft::resource::get_large_workspace_resource(handle));
+    {
+      dim3 threads(32, 8);
+      dim3 blocks(raft::ceildiv<int64_t>(n_rows_train, threads.x),
+                  raft::ceildiv<uint32_t>(dim, threads.y));
+      compute_residuals_kernel<T><<<blocks, threads, 0, stream>>>(trainset.data(),
+                                                                  idx.centers().data_handle(),
+                                                                  train_labels.data_handle(),
+                                                                  residuals.data(),
+                                                                  n_rows_train,
+                                                                  dim);
+      RAFT_CUDA_TRY(cudaPeekAtLastError());
+    }
+
+    {
+      auto vmax_buf  = raft::make_device_vector<float, uint32_t>(handle, dim);
+      auto* vmin_ptr = idx.sq_vmin().data_handle();
+      auto* vmax_ptr = vmax_buf.data_handle();
+
+      constexpr int kMinMaxBlockSize = 256;
+      fused_column_minmax_kernel<kMinMaxBlockSize><<<dim, kMinMaxBlockSize, 0, stream>>>(
+        residuals.data(), vmin_ptr, vmax_ptr, n_rows_train, dim);
+      RAFT_CUDA_TRY(cudaPeekAtLastError());
+
+      // Expand the observed range by a small margin to reduce clipping on unseen data,
+      // since the SQ parameters are trained on a subset rather than the full dataset.
+      constexpr float kMargin = 0.05f;
+      auto* delta_ptr         = idx.sq_delta().data_handle();
+      raft::linalg::map_offset(
+        handle, idx.sq_vmin(), [vmin_ptr, vmax_ptr, delta_ptr, kMargin] __device__(uint32_t j) {
+          float range  = vmax_ptr[j] - vmin_ptr[j];
+          float margin = range * kMargin;
+          delta_ptr[j] = (range > 0.0f) ? (range + 2.0f * margin) / 255.0f : 1.0f;
+          return vmin_ptr[j] - margin;
+        });
+    }
+  }
+
+  if (params.add_data_on_build) { detail::extend<T, IdxT>(handle, &idx, dataset, nullptr, n_rows); }
+
+  return idx;
+}
+
+template <typename T, typename IdxT>
+auto build(raft::resources const& handle,
+           const index_params& params,
+           raft::device_matrix_view<const T, int64_t, raft::row_major> dataset) -> index<IdxT>
+{
+  int64_t n_rows = dataset.extent(0);
+  uint32_t dim   = dataset.extent(1);
+  return build<T, IdxT>(handle, params, dataset.data_handle(), n_rows, dim);
+}
+
+template <typename T, typename IdxT>
+auto build(raft::resources const& handle,
+           const index_params& params,
+           raft::host_matrix_view<const T, int64_t, raft::row_major> dataset) -> index<IdxT>
+{
+  int64_t n_rows = dataset.extent(0);
+  uint32_t dim   = dataset.extent(1);
+  return build<T, IdxT>(handle, params, dataset.data_handle(), n_rows, dim);
+}
+
+template <typename T, typename IdxT>
+void build(raft::resources const& handle,
+           const index_params& params,
+           raft::device_matrix_view<const T, int64_t, raft::row_major> dataset,
+           index<IdxT>& idx)
+{
+  idx = build<T, IdxT>(handle, params, dataset);
+}
+
+template <typename T, typename IdxT>
+void build(raft::resources const& handle,
+           const index_params& params,
+           raft::host_matrix_view<const T, int64_t, raft::row_major> dataset,
+           index<IdxT>& idx)
+{
+  idx = build<T, IdxT>(handle, params, dataset);
+}
+
+template <typename T, typename IdxT>
+auto extend(raft::resources const& handle,
+            raft::device_matrix_view<const T, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,
+            const index<IdxT>& orig_index) -> index<IdxT>
+{
+  RAFT_EXPECTS(new_vectors.extent(1) == orig_index.dim(),
+               "new_vectors should have the same dimension as the index");
+  if (new_indices.has_value()) {
+    RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0),
+                 "new_vectors and new_indices have different number of rows");
+  }
+  int64_t n_rows = new_vectors.extent(0);
+  return extend<T, IdxT>(handle,
+                         orig_index,
+                         new_vectors.data_handle(),
+                         new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+                         n_rows);
+}
+
+template <typename T, typename IdxT>
+auto extend(raft::resources const& handle,
+            raft::host_matrix_view<const T, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
+            const index<IdxT>& orig_index) -> index<IdxT>
+{
+  RAFT_EXPECTS(new_vectors.extent(1) == orig_index.dim(),
+               "new_vectors should have the same dimension as the index");
+  if (new_indices.has_value()) {
+    RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0),
+                 "new_vectors and new_indices have different number of rows");
+  }
+  int64_t n_rows = new_vectors.extent(0);
+  return extend<T, IdxT>(handle,
+                         orig_index,
+                         new_vectors.data_handle(),
+                         new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+                         n_rows);
+}
+
+template <typename T, typename IdxT>
+void extend(raft::resources const& handle,
+            raft::device_matrix_view<const T, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,
+            index<IdxT>* idx)
+{
+  RAFT_EXPECTS(new_vectors.extent(1) == idx->dim(),
+               "new_vectors should have the same dimension as the index");
+  if (new_indices.has_value()) {
+    RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0),
+                 "new_vectors and new_indices have different number of rows");
+  }
+  detail::extend(handle,
+                 idx,
+                 new_vectors.data_handle(),
+                 new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+                 new_vectors.extent(0));
+}
+
+template <typename T, typename IdxT>
+void extend(raft::resources const& handle,
+            raft::host_matrix_view<const T, int64_t, raft::row_major> new_vectors,
+            std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,
+            index<IdxT>* idx)
+{
+  RAFT_EXPECTS(new_vectors.extent(1) == idx->dim(),
+               "new_vectors should have the same dimension as the index");
+  if (new_indices.has_value()) {
+    RAFT_EXPECTS(new_indices.value().extent(0) == new_vectors.extent(0),
+                 "new_vectors and new_indices have different number of rows");
+  }
+  detail::extend(handle,
+                 idx,
+                 new_vectors.data_handle(),
+                 new_indices.has_value() ? new_indices.value().data_handle() : nullptr,
+                 new_vectors.extent(0));
+}
+
+}  // namespace detail
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..a97aebb11c
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu
@@ -0,0 +1,89 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include "ivf_sq_build.cuh"
+
+namespace cuvs::neighbors::ivf_sq {
+
+#define CUVS_INST_IVF_SQ_BUILD_EXTEND(T, IdxT)                                               \
+  auto build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset)            \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset)));  \
+  }                                                                                          \
+                                                                                             \
+  void build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset,            \
+             cuvs::neighbors::ivf_sq::index<IdxT>& idx)                                      \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset, idx);           \
+  }                                                                                          \
+                                                                                             \
+  auto build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)              \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset)));  \
+  }                                                                                          \
+                                                                                             \
+  void build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset,              \
+             cuvs::neighbors::ivf_sq::index<IdxT>& idx)                                      \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset, idx);           \
+  }                                                                                          \
+                                                                                             \
+  auto extend(raft::resources const& handle,                                                 \
+              raft::device_matrix_view<const T, int64_t, raft::row_major> new_vectors,       \
+              std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,   \
+              const cuvs::neighbors::ivf_sq::index<IdxT>& orig_index)                        \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(                            \
+        handle, new_vectors, new_indices, orig_index)));                                     \
+  }                                                                                          \
+                                                                                             \
+  void extend(raft::resources const& handle,                                                 \
+              raft::device_matrix_view<const T, int64_t, raft::row_major> new_vectors,       \
+              std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,   \
+              cuvs::neighbors::ivf_sq::index<IdxT>* idx)                                     \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(handle, new_vectors, new_indices, idx); \
+  }                                                                                          \
+                                                                                             \
+  auto extend(raft::resources const& handle,                                                 \
+              raft::host_matrix_view<const T, int64_t, raft::row_major> new_vectors,         \
+              std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,     \
+              const cuvs::neighbors::ivf_sq::index<IdxT>& orig_index)                        \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(                            \
+        handle, new_vectors, new_indices, orig_index)));                                     \
+  }                                                                                          \
+                                                                                             \
+  void extend(raft::resources const& handle,                                                 \
+              raft::host_matrix_view<const T, int64_t, raft::row_major> new_vectors,         \
+              std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,     \
+              cuvs::neighbors::ivf_sq::index<IdxT>* idx)                                     \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(handle, new_vectors, new_indices, idx); \
+  }
+
+CUVS_INST_IVF_SQ_BUILD_EXTEND(float, uint8_t);
+
+#undef CUVS_INST_IVF_SQ_BUILD_EXTEND
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..9148e5c328
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu
@@ -0,0 +1,89 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include "ivf_sq_build.cuh"
+
+namespace cuvs::neighbors::ivf_sq {
+
+#define CUVS_INST_IVF_SQ_BUILD_EXTEND(T, IdxT)                                               \
+  auto build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset)            \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset)));  \
+  }                                                                                          \
+                                                                                             \
+  void build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::device_matrix_view<const T, int64_t, raft::row_major> dataset,            \
+             cuvs::neighbors::ivf_sq::index<IdxT>& idx)                                      \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset, idx);           \
+  }                                                                                          \
+                                                                                             \
+  auto build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)              \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset)));  \
+  }                                                                                          \
+                                                                                             \
+  void build(raft::resources const& handle,                                                  \
+             const cuvs::neighbors::ivf_sq::index_params& params,                            \
+             raft::host_matrix_view<const T, int64_t, raft::row_major> dataset,              \
+             cuvs::neighbors::ivf_sq::index<IdxT>& idx)                                      \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::build<T, IdxT>(handle, params, dataset, idx);           \
+  }                                                                                          \
+                                                                                             \
+  auto extend(raft::resources const& handle,                                                 \
+              raft::device_matrix_view<const T, int64_t, raft::row_major> new_vectors,       \
+              std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,   \
+              const cuvs::neighbors::ivf_sq::index<IdxT>& orig_index)                        \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(                            \
+        handle, new_vectors, new_indices, orig_index)));                                     \
+  }                                                                                          \
+                                                                                             \
+  void extend(raft::resources const& handle,                                                 \
+              raft::device_matrix_view<const T, int64_t, raft::row_major> new_vectors,       \
+              std::optional<raft::device_vector_view<const int64_t, int64_t>> new_indices,   \
+              cuvs::neighbors::ivf_sq::index<IdxT>* idx)                                     \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(handle, new_vectors, new_indices, idx); \
+  }                                                                                          \
+                                                                                             \
+  auto extend(raft::resources const& handle,                                                 \
+              raft::host_matrix_view<const T, int64_t, raft::row_major> new_vectors,         \
+              std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,     \
+              const cuvs::neighbors::ivf_sq::index<IdxT>& orig_index)                        \
+    -> cuvs::neighbors::ivf_sq::index<IdxT>                                                  \
+  {                                                                                          \
+    return cuvs::neighbors::ivf_sq::index<IdxT>(                                             \
+      std::move(cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(                            \
+        handle, new_vectors, new_indices, orig_index)));                                     \
+  }                                                                                          \
+                                                                                             \
+  void extend(raft::resources const& handle,                                                 \
+              raft::host_matrix_view<const T, int64_t, raft::row_major> new_vectors,         \
+              std::optional<raft::host_vector_view<const int64_t, int64_t>> new_indices,     \
+              cuvs::neighbors::ivf_sq::index<IdxT>* idx)                                     \
+  {                                                                                          \
+    cuvs::neighbors::ivf_sq::detail::extend<T, IdxT>(handle, new_vectors, new_indices, idx); \
+  }
+
+CUVS_INST_IVF_SQ_BUILD_EXTEND(half, uint8_t);
+
+#undef CUVS_INST_IVF_SQ_BUILD_EXTEND
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh
new file mode 100644
index 0000000000..a17992ff19
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search.cuh
@@ -0,0 +1,549 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "../../core/nvtx.hpp"
+#include "../detail/ann_utils.cuh"
+#include "../ivf_common.cuh"
+#include "../sample_filter.cuh"
+#include <cuvs/neighbors/common.hpp>
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include <cuvs/distance/distance.hpp>
+#include <cuvs/selection/select_k.hpp>
+#include <raft/core/error.hpp>
+#include <raft/core/logger.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/resource/device_memory_resource.hpp>
+#include <raft/core/resources.hpp>
+#include <raft/linalg/gemm.cuh>
+#include <raft/linalg/norm.cuh>
+#include <raft/linalg/unary_op.cuh>
+#include <raft/matrix/detail/select_warpsort.cuh>
+
+#include <rmm/resource_ref.hpp>
+
+#include <thrust/fill.h>
+
+namespace cuvs::neighbors::ivf_sq::detail {
+
+using namespace cuvs::spatial::knn::detail;  // NOLINT
+
+enum class SqScanMetric { kL2, kIP, kCosine };
+
+/**
+ * Per-probe scan kernel for IVF-SQ search.
+ *
+ * Grid: (n_queries, n_probes).  Each block handles one (query, probe) pair.
+ * Within a block, each warp processes one interleaved group of kIndexGroupSize
+ * (=32) vectors at a time, with each lane responsible for one vector.
+ * Dimension blocks of veclen=16 bytes are loaded as coalesced uint4 reads
+ * across the warp (32 lanes x 16 bytes = 512 bytes = 4 cache lines), giving
+ * full memory-bandwidth utilisation.
+ *
+ * Per-dimension constants that are invariant across rows are precomputed into
+ * shared memory so the hot loop only reads from smem + one uint4 per dim-block:
+ *
+ *   L2 / L2Sqrt:
+ *     s_query_term[d] = query[d] - centroid[d] - sq_vmin[d]
+ *     dist += (s_query_term[d] - code * s_sq_scale[d])^2
+ *
+ *   InnerProduct / Cosine:
+ *     s_query_term[d] = query[d]
+ *     s_recon_base[d] = centroid[d] + sq_vmin[d]
+ *     v_d   = s_recon_base[d] + code * s_sq_scale[d]
+ *     dist += s_query_term[d] * v_d
+ *
+ * Shared-memory layout adapts to the metric to avoid waste:
+ *   L2 / L2Sqrt       : [s_query_term | s_sq_scale]                (2 * dim floats)
+ *   InnerProduct/Cosine: [s_query_term | s_recon_base | s_sq_scale] (3 * dim floats)
+ */
+template <int BlockDim, SqScanMetric Metric, typename IdxT, typename IvfSampleFilterT>
+__launch_bounds__(BlockDim) RAFT_KERNEL ivf_sq_scan_kernel(const uint8_t* const* data_ptrs,
+                                                           const uint32_t* list_sizes,
+                                                           const uint32_t* coarse_indices,
+                                                           const float* queries_float,
+                                                           const float* centers,
+                                                           const float* sq_vmin,
+                                                           const float* sq_delta,
+                                                           const float* query_norms,
+                                                           uint32_t n_probes,
+                                                           uint32_t dim,
+                                                           uint32_t max_samples,
+                                                           const uint32_t* chunk_indices,
+                                                           float* out_distances,
+                                                           uint32_t* out_indices,
+                                                           IvfSampleFilterT sample_filter)
+{
+  static_assert(kIndexGroupSize == raft::WarpSize,
+                "Warp-coalesced scan requires kIndexGroupSize == WarpSize");
+
+  extern __shared__ float smem[];
+
+  constexpr bool kIsL2     = (Metric == SqScanMetric::kL2);
+  constexpr bool kIsCosine = (Metric == SqScanMetric::kCosine);
+
+  float* s_query_term = smem;
+  float* s_recon_base = smem + dim;
+  float* s_sq_scale   = kIsL2 ? (smem + dim) : (smem + 2 * dim);
+
+  const uint32_t query_ix = blockIdx.x;
+  const uint32_t probe_ix = blockIdx.y;
+
+  const uint32_t* my_coarse = coarse_indices + query_ix * n_probes;
+  const uint32_t cluster_id = my_coarse[probe_ix];
+  const uint32_t cluster_sz = list_sizes[cluster_id];
+  if (cluster_sz == 0) return;
+
+  const uint8_t* codes  = data_ptrs[cluster_id];
+  const float* query    = queries_float + query_ix * dim;
+  const float* centroid = centers + cluster_id * dim;
+
+  for (uint32_t d = threadIdx.x; d < dim; d += BlockDim) {
+    float vmin_d  = sq_vmin[d];
+    s_sq_scale[d] = sq_delta[d];
+    if constexpr (kIsL2) {
+      s_query_term[d] = query[d] - centroid[d] - vmin_d;
+    } else {
+      s_query_term[d] = query[d];
+      s_recon_base[d] = centroid[d] + vmin_d;
+    }
+  }
+  __syncthreads();
+
+  const uint32_t* my_chunk = chunk_indices + query_ix * n_probes;
+  uint32_t out_base        = (probe_ix > 0) ? my_chunk[probe_ix - 1] : 0;
+
+  constexpr uint32_t veclen         = 16;
+  constexpr uint32_t kWarpsPerBlock = BlockDim / raft::WarpSize;
+  const uint32_t warp_id            = threadIdx.x / raft::WarpSize;
+  const uint32_t lane_id            = threadIdx.x % raft::WarpSize;
+
+  uint32_t padded_dim   = ((dim + veclen - 1) / veclen) * veclen;
+  uint32_t n_dim_blocks = padded_dim / veclen;
+
+  for (uint32_t group = warp_id * kIndexGroupSize; group < cluster_sz;
+       group += kWarpsPerBlock * kIndexGroupSize) {
+    const uint32_t row = group + lane_id;
+    const bool valid   = (row < cluster_sz) && sample_filter(query_ix, cluster_id, row);
+
+    float dist      = 0.0f;
+    float v_norm_sq = 0.0f;
+
+    const uint8_t* group_data = codes + size_t(group) * padded_dim;
+
+    for (uint32_t bl = 0; bl < n_dim_blocks; bl++) {
+      uint8_t codes_local[veclen];
+      *reinterpret_cast<uint4*>(codes_local) = *reinterpret_cast<const uint4*>(
+        group_data + bl * (veclen * kIndexGroupSize) + lane_id * veclen);
+
+      const uint32_t l = bl * veclen;
+#pragma unroll
+      for (uint32_t j = 0; j < veclen; j++) {
+        if (l + j < dim) {
+          float recon = float(codes_local[j]) * s_sq_scale[l + j];
+
+          if constexpr (kIsL2) {
+            float diff = s_query_term[l + j] - recon;
+            dist += diff * diff;
+          } else {
+            float v_d = s_recon_base[l + j] + recon;
+            dist += s_query_term[l + j] * v_d;
+            if constexpr (kIsCosine) { v_norm_sq += v_d * v_d; }
+          }
+        }
+      }
+    }
+
+    if constexpr (kIsCosine) {
+      float denom = query_norms[query_ix] * sqrtf(v_norm_sq);
+      dist        = (denom > 0.0f) ? 1.0f - dist / denom : 0.0f;
+    }
+
+    if (valid) {
+      uint32_t out_idx       = query_ix * max_samples + out_base + row;
+      out_distances[out_idx] = dist;
+      out_indices[out_idx]   = out_base + row;
+    }
+  }
+}
+
+template <typename IdxT, typename IvfSampleFilterT>
+void ivf_sq_scan(raft::resources const& handle,
+                 const index<IdxT>& idx,
+                 const float* queries_float,
+                 const float* query_norms,
+                 uint32_t n_queries,
+                 uint32_t n_probes,
+                 uint32_t max_samples,
+                 const uint32_t* coarse_indices,
+                 const uint32_t* chunk_indices,
+                 float* out_distances,
+                 uint32_t* out_indices,
+                 IvfSampleFilterT sample_filter,
+                 rmm::cuda_stream_view stream)
+{
+  constexpr int kThreads = 256;
+  dim3 grid(n_queries, n_probes);
+  dim3 block(kThreads);
+  uint32_t dim = idx.dim();
+
+  auto do_launch = [&](auto kernel_ptr, size_t smem) {
+    RAFT_CUDA_TRY(
+      cudaFuncSetAttribute(kernel_ptr, cudaFuncAttributeMaxDynamicSharedMemorySize, smem));
+    kernel_ptr<<<grid, block, smem, stream>>>(idx.data_ptrs().data_handle(),
+                                              idx.list_sizes().data_handle(),
+                                              coarse_indices,
+                                              queries_float,
+                                              idx.centers().data_handle(),
+                                              idx.sq_vmin().data_handle(),
+                                              idx.sq_delta().data_handle(),
+                                              query_norms,
+                                              n_probes,
+                                              dim,
+                                              max_samples,
+                                              chunk_indices,
+                                              out_distances,
+                                              out_indices,
+                                              sample_filter);
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
+  };
+
+  switch (idx.metric()) {
+    case cuvs::distance::DistanceType::L2Expanded:
+    case cuvs::distance::DistanceType::L2SqrtExpanded:
+      do_launch(ivf_sq_scan_kernel<kThreads, SqScanMetric::kL2, IdxT, IvfSampleFilterT>,
+                2 * dim * sizeof(float));
+      break;
+    case cuvs::distance::DistanceType::InnerProduct:
+      do_launch(ivf_sq_scan_kernel<kThreads, SqScanMetric::kIP, IdxT, IvfSampleFilterT>,
+                3 * dim * sizeof(float));
+      break;
+    case cuvs::distance::DistanceType::CosineExpanded:
+      do_launch(ivf_sq_scan_kernel<kThreads, SqScanMetric::kCosine, IdxT, IvfSampleFilterT>,
+                3 * dim * sizeof(float));
+      break;
+    default: RAFT_FAIL("Unsupported metric type for IVF-SQ scan.");
+  }
+}
+
+template <typename T, typename IdxT, typename IvfSampleFilterT>
+void search_impl(raft::resources const& handle,
+                 const index<IdxT>& index,
+                 const T* queries,
+                 uint32_t n_queries,
+                 uint32_t k,
+                 uint32_t n_probes,
+                 bool select_min,
+                 int64_t* neighbors,
+                 float* distances,
+                 rmm::device_async_resource_ref search_mr,
+                 IvfSampleFilterT sample_filter)
+{
+  auto stream = raft::resource::get_cuda_stream(handle);
+  auto dim    = index.dim();
+
+  std::size_t n_queries_probes = std::size_t(n_queries) * std::size_t(n_probes);
+
+  rmm::device_uvector<float> query_norm_dev(n_queries, stream, search_mr);
+  rmm::device_uvector<float> distance_buffer_dev(n_queries * index.n_lists(), stream, search_mr);
+  rmm::device_uvector<float> coarse_distances_dev(n_queries_probes, stream, search_mr);
+  rmm::device_uvector<uint32_t> coarse_indices_dev(n_queries_probes, stream, search_mr);
+
+  size_t float_query_size;
+  if constexpr (std::is_same_v<T, float>) {
+    float_query_size = 0;
+  } else {
+    float_query_size = n_queries * dim;
+  }
+  rmm::device_uvector<float> converted_queries_dev(float_query_size, stream, search_mr);
+  float* converted_queries_ptr = converted_queries_dev.data();
+
+  if constexpr (std::is_same_v<T, float>) {
+    converted_queries_ptr = const_cast<float*>(queries);
+  } else {
+    raft::linalg::unaryOp(
+      converted_queries_ptr, queries, n_queries * dim, utils::mapping<float>{}, stream);
+  }
+
+  auto distance_buffer_dev_view = raft::make_device_matrix_view<float, int64_t>(
+    distance_buffer_dev.data(), n_queries, index.n_lists());
+
+  RAFT_EXPECTS(index.metric() == cuvs::distance::DistanceType::InnerProduct ||
+                 index.center_norms().has_value(),
+               "Center norms are required for search with L2 or Cosine metric. "
+               "Rebuild the index with add_data_on_build=true or call extend() first.");
+
+  float alpha = 1.0f;
+  float beta  = 0.0f;
+  switch (index.metric()) {
+    case cuvs::distance::DistanceType::L2Expanded:
+    case cuvs::distance::DistanceType::L2SqrtExpanded: {
+      alpha = -2.0f;
+      beta  = 1.0f;
+      raft::linalg::rowNorm<raft::linalg::L2Norm, true>(query_norm_dev.data(),
+                                                        converted_queries_ptr,
+                                                        static_cast<int64_t>(dim),
+                                                        static_cast<int64_t>(n_queries),
+                                                        stream);
+      utils::outer_add(query_norm_dev.data(),
+                       (int64_t)n_queries,
+                       index.center_norms()->data_handle(),
+                       (int64_t)index.n_lists(),
+                       distance_buffer_dev.data(),
+                       stream);
+      break;
+    }
+    case cuvs::distance::DistanceType::CosineExpanded: {
+      raft::linalg::rowNorm<raft::linalg::L2Norm, true>(query_norm_dev.data(),
+                                                        converted_queries_ptr,
+                                                        static_cast<int64_t>(dim),
+                                                        static_cast<int64_t>(n_queries),
+                                                        stream,
+                                                        raft::sqrt_op{});
+      alpha = -1.0f;
+      beta  = 0.0f;
+      break;
+    }
+    case cuvs::distance::DistanceType::InnerProduct: {
+      alpha = 1.0f;
+      beta  = 0.0f;
+      break;
+    }
+    default: RAFT_FAIL("Unsupported metric type for IVF-SQ search.");
+  }
+
+  raft::linalg::gemm(handle,
+                     true,
+                     false,
+                     index.n_lists(),
+                     n_queries,
+                     dim,
+                     &alpha,
+                     index.centers().data_handle(),
+                     dim,
+                     converted_queries_ptr,
+                     dim,
+                     &beta,
+                     distance_buffer_dev.data(),
+                     index.n_lists(),
+                     stream);
+
+  if (index.metric() == cuvs::distance::DistanceType::CosineExpanded) {
+    auto n_lists_local          = index.n_lists();
+    const auto* q_norm_ptr      = query_norm_dev.data();
+    const auto* center_norm_ptr = index.center_norms()->data_handle();
+    raft::linalg::map_offset(
+      handle,
+      distance_buffer_dev_view,
+      [=] __device__(const uint32_t idx, const float dist) {
+        const auto query   = idx / n_lists_local;
+        const auto cluster = idx % n_lists_local;
+        float denom        = q_norm_ptr[query] * center_norm_ptr[cluster];
+        return (denom > 0.0f) ? dist / denom : 0.0f;
+      },
+      raft::make_const_mdspan(distance_buffer_dev_view));
+  }
+
+  cuvs::selection::select_k(
+    handle,
+    raft::make_const_mdspan(distance_buffer_dev_view),
+    std::nullopt,
+    raft::make_device_matrix_view<float, int64_t>(coarse_distances_dev.data(), n_queries, n_probes),
+    raft::make_device_matrix_view<uint32_t, int64_t>(
+      coarse_indices_dev.data(), n_queries, n_probes),
+    select_min);
+
+  rmm::device_uvector<uint32_t> num_samples(n_queries, stream, search_mr);
+  rmm::device_uvector<uint32_t> chunk_index(n_queries_probes, stream, search_mr);
+
+  ivf::detail::calc_chunk_indices::configure(n_probes, n_queries)(index.list_sizes().data_handle(),
+                                                                  coarse_indices_dev.data(),
+                                                                  chunk_index.data(),
+                                                                  num_samples.data(),
+                                                                  stream);
+
+  uint32_t max_samples =
+    std::max<uint32_t>(static_cast<uint32_t>(index.accum_sorted_sizes()(n_probes)), k);
+
+  rmm::device_uvector<float> all_distances(std::size_t(n_queries) * max_samples, stream, search_mr);
+  rmm::device_uvector<uint32_t> all_indices(
+    std::size_t(n_queries) * max_samples, stream, search_mr);
+
+  float init_val =
+    select_min ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
+  thrust::fill_n(raft::resource::get_thrust_policy(handle),
+                 all_distances.data(),
+                 std::size_t(n_queries) * max_samples,
+                 init_val);
+  thrust::fill_n(raft::resource::get_thrust_policy(handle),
+                 all_indices.data(),
+                 std::size_t(n_queries) * max_samples,
+                 uint32_t(0xFFFFFFFF));
+
+  auto filter_adapter = cuvs::neighbors::filtering::ivf_to_sample_filter(
+    index.inds_ptrs().data_handle(), sample_filter);
+
+  ivf_sq_scan(handle,
+              index,
+              converted_queries_ptr,
+              query_norm_dev.data(),
+              n_queries,
+              n_probes,
+              max_samples,
+              coarse_indices_dev.data(),
+              chunk_index.data(),
+              all_distances.data(),
+              all_indices.data(),
+              filter_adapter,
+              stream);
+
+  rmm::device_uvector<uint32_t> neighbors_uint32(0, stream, search_mr);
+  uint32_t* neighbors_uint32_ptr = nullptr;
+  if constexpr (sizeof(int64_t) == sizeof(uint32_t)) {
+    neighbors_uint32_ptr = reinterpret_cast<uint32_t*>(neighbors);
+  } else {
+    neighbors_uint32.resize(std::size_t(n_queries) * k, stream);
+    neighbors_uint32_ptr = neighbors_uint32.data();
+  }
+
+  auto num_samples_view =
+    raft::make_device_vector_view<const uint32_t>(num_samples.data(), n_queries);
+
+  cuvs::selection::select_k(
+    handle,
+    raft::make_device_matrix_view<const float, int64_t>(
+      all_distances.data(), n_queries, max_samples),
+    raft::make_device_matrix_view<const uint32_t, int64_t>(
+      all_indices.data(), n_queries, max_samples),
+    raft::make_device_matrix_view<float, int64_t>(distances, n_queries, k),
+    raft::make_device_matrix_view<uint32_t, int64_t>(neighbors_uint32_ptr, n_queries, k),
+    select_min,
+    false,
+    cuvs::selection::SelectAlgo::kAuto,
+    num_samples_view);
+
+  ivf::detail::postprocess_distances(
+    handle, distances, distances, index.metric(), n_queries, k, 1.0, false);
+
+  ivf::detail::postprocess_neighbors(neighbors,
+                                     neighbors_uint32_ptr,
+                                     index.inds_ptrs().data_handle(),
+                                     coarse_indices_dev.data(),
+                                     chunk_index.data(),
+                                     n_queries,
+                                     n_probes,
+                                     k,
+                                     stream);
+}
+
+template <typename T,
+          typename IdxT,
+          typename IvfSampleFilterT = cuvs::neighbors::filtering::none_sample_filter>
+inline void search_with_filtering(raft::resources const& handle,
+                                  const search_params& params,
+                                  const index<IdxT>& index,
+                                  const T* queries,
+                                  uint32_t n_queries,
+                                  uint32_t k,
+                                  int64_t* neighbors,
+                                  float* distances,
+                                  IvfSampleFilterT sample_filter = IvfSampleFilterT())
+{
+  cuvs::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
+    "ivf_sq::search(k = %u, n_queries = %u, dim = %zu)", k, n_queries, index.dim());
+
+  RAFT_EXPECTS(params.n_probes > 0,
+               "n_probes (number of clusters to probe in the search) must be positive.");
+  auto n_probes = std::min<uint32_t>(params.n_probes, index.n_lists());
+
+  uint32_t max_samples =
+    std::max<uint32_t>(static_cast<uint32_t>(index.accum_sorted_sizes()(n_probes)), k);
+
+  constexpr uint64_t kExpectedWsSize = 1024ull * 1024 * 1024;
+  uint64_t max_ws_size =
+    std::min<uint64_t>(raft::resource::get_workspace_free_bytes(handle), kExpectedWsSize);
+
+  uint64_t converted_query_floats = std::is_same_v<T, float> ? 0 : index.dim();
+  uint64_t ws_per_query = sizeof(float) * (uint64_t(index.n_lists()) + n_probes + 1 + max_samples +
+                                           converted_query_floats) +
+                          sizeof(uint32_t) * (uint64_t(n_probes) * 2 + 1 + max_samples + k);
+
+  const uint32_t max_queries =
+    std::min<uint32_t>(n_queries, std::max<uint64_t>(1, max_ws_size / ws_per_query));
+
+  for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) {
+    uint32_t queries_batch = std::min(max_queries, n_queries - offset_q);
+
+    search_impl<T, IdxT, IvfSampleFilterT>(handle,
+                                           index,
+                                           queries + std::size_t(offset_q) * index.dim(),
+                                           queries_batch,
+                                           k,
+                                           n_probes,
+                                           cuvs::distance::is_min_close(index.metric()),
+                                           neighbors + std::size_t(offset_q) * k,
+                                           distances + std::size_t(offset_q) * k,
+                                           raft::resource::get_workspace_resource(handle),
+                                           sample_filter);
+  }
+}
+
+template <typename T, typename IdxT, typename IvfSampleFilterT>
+void search_with_filtering(raft::resources const& handle,
+                           const search_params& params,
+                           const index<IdxT>& index,
+                           raft::device_matrix_view<const T, int64_t, raft::row_major> queries,
+                           raft::device_matrix_view<int64_t, int64_t, raft::row_major> neighbors,
+                           raft::device_matrix_view<float, int64_t, raft::row_major> distances,
+                           IvfSampleFilterT sample_filter = IvfSampleFilterT())
+{
+  RAFT_EXPECTS(
+    queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0),
+    "Number of rows in output neighbors and distances matrices must equal the number of queries.");
+  RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1),
+               "Number of columns in output neighbors and distances matrices must be equal");
+  RAFT_EXPECTS(queries.extent(1) == index.dim(),
+               "Number of query dimensions should equal number of dimensions in the index.");
+
+  search_with_filtering(handle,
+                        params,
+                        index,
+                        queries.data_handle(),
+                        static_cast<std::uint32_t>(queries.extent(0)),
+                        static_cast<std::uint32_t>(neighbors.extent(1)),
+                        neighbors.data_handle(),
+                        distances.data_handle(),
+                        sample_filter);
+}
+
+template <typename T, typename IdxT>
+void search(raft::resources const& handle,
+            const search_params& params,
+            const index<IdxT>& idx,
+            raft::device_matrix_view<const T, int64_t, raft::row_major> queries,
+            raft::device_matrix_view<int64_t, int64_t, raft::row_major> neighbors,
+            raft::device_matrix_view<float, int64_t, raft::row_major> distances,
+            const cuvs::neighbors::filtering::base_filter& sample_filter_ref)
+{
+  try {
+    auto& sample_filter =
+      dynamic_cast<const cuvs::neighbors::filtering::none_sample_filter&>(sample_filter_ref);
+    return search_with_filtering(handle, params, idx, queries, neighbors, distances, sample_filter);
+  } catch (const std::bad_cast&) {
+  }
+
+  try {
+    auto& sample_filter =
+      dynamic_cast<const cuvs::neighbors::filtering::bitset_filter<uint32_t, int64_t>&>(
+        sample_filter_ref);
+    return search_with_filtering(handle, params, idx, queries, neighbors, distances, sample_filter);
+  } catch (const std::bad_cast&) {
+    RAFT_FAIL("Unsupported sample filter type");
+  }
+}
+
+}  // namespace cuvs::neighbors::ivf_sq::detail
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..60d95a153f
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search_float_uint8_t_int64_t.cu
@@ -0,0 +1,29 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include "ivf_sq_search.cuh"
+
+namespace cuvs::neighbors::ivf_sq {
+
+#define CUVS_INST_IVF_SQ_SEARCH(T, IdxT)                                             \
+  void search(raft::resources const& handle,                                         \
+              const cuvs::neighbors::ivf_sq::search_params& params,                  \
+              const cuvs::neighbors::ivf_sq::index<IdxT>& index,                     \
+              raft::device_matrix_view<const T, int64_t, raft::row_major> queries,   \
+              raft::device_matrix_view<int64_t, int64_t, raft::row_major> neighbors, \
+              raft::device_matrix_view<float, int64_t, raft::row_major> distances,   \
+              const cuvs::neighbors::filtering::base_filter& sample_filter)          \
+  {                                                                                  \
+    cuvs::neighbors::ivf_sq::detail::search(                                         \
+      handle, params, index, queries, neighbors, distances, sample_filter);          \
+  }
+
+CUVS_INST_IVF_SQ_SEARCH(float, uint8_t);
+
+#undef CUVS_INST_IVF_SQ_SEARCH
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu
new file mode 100644
index 0000000000..fbed3fd432
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_search_half_uint8_t_int64_t.cu
@@ -0,0 +1,29 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include "ivf_sq_search.cuh"
+
+namespace cuvs::neighbors::ivf_sq {
+
+#define CUVS_INST_IVF_SQ_SEARCH(T, IdxT)                                             \
+  void search(raft::resources const& handle,                                         \
+              const cuvs::neighbors::ivf_sq::search_params& params,                  \
+              const cuvs::neighbors::ivf_sq::index<IdxT>& index,                     \
+              raft::device_matrix_view<const T, int64_t, raft::row_major> queries,   \
+              raft::device_matrix_view<int64_t, int64_t, raft::row_major> neighbors, \
+              raft::device_matrix_view<float, int64_t, raft::row_major> distances,   \
+              const cuvs::neighbors::filtering::base_filter& sample_filter)          \
+  {                                                                                  \
+    cuvs::neighbors::ivf_sq::detail::search(                                         \
+      handle, params, index, queries, neighbors, distances, sample_filter);          \
+  }
+
+CUVS_INST_IVF_SQ_SEARCH(half, uint8_t);
+
+#undef CUVS_INST_IVF_SQ_SEARCH
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh
new file mode 100644
index 0000000000..b95e63ee33
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize.cuh
@@ -0,0 +1,161 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "../ivf_common.cuh"
+#include "../ivf_list.cuh"
+#include <cuvs/neighbors/common.hpp>
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include <raft/core/detail/mdspan_numpy_serializer.hpp>
+#include <raft/core/mdarray.hpp>
+#include <raft/core/resource/cuda_stream.hpp>
+#include <raft/core/serialize.hpp>
+#include <raft/util/pow2_utils.cuh>
+
+#include <fstream>
+
+namespace cuvs::neighbors::ivf_sq::detail {
+
+constexpr int serialization_version = 1;
+
+template <typename IdxT>
+void serialize(raft::resources const& handle, std::ostream& os, const index<IdxT>& index_)
+{
+  RAFT_LOG_DEBUG(
+    "Saving IVF-SQ index, size %zu, dim %u", static_cast<size_t>(index_.size()), index_.dim());
+
+  std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype<IdxT>().to_string();
+  dtype_string.resize(4);
+  os << dtype_string;
+
+  serialize_scalar(handle, os, serialization_version);
+  serialize_scalar(handle, os, index_.size());
+  serialize_scalar(handle, os, index_.dim());
+  serialize_scalar(handle, os, index_.n_lists());
+  serialize_scalar(handle, os, index_.metric());
+  serialize_scalar(handle, os, index_.adaptive_centers());
+  serialize_scalar(handle, os, index_.conservative_memory_allocation());
+  serialize_mdspan(handle, os, index_.centers());
+
+  if (index_.center_norms()) {
+    bool has_norms = true;
+    serialize_scalar(handle, os, has_norms);
+    serialize_mdspan(handle, os, *index_.center_norms());
+  } else {
+    bool has_norms = false;
+    serialize_scalar(handle, os, has_norms);
+  }
+
+  serialize_mdspan(handle, os, index_.sq_vmin());
+  serialize_mdspan(handle, os, index_.sq_delta());
+
+  auto sizes_host = raft::make_host_vector<uint32_t, uint32_t>(index_.list_sizes().extent(0));
+  raft::copy(sizes_host.data_handle(),
+             index_.list_sizes().data_handle(),
+             sizes_host.size(),
+             raft::resource::get_cuda_stream(handle));
+  raft::resource::sync_stream(handle);
+  serialize_mdspan(handle, os, sizes_host.view());
+
+  list_spec<uint32_t, IdxT, int64_t> list_store_spec{index_.dim(), true};
+  for (uint32_t label = 0; label < index_.n_lists(); label++) {
+    ivf::serialize_list(handle,
+                        os,
+                        index_.lists()[label],
+                        list_store_spec,
+                        raft::Pow2<kIndexGroupSize>::roundUp(sizes_host(label)));
+  }
+  raft::resource::sync_stream(handle);
+}
+
+template <typename IdxT>
+void serialize(raft::resources const& handle,
+               const std::string& filename,
+               const index<IdxT>& index_)
+{
+  std::ofstream of(filename, std::ios::out | std::ios::binary);
+  if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); }
+  detail::serialize(handle, of, index_);
+  of.close();
+  if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); }
+}
+
+template <typename IdxT>
+auto deserialize(raft::resources const& handle, std::istream& is) -> index<IdxT>
+{
+  char dtype_string[4];
+  is.read(dtype_string, 4);
+
+  auto ver = raft::deserialize_scalar<int>(handle, is);
+  if (ver != serialization_version) {
+    RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver);
+  }
+  auto n_rows           = raft::deserialize_scalar<int64_t>(handle, is);
+  auto dim              = raft::deserialize_scalar<uint32_t>(handle, is);
+  auto n_lists          = raft::deserialize_scalar<uint32_t>(handle, is);
+  auto metric           = raft::deserialize_scalar<cuvs::distance::DistanceType>(handle, is);
+  bool adaptive_centers = raft::deserialize_scalar<bool>(handle, is);
+  bool cma              = raft::deserialize_scalar<bool>(handle, is);
+
+  index<IdxT> index_ = index<IdxT>(handle, metric, n_lists, dim, adaptive_centers, cma);
+
+  deserialize_mdspan(handle, is, index_.centers());
+
+  bool has_norms = raft::deserialize_scalar<bool>(handle, is);
+  if (has_norms) {
+    index_.allocate_center_norms(handle);
+    if (!index_.center_norms()) {
+      RAFT_FAIL("Error inconsistent center norms");
+    } else {
+      auto center_norms = index_.center_norms().value();
+      deserialize_mdspan(handle, is, center_norms);
+    }
+  }
+
+  deserialize_mdspan(handle, is, index_.sq_vmin());
+  deserialize_mdspan(handle, is, index_.sq_delta());
+
+  deserialize_mdspan(handle, is, index_.list_sizes());
+
+  list_spec<uint32_t, IdxT, int64_t> list_device_spec{index_.dim(), cma};
+  list_spec<uint32_t, IdxT, int64_t> list_store_spec{index_.dim(), true};
+  for (uint32_t label = 0; label < index_.n_lists(); label++) {
+    ivf::deserialize_list(handle, is, index_.lists()[label], list_store_spec, list_device_spec);
+  }
+  raft::resource::sync_stream(handle);
+
+  ivf::detail::recompute_internal_state(handle, index_);
+
+  return index_;
+}
+
+template <typename IdxT>
+auto deserialize(raft::resources const& handle, const std::string& filename) -> index<IdxT>
+{
+  std::ifstream is(filename, std::ios::in | std::ios::binary);
+  if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); }
+  auto index = detail::deserialize<IdxT>(handle, is);
+  is.close();
+  return index;
+}
+
+}  // namespace cuvs::neighbors::ivf_sq::detail
+
+#define CUVS_INST_IVF_SQ_SERIALIZE(IdxT)                                           \
+  void serialize(raft::resources const& handle,                                    \
+                 const std::string& filename,                                      \
+                 const cuvs::neighbors::ivf_sq::index<IdxT>& index)                \
+  {                                                                                \
+    cuvs::neighbors::ivf_sq::detail::serialize(handle, filename, index);           \
+  }                                                                                \
+                                                                                   \
+  void deserialize(raft::resources const& handle,                                  \
+                   const std::string& filename,                                    \
+                   cuvs::neighbors::ivf_sq::index<IdxT>* index)                    \
+  {                                                                                \
+    *index = cuvs::neighbors::ivf_sq::detail::deserialize<IdxT>(handle, filename); \
+  }
diff --git a/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu
new file mode 100644
index 0000000000..c2351ed8c3
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu
@@ -0,0 +1,16 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+#include "ivf_sq_serialize.cuh"
+
+namespace cuvs::neighbors::ivf_sq {
+
+CUVS_INST_IVF_SQ_SERIALIZE(uint8_t);
+
+#undef CUVS_INST_IVF_SQ_SERIALIZE
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/src/neighbors/ivf_sq_index.cpp b/cpp/src/neighbors/ivf_sq_index.cpp
new file mode 100644
index 0000000000..d97ace7dcb
--- /dev/null
+++ b/cpp/src/neighbors/ivf_sq_index.cpp
@@ -0,0 +1,236 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cuvs/neighbors/ivf_sq.hpp>
+
+namespace cuvs::neighbors::ivf_sq {
+
+template <typename IdxT>
+index<IdxT>::index(raft::resources const& res)
+  : index(res, cuvs::distance::DistanceType::L2Expanded, 0, 0, false, false)
+{
+}
+
+template <typename IdxT>
+index<IdxT>::index(raft::resources const& res, const index_params& params, uint32_t dim)
+  : index(res,
+          params.metric,
+          params.n_lists,
+          dim,
+          params.adaptive_centers,
+          params.conservative_memory_allocation)
+{
+}
+
+template <typename IdxT>
+index<IdxT>::index(raft::resources const& res,
+                   cuvs::distance::DistanceType metric,
+                   uint32_t n_lists,
+                   uint32_t dim,
+                   bool adaptive_centers,
+                   bool conservative_memory_allocation)
+  : cuvs::neighbors::index(),
+    metric_(metric),
+    adaptive_centers_(adaptive_centers),
+    conservative_memory_allocation_(conservative_memory_allocation),
+    lists_{n_lists},
+    list_sizes_{raft::make_device_vector<uint32_t, uint32_t>(res, n_lists)},
+    centers_(raft::make_device_matrix<float, uint32_t>(res, n_lists, dim)),
+    center_norms_(std::nullopt),
+    sq_vmin_{raft::make_device_vector<float, uint32_t>(res, dim)},
+    sq_delta_{raft::make_device_vector<float, uint32_t>(res, dim)},
+    data_ptrs_{raft::make_device_vector<IdxT*, uint32_t>(res, n_lists)},
+    inds_ptrs_{raft::make_device_vector<int64_t*, uint32_t>(res, n_lists)},
+    accum_sorted_sizes_{raft::make_host_vector<int64_t, uint32_t>(n_lists + 1)}
+{
+  check_consistency();
+  accum_sorted_sizes_(n_lists) = 0;
+}
+
+template <typename IdxT>
+cuvs::distance::DistanceType index<IdxT>::metric() const noexcept
+{
+  return metric_;
+}
+
+template <typename IdxT>
+bool index<IdxT>::adaptive_centers() const noexcept
+{
+  return adaptive_centers_;
+}
+
+template <typename IdxT>
+int64_t index<IdxT>::size() const noexcept
+{
+  return accum_sorted_sizes()(n_lists());
+}
+
+template <typename IdxT>
+uint32_t index<IdxT>::dim() const noexcept
+{
+  return centers_.extent(1);
+}
+
+template <typename IdxT>
+uint32_t index<IdxT>::n_lists() const noexcept
+{
+  return lists_.size();
+}
+
+template <typename IdxT>
+bool index<IdxT>::conservative_memory_allocation() const noexcept
+{
+  return conservative_memory_allocation_;
+}
+
+template <typename IdxT>
+raft::device_vector_view<uint32_t, uint32_t> index<IdxT>::list_sizes() noexcept
+{
+  return list_sizes_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<const uint32_t, uint32_t> index<IdxT>::list_sizes() const noexcept
+{
+  return list_sizes_.view();
+}
+
+template <typename IdxT>
+raft::device_matrix_view<float, uint32_t, raft::row_major> index<IdxT>::centers() noexcept
+{
+  return centers_.view();
+}
+
+template <typename IdxT>
+raft::device_matrix_view<const float, uint32_t, raft::row_major> index<IdxT>::centers()
+  const noexcept
+{
+  return centers_.view();
+}
+
+template <typename IdxT>
+std::optional<raft::device_vector_view<float, uint32_t>> index<IdxT>::center_norms() noexcept
+{
+  if (center_norms_.has_value()) {
+    return std::make_optional<raft::device_vector_view<float, uint32_t>>(center_norms_->view());
+  } else {
+    return std::nullopt;
+  }
+}
+
+template <typename IdxT>
+std::optional<raft::device_vector_view<const float, uint32_t>> index<IdxT>::center_norms()
+  const noexcept
+{
+  if (center_norms_.has_value()) {
+    return std::make_optional<raft::device_vector_view<const float, uint32_t>>(
+      center_norms_->view());
+  } else {
+    return std::nullopt;
+  }
+}
+
+template <typename IdxT>
+void index<IdxT>::allocate_center_norms(raft::resources const& res)
+{
+  switch (metric_) {
+    case cuvs::distance::DistanceType::L2Expanded:
+    case cuvs::distance::DistanceType::L2SqrtExpanded:
+    case cuvs::distance::DistanceType::L2Unexpanded:
+    case cuvs::distance::DistanceType::L2SqrtUnexpanded:
+    case cuvs::distance::DistanceType::CosineExpanded:
+      center_norms_ = raft::make_device_vector<float, uint32_t>(res, n_lists());
+      break;
+    default: center_norms_ = std::nullopt;
+  }
+}
+
+template <typename IdxT>
+raft::device_vector_view<float, uint32_t> index<IdxT>::sq_vmin() noexcept
+{
+  return sq_vmin_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<const float, uint32_t> index<IdxT>::sq_vmin() const noexcept
+{
+  return sq_vmin_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<float, uint32_t> index<IdxT>::sq_delta() noexcept
+{
+  return sq_delta_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<const float, uint32_t> index<IdxT>::sq_delta() const noexcept
+{
+  return sq_delta_.view();
+}
+
+template <typename IdxT>
+raft::host_vector_view<int64_t, uint32_t> index<IdxT>::accum_sorted_sizes() noexcept
+{
+  return accum_sorted_sizes_.view();
+}
+
+template <typename IdxT>
+raft::host_vector_view<const int64_t, uint32_t> index<IdxT>::accum_sorted_sizes() const noexcept
+{
+  return accum_sorted_sizes_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<IdxT*, uint32_t> index<IdxT>::data_ptrs() noexcept
+{
+  return data_ptrs_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<IdxT* const, uint32_t> index<IdxT>::data_ptrs() const noexcept
+{
+  return data_ptrs_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<int64_t*, uint32_t> index<IdxT>::inds_ptrs() noexcept
+{
+  return inds_ptrs_.view();
+}
+
+template <typename IdxT>
+raft::device_vector_view<int64_t* const, uint32_t> index<IdxT>::inds_ptrs() const noexcept
+{
+  return inds_ptrs_.view();
+}
+
+template <typename IdxT>
+std::vector<std::shared_ptr<list_data<IdxT, int64_t>>>& index<IdxT>::lists() noexcept
+{
+  return lists_;
+}
+
+template <typename IdxT>
+const std::vector<std::shared_ptr<list_data<IdxT, int64_t>>>& index<IdxT>::lists() const noexcept
+{
+  return lists_;
+}
+
+template <typename IdxT>
+void index<IdxT>::check_consistency()
+{
+  auto n_lists = lists_.size();
+  RAFT_EXPECTS(list_sizes_.extent(0) == n_lists, "inconsistent list size");
+  RAFT_EXPECTS(data_ptrs_.extent(0) == n_lists, "inconsistent list size");
+  RAFT_EXPECTS(inds_ptrs_.extent(0) == n_lists, "inconsistent list size");
+  RAFT_EXPECTS((centers_.extent(0) == list_sizes_.extent(0)) &&
+                 (!center_norms_.has_value() || centers_.extent(0) == center_norms_->extent(0)),
+               "inconsistent number of lists (clusters)");
+}
+
+template struct index<uint8_t>;
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index bbddef87e5..8d90ed35b9 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -133,6 +133,13 @@ ConfigureTest(
   PERCENT 100
 )
 
+ConfigureTest(
+  NAME NEIGHBORS_ANN_IVF_SQ_TEST
+  PATH neighbors/ann_ivf_sq/test_float_uint8_t.cu
+  GPUS 1
+  PERCENT 100
+)
+
 ConfigureTest(
   NAME NEIGHBORS_ANN_IVF_PQ_TEST
   PATH neighbors/ann_ivf_pq/test_float_int64_t.cu neighbors/ann_ivf_pq/test_int8_t_int64_t.cu
diff --git a/cpp/tests/neighbors/ann_ivf_sq.cuh b/cpp/tests/neighbors/ann_ivf_sq.cuh
new file mode 100644
index 0000000000..a7e02315e4
--- /dev/null
+++ b/cpp/tests/neighbors/ann_ivf_sq.cuh
@@ -0,0 +1,457 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#pragma once
+
+#include "../test_utils.cuh"
+#include "ann_utils.cuh"
+#include "naive_knn.cuh"
+
+#include <cuvs/neighbors/ivf_sq.hpp>
+#include <raft/core/bitset.cuh>
+#include <raft/linalg/add.cuh>
+#include <raft/linalg/normalize.cuh>
+
+#include <raft/core/resource/cuda_stream_pool.hpp>
+#include <rmm/cuda_stream_pool.hpp>
+
+namespace cuvs::neighbors::ivf_sq {
+
+struct test_ivf_sample_filter {
+  static constexpr unsigned offset = 300;
+};
+
+template <typename IdxT>
+struct AnnIvfSqInputs {
+  IdxT num_queries;
+  IdxT num_db_vecs;
+  IdxT dim;
+  IdxT k;
+  IdxT nprobe;
+  IdxT nlist;
+  cuvs::distance::DistanceType metric;
+  bool adaptive_centers;
+};
+
+template <typename IdxT>
+::std::ostream& operator<<(::std::ostream& os, const AnnIvfSqInputs<IdxT>& p)
+{
+  os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", "
+     << p.nprobe << ", " << p.nlist << ", "
+     << cuvs::neighbors::print_metric{static_cast<cuvs::distance::DistanceType>((int)p.metric)}
+     << ", " << p.adaptive_centers << '}' << std::endl;
+  return os;
+}
+
+template <typename T, typename DataT, typename IdxT>
+class AnnIVFSQTest : public ::testing::TestWithParam<AnnIvfSqInputs<IdxT>> {
+ public:
+  AnnIVFSQTest()
+    : stream_(raft::resource::get_cuda_stream(handle_)),
+      ps(::testing::TestWithParam<AnnIvfSqInputs<IdxT>>::GetParam()),
+      database(0, stream_),
+      search_queries(0, stream_)
+  {
+  }
+
+  void testIVFSQ()
+  {
+    size_t queries_size = ps.num_queries * ps.k;
+    std::vector<IdxT> indices_ivfsq(queries_size);
+    std::vector<IdxT> indices_naive(queries_size);
+    std::vector<T> distances_ivfsq(queries_size);
+    std::vector<T> distances_naive(queries_size);
+
+    {
+      rmm::device_uvector<T> distances_naive_dev(queries_size, stream_);
+      rmm::device_uvector<IdxT> indices_naive_dev(queries_size, stream_);
+      cuvs::neighbors::naive_knn<T, DataT, IdxT>(handle_,
+                                                 distances_naive_dev.data(),
+                                                 indices_naive_dev.data(),
+                                                 search_queries.data(),
+                                                 database.data(),
+                                                 ps.num_queries,
+                                                 ps.num_db_vecs,
+                                                 ps.dim,
+                                                 ps.k,
+                                                 ps.metric);
+      raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
+      raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
+      raft::resource::sync_stream(handle_);
+    }
+
+    {
+      double min_recall =
+        std::min(1.0, static_cast<double>(ps.nprobe) / static_cast<double>(ps.nlist));
+
+      rmm::device_uvector<T> distances_ivfsq_dev(queries_size, stream_);
+      rmm::device_uvector<IdxT> indices_ivfsq_dev(queries_size, stream_);
+
+      {
+        cuvs::neighbors::ivf_sq::index_params index_params;
+        cuvs::neighbors::ivf_sq::search_params search_params;
+        index_params.n_lists          = ps.nlist;
+        index_params.metric           = ps.metric;
+        index_params.adaptive_centers = ps.adaptive_centers;
+        search_params.n_probes        = ps.nprobe;
+
+        index_params.add_data_on_build        = true;
+        index_params.kmeans_trainset_fraction = 0.5;
+
+        auto database_view = raft::make_device_matrix_view<const DataT, IdxT>(
+          (const DataT*)database.data(), ps.num_db_vecs, ps.dim);
+
+        auto idx = cuvs::neighbors::ivf_sq::build(handle_, index_params, database_view);
+
+        // Test extend: build without data, then extend
+        cuvs::neighbors::ivf_sq::index_params index_params_no_add;
+        index_params_no_add.n_lists                  = ps.nlist;
+        index_params_no_add.metric                   = ps.metric;
+        index_params_no_add.adaptive_centers         = ps.adaptive_centers;
+        index_params_no_add.add_data_on_build        = false;
+        index_params_no_add.kmeans_trainset_fraction = 0.5;
+
+        auto idx_empty =
+          cuvs::neighbors::ivf_sq::build(handle_, index_params_no_add, database_view);
+
+        auto vector_indices = raft::make_device_vector<IdxT, IdxT>(handle_, ps.num_db_vecs);
+        raft::linalg::map_offset(handle_, vector_indices.view(), raft::identity_op{});
+        raft::resource::sync_stream(handle_);
+
+        auto indices_view = raft::make_device_vector_view<const IdxT, IdxT>(
+          vector_indices.data_handle(), ps.num_db_vecs);
+        cuvs::neighbors::ivf_sq::extend(
+          handle_,
+          database_view,
+          std::make_optional<raft::device_vector_view<const IdxT, IdxT>>(indices_view),
+          &idx_empty);
+
+        // Serialize / deserialize round-trip
+        tmp_index_file index_file;
+        cuvs::neighbors::ivf_sq::serialize(handle_, index_file.filename, idx);
+        cuvs::neighbors::ivf_sq::index<uint8_t> index_loaded(handle_);
+        cuvs::neighbors::ivf_sq::deserialize(handle_, index_file.filename, &index_loaded);
+        ASSERT_EQ(idx.size(), index_loaded.size());
+        ASSERT_EQ(idx.dim(), index_loaded.dim());
+        ASSERT_EQ(idx.n_lists(), index_loaded.n_lists());
+
+        auto search_queries_view = raft::make_device_matrix_view<const DataT, IdxT>(
+          search_queries.data(), ps.num_queries, ps.dim);
+        auto indices_out_view =
+          raft::make_device_matrix_view<IdxT, IdxT>(indices_ivfsq_dev.data(), ps.num_queries, ps.k);
+        auto dists_out_view =
+          raft::make_device_matrix_view<T, IdxT>(distances_ivfsq_dev.data(), ps.num_queries, ps.k);
+
+        cuvs::neighbors::ivf_sq::search(handle_,
+                                        search_params,
+                                        index_loaded,
+                                        search_queries_view,
+                                        indices_out_view,
+                                        dists_out_view);
+
+        raft::update_host(
+          distances_ivfsq.data(), distances_ivfsq_dev.data(), queries_size, stream_);
+        raft::update_host(indices_ivfsq.data(), indices_ivfsq_dev.data(), queries_size, stream_);
+        raft::resource::sync_stream(handle_);
+      }
+      // SQ introduces quantization error, so we relax the distance epsilon
+      float eps = 0.1;
+      ASSERT_TRUE(eval_neighbours(indices_naive,
+                                  indices_ivfsq,
+                                  distances_naive,
+                                  distances_ivfsq,
+                                  ps.num_queries,
+                                  ps.k,
+                                  eps,
+                                  min_recall));
+    }
+  }
+
+  void testFilter()
+  {
+    if (ps.num_db_vecs <= static_cast<IdxT>(test_ivf_sample_filter::offset)) {
+      GTEST_SKIP() << "Skipping filter test: num_db_vecs <= filter offset";
+    }
+
+    size_t queries_size = ps.num_queries * ps.k;
+    std::vector<IdxT> indices_ivfsq(queries_size);
+    std::vector<IdxT> indices_naive(queries_size);
+    std::vector<T> distances_ivfsq(queries_size);
+    std::vector<T> distances_naive(queries_size);
+
+    {
+      rmm::device_uvector<T> distances_naive_dev(queries_size, stream_);
+      rmm::device_uvector<IdxT> indices_naive_dev(queries_size, stream_);
+      auto* database_filtered_ptr = database.data() + test_ivf_sample_filter::offset * ps.dim;
+      cuvs::neighbors::naive_knn<T, DataT, IdxT>(handle_,
+                                                 distances_naive_dev.data(),
+                                                 indices_naive_dev.data(),
+                                                 search_queries.data(),
+                                                 database_filtered_ptr,
+                                                 ps.num_queries,
+                                                 ps.num_db_vecs - test_ivf_sample_filter::offset,
+                                                 ps.dim,
+                                                 ps.k,
+                                                 ps.metric);
+      raft::linalg::addScalar(indices_naive_dev.data(),
+                              indices_naive_dev.data(),
+                              IdxT(test_ivf_sample_filter::offset),
+                              queries_size,
+                              stream_);
+      raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_);
+      raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_);
+      raft::resource::sync_stream(handle_);
+    }
+
+    {
+      double min_recall =
+        std::min(1.0, static_cast<double>(ps.nprobe) / static_cast<double>(ps.nlist));
+
+      rmm::device_uvector<T> distances_ivfsq_dev(queries_size, stream_);
+      rmm::device_uvector<IdxT> indices_ivfsq_dev(queries_size, stream_);
+
+      {
+        cuvs::neighbors::ivf_sq::index_params index_params;
+        cuvs::neighbors::ivf_sq::search_params search_params;
+        index_params.n_lists          = ps.nlist;
+        index_params.metric           = ps.metric;
+        index_params.adaptive_centers = ps.adaptive_centers;
+        search_params.n_probes        = ps.nprobe;
+
+        index_params.add_data_on_build        = true;
+        index_params.kmeans_trainset_fraction = 0.5;
+
+        auto database_view = raft::make_device_matrix_view<const DataT, IdxT>(
+          (const DataT*)database.data(), ps.num_db_vecs, ps.dim);
+        auto index = cuvs::neighbors::ivf_sq::build(handle_, index_params, database_view);
+
+        auto removed_indices =
+          raft::make_device_vector<IdxT, int64_t>(handle_, test_ivf_sample_filter::offset);
+        raft::linalg::map_offset(handle_, removed_indices.view(), raft::identity_op{});
+        raft::resource::sync_stream(handle_);
+
+        cuvs::core::bitset<std::uint32_t, IdxT> removed_indices_bitset(
+          handle_, removed_indices.view(), ps.num_db_vecs);
+        auto bitset_filter_obj =
+          cuvs::neighbors::filtering::bitset_filter(removed_indices_bitset.view());
+
+        auto search_queries_view = raft::make_device_matrix_view<const DataT, IdxT>(
+          search_queries.data(), ps.num_queries, ps.dim);
+        auto indices_out_view =
+          raft::make_device_matrix_view<IdxT, IdxT>(indices_ivfsq_dev.data(), ps.num_queries, ps.k);
+        auto dists_out_view =
+          raft::make_device_matrix_view<T, IdxT>(distances_ivfsq_dev.data(), ps.num_queries, ps.k);
+
+        cuvs::neighbors::ivf_sq::search(handle_,
+                                        search_params,
+                                        index,
+                                        search_queries_view,
+                                        indices_out_view,
+                                        dists_out_view,
+                                        bitset_filter_obj);
+
+        raft::update_host(
+          distances_ivfsq.data(), distances_ivfsq_dev.data(), queries_size, stream_);
+        raft::update_host(indices_ivfsq.data(), indices_ivfsq_dev.data(), queries_size, stream_);
+        raft::resource::sync_stream(handle_);
+      }
+      float eps = 0.1;
+      ASSERT_TRUE(eval_neighbours(indices_naive,
+                                  indices_ivfsq,
+                                  distances_naive,
+                                  distances_ivfsq,
+                                  ps.num_queries,
+                                  ps.k,
+                                  eps,
+                                  min_recall));
+    }
+  }
+
+  void SetUp() override
+  {
+    database.resize(ps.num_db_vecs * ps.dim, stream_);
+    search_queries.resize(ps.num_queries * ps.dim, stream_);
+
+    raft::random::RngState r(1234ULL);
+    if constexpr (std::is_same_v<DataT, float> || std::is_same_v<DataT, half>) {
+      raft::random::uniform(
+        handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0));
+      raft::random::uniform(
+        handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0));
+    }
+    raft::resource::sync_stream(handle_);
+  }
+
+  void TearDown() override
+  {
+    raft::resource::sync_stream(handle_);
+    database.resize(0, stream_);
+    search_queries.resize(0, stream_);
+  }
+
+ private:
+  raft::resources handle_;
+  rmm::cuda_stream_view stream_;
+  AnnIvfSqInputs<IdxT> ps;
+  rmm::device_uvector<DataT> database;
+  rmm::device_uvector<DataT> search_queries;
+};
+
+const std::vector<AnnIvfSqInputs<int64_t>> inputs = {
+  // num_queries, num_db_vecs, dim, k, nprobe, nlist, metric, adaptive_centers
+
+  // ===== Dimension edge cases (all four metrics) =====
+  // dim=1 (CosineExpanded excluded: requires dim > 1)
+  {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 1, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+  // dim=2,3,4,5 (unaligned)
+  {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 2, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true},
+  {1000, 10000, 3, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+  {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 4, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 5, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  // dim=7,8 (around veclen=16 boundary, not a multiple of veclen)
+  {1000, 10000, 7, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 7, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true},
+  {1000, 10000, 8, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+  // dim=15,16,17 (around veclen=16 boundary)
+  {1000, 10000, 15, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 15, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+  {1000, 10000, 17, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 17, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  // dim=31,32,33 (around 2*veclen boundary)
+  {1000, 10000, 31, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 31, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 32, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 33, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 33, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  // medium dims
+  {1000, 10000, 64, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 64, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+  {1000, 10000, 256, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 256, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  // large dims (may exceed shared memory limits)
+  {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 2048, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 2049, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 2050, 16, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 4096, 20, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+
+  // ===== k edge cases =====
+  {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 16, 1, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 16, 2, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 5, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 20, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 20, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 16, 50, 100, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 100, 200, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 100, 200, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+
+  // ===== nprobe / nlist edge cases =====
+  // nprobe == nlist (exhaustive probe)
+  {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 16, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, false},
+  // nprobe == 1 (minimal probe)
+  {1000, 10000, 16, 10, 1, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 10, 1, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  // nprobe > nlist (clamped to nlist)
+  {1000, 10000, 16, 10, 2048, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 10, 2048, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  // various nprobe
+  {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::InnerProduct, false},
+  {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {1000, 10000, 16, 10, 50, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+  {1000, 10000, 16, 10, 70, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+  // very small nlist
+  {100, 10000, 16, 10, 8, 8, cuvs::distance::DistanceType::L2Expanded, false},
+  {100, 10000, 16, 10, 8, 8, cuvs::distance::DistanceType::CosineExpanded, false},
+  // smaller nlist
+  {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2Expanded, false},
+  {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::InnerProduct, false},
+  {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::CosineExpanded, false},
+  {100, 10000, 16, 10, 20, 512, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+
+  // ===== Dataset size edge cases =====
+  // single query
+  {1, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {1, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  // very few queries
+  {2, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {5, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  // very few db vectors (nlist reduced to fit)
+  {100, 500, 16, 10, 40, 256, cuvs::distance::DistanceType::L2Expanded, false},
+  {100, 500, 16, 10, 40, 256, cuvs::distance::DistanceType::CosineExpanded, false},
+  // small db with many empty clusters
+  {100, 100, 16, 5, 20, 64, cuvs::distance::DistanceType::L2Expanded, false},
+  {100, 100, 16, 5, 20, 64, cuvs::distance::DistanceType::CosineExpanded, false},
+  // larger datasets
+  {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true},
+  {20, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+  {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, true},
+  {1000, 100000, 16, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+  {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::L2Expanded, false},
+  {10000, 131072, 8, 10, 20, 1024, cuvs::distance::DistanceType::CosineExpanded, false},
+  {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true},
+  {10000, 131072, 8, 10, 50, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+
+  // ===== Large query batches (gridDim.x > 65535) =====
+  {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::L2Expanded, false},
+  {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::InnerProduct, false},
+  {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::CosineExpanded, false},
+  {100000, 1024, 32, 10, 64, 64, cuvs::distance::DistanceType::L2SqrtExpanded, false},
+  {100000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false},
+  {100000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::CosineExpanded, false},
+  // just above the old 65535 limit
+  {65536, 1024, 16, 10, 32, 64, cuvs::distance::DistanceType::L2Expanded, false},
+  {65536, 1024, 16, 10, 32, 64, cuvs::distance::DistanceType::CosineExpanded, false},
+
+  // ===== Adaptive centers (all four metrics, multiple dims) =====
+  {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true},
+  {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true},
+  {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+  {1000, 10000, 8, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, true},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::InnerProduct, true},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+  {1000, 10000, 16, 10, 40, 1024, cuvs::distance::DistanceType::L2SqrtExpanded, true},
+  {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::L2Expanded, true},
+  {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::InnerProduct, true},
+  {1000, 10000, 32, 10, 50, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+  {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::L2Expanded, true},
+  {1000, 10000, 128, 10, 40, 1024, cuvs::distance::DistanceType::CosineExpanded, true},
+
+  // ===== Recall-stability: same data, different query counts =====
+  {20000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false},
+  {50000, 8712, 3, 10, 51, 66, cuvs::distance::DistanceType::L2Expanded, false},
+};
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu b/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu
new file mode 100644
index 0000000000..02ec8a7dfc
--- /dev/null
+++ b/cpp/tests/neighbors/ann_ivf_sq/test_float_uint8_t.cu
@@ -0,0 +1,21 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <gtest/gtest.h>
+
+#include "../ann_ivf_sq.cuh"
+
+namespace cuvs::neighbors::ivf_sq {
+
+typedef AnnIVFSQTest<float, float, int64_t> AnnIVFSQTestF_float;
+TEST_P(AnnIVFSQTestF_float, AnnIVFSQ)
+{
+  this->testIVFSQ();
+  this->testFilter();
+}
+
+INSTANTIATE_TEST_CASE_P(AnnIVFSQTest, AnnIVFSQTestF_float, ::testing::ValuesIn(inputs));
+
+}  // namespace cuvs::neighbors::ivf_sq
diff --git a/docs/source/python_api/neighbors.rst b/docs/source/python_api/neighbors.rst
index 909f2013ad..614dc0b8e1 100644
--- a/docs/source/python_api/neighbors.rst
+++ b/docs/source/python_api/neighbors.rst
@@ -17,6 +17,7 @@ Single-GPU Algorithms
    neighbors_hnsw.rst
    neighbors_ivf_flat.rst
    neighbors_ivf_pq.rst
+   neighbors_ivf_sq.rst
    neighbors_nn_decent.rst
 
 Multi-GPU Algorithms
diff --git a/docs/source/python_api/neighbors_ivf_sq.rst b/docs/source/python_api/neighbors_ivf_sq.rst
new file mode 100644
index 0000000000..7a1b8e2ac9
--- /dev/null
+++ b/docs/source/python_api/neighbors_ivf_sq.rst
@@ -0,0 +1,49 @@
+IVF-SQ
+======
+
+.. role:: py(code)
+   :language: python
+   :class: highlight
+
+Index build parameters
+######################
+
+.. autoclass:: cuvs.neighbors.ivf_sq.IndexParams
+    :members:
+
+Index search parameters
+#######################
+
+.. autoclass:: cuvs.neighbors.ivf_sq.SearchParams
+    :members:
+
+Index
+#####
+
+.. autoclass:: cuvs.neighbors.ivf_sq.Index
+    :members:
+
+Index build
+###########
+
+.. autofunction:: cuvs.neighbors.ivf_sq.build
+
+Index search
+############
+
+.. autofunction:: cuvs.neighbors.ivf_sq.search
+
+Index save
+##########
+
+.. autofunction:: cuvs.neighbors.ivf_sq.save
+
+Index load
+##########
+
+.. autofunction:: cuvs.neighbors.ivf_sq.load
+
+Index extend
+############
+
+.. autofunction:: cuvs.neighbors.ivf_sq.extend
diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt
index 6a48508be5..8124962cd4 100644
--- a/python/cuvs/cuvs/neighbors/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt
@@ -1,6 +1,6 @@
 # =============================================================================
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
 #
@@ -9,6 +9,7 @@ add_subdirectory(cagra)
 add_subdirectory(hnsw)
 add_subdirectory(ivf_flat)
 add_subdirectory(ivf_pq)
+add_subdirectory(ivf_sq)
 add_subdirectory(filters)
 add_subdirectory(nn_descent)
 add_subdirectory(tiered_index)
diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py
index 016f98ce60..6b4966bc18 100644
--- a/python/cuvs/cuvs/neighbors/__init__.py
+++ b/python/cuvs/cuvs/neighbors/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 
@@ -9,6 +9,7 @@
     filters,
     ivf_flat,
     ivf_pq,
+    ivf_sq,
     mg,
     nn_descent,
     vamana,
@@ -22,6 +23,7 @@
     "filters",
     "ivf_flat",
     "ivf_pq",
+    "ivf_sq",
     "mg",
     "nn_descent",
     "all_neighbors",
diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt
new file mode 100644
index 0000000000..5fe85c9de5
--- /dev/null
+++ b/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt
@@ -0,0 +1,17 @@
+# =============================================================================
+# cmake-format: off
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+# cmake-format: on
+# =============================================================================
+
+# Set the list of Cython files to build
+set(cython_sources ivf_sq.pyx)
+set(linked_libraries cuvs::cuvs cuvs::c_api)
+
+# Build all of the Cython targets
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES "${cython_sources}"
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ivf_sq_
+)
diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py b/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py
new file mode 100644
index 0000000000..bec1a652db
--- /dev/null
+++ b/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py
@@ -0,0 +1,25 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+
+
+from .ivf_sq import (
+    Index,
+    IndexParams,
+    SearchParams,
+    build,
+    extend,
+    load,
+    save,
+    search,
+)
+
+__all__ = [
+    "Index",
+    "IndexParams",
+    "SearchParams",
+    "build",
+    "extend",
+    "load",
+    "save",
+    "search",
+]
diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd
new file mode 100644
index 0000000000..99617f7d0c
--- /dev/null
+++ b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd
@@ -0,0 +1,102 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+#
+# cython: language_level=3
+
+from libc.stdint cimport int64_t, uint32_t, uintptr_t
+from libcpp cimport bool
+
+from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
+from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
+from cuvs.distance_type cimport cuvsDistanceType
+from cuvs.neighbors.filters.filters cimport cuvsFilter
+
+
+cdef extern from "cuvs/neighbors/ivf_sq.h" nogil:
+
+    ctypedef struct cuvsIvfSqIndexParams:
+        cuvsDistanceType metric
+        float metric_arg
+        bool add_data_on_build
+        uint32_t n_lists
+        uint32_t kmeans_n_iters
+        double kmeans_trainset_fraction
+        bool adaptive_centers
+        bool conservative_memory_allocation
+
+    ctypedef cuvsIvfSqIndexParams* cuvsIvfSqIndexParams_t
+
+    ctypedef struct cuvsIvfSqSearchParams:
+        uint32_t n_probes
+
+    ctypedef cuvsIvfSqSearchParams* cuvsIvfSqSearchParams_t
+
+    ctypedef struct cuvsIvfSqIndex:
+        uintptr_t addr
+        DLDataType dtype
+
+    ctypedef cuvsIvfSqIndex* cuvsIvfSqIndex_t
+
+    cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* params)
+
+    cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index)
+
+    cuvsError_t cuvsIvfSqSearchParamsCreate(
+        cuvsIvfSqSearchParams_t* params)
+
+    cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t index)
+
+    cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index)
+
+    cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index)
+
+    cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index,
+                                        int64_t * n_lists)
+
+    cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t * dim)
+
+    cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t * size)
+
+    cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index,
+                                         DLManagedTensor * centers)
+
+    cuvsError_t cuvsIvfSqBuild(cuvsResources_t res,
+                               cuvsIvfSqIndexParams* params,
+                               DLManagedTensor* dataset,
+                               cuvsIvfSqIndex_t index) except +
+
+    cuvsError_t cuvsIvfSqSearch(cuvsResources_t res,
+                                cuvsIvfSqSearchParams* params,
+                                cuvsIvfSqIndex_t index,
+                                DLManagedTensor* queries,
+                                DLManagedTensor* neighbors,
+                                DLManagedTensor* distances) except +
+
+    cuvsError_t cuvsIvfSqSearchWithFilter(cuvsResources_t res,
+                                          cuvsIvfSqSearchParams* params,
+                                          cuvsIvfSqIndex_t index,
+                                          DLManagedTensor* queries,
+                                          DLManagedTensor* neighbors,
+                                          DLManagedTensor* distances,
+                                          cuvsFilter filter) except +
+
+    cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res,
+                                   const char * filename,
+                                   cuvsIvfSqIndex_t index) except +
+
+    cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res,
+                                     const char * filename,
+                                     cuvsIvfSqIndex_t index) except +
+
+    cuvsError_t cuvsIvfSqExtend(cuvsResources_t res,
+                                DLManagedTensor* new_vectors,
+                                DLManagedTensor* new_indices,
+                                cuvsIvfSqIndex_t index)
+
+
+cdef class IndexParams:
+    cdef cuvsIvfSqIndexParams* params
+
+cdef class SearchParams:
+    cdef cuvsIvfSqSearchParams* params
diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx
new file mode 100644
index 0000000000..7f8f2b0897
--- /dev/null
+++ b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx
@@ -0,0 +1,547 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+#
+# cython: language_level=3
+
+import numpy as np
+
+cimport cuvs.common.cydlpack
+
+from cuvs.common.resources import auto_sync_resources
+
+from cython.operator cimport dereference as deref
+from libcpp cimport bool, cast
+from libcpp.string cimport string
+
+from cuvs.common cimport cydlpack
+from cuvs.distance_type cimport cuvsDistanceType
+
+from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray
+from pylibraft.common.cai_wrapper import wrap_array
+from pylibraft.common.interruptible import cuda_interruptible
+
+from cuvs.common.device_tensor_view import DeviceTensorView
+from cuvs.distance import DISTANCE_NAMES, DISTANCE_TYPES
+from cuvs.neighbors.common import _check_input_array
+from cuvs.neighbors.filters import no_filter
+
+from libc.stdint cimport (
+    int8_t,
+    int64_t,
+    uint8_t,
+    uint32_t,
+    uint64_t,
+    uintptr_t,
+)
+
+from cuvs.common.exceptions import check_cuvs
+
+
+cdef class IndexParams:
+    """
+    Parameters to build index for IvfSq nearest neighbor search
+
+    Parameters
+    ----------
+    n_lists : int, default = 1024
+        The number of clusters used in the coarse quantizer.
+    metric : str, default = "sqeuclidean"
+        String denoting the metric type.
+        Valid values for metric: ["sqeuclidean", "inner_product",
+        "euclidean", "cosine"], where
+
+            - sqeuclidean is the euclidean distance without the square root
+              operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,
+            - euclidean is the euclidean distance
+            - inner product distance is defined as
+              distance(a, b) = \\sum_i a_i * b_i.
+            - cosine distance is defined as
+              distance(a, b) = 1 - \\sum_i a_i * b_i / ( ||a||_2 * ||b||_2).
+
+    kmeans_n_iters : int, default = 20
+        The number of iterations searching for kmeans centers during index
+        building.
+    kmeans_trainset_fraction : int, default = 0.5
+        If kmeans_trainset_fraction is less than 1, then the dataset is
+        subsampled, and only n_samples * kmeans_trainset_fraction rows
+        are used for training.
+    add_data_on_build : bool, default = True
+        After training the coarse and fine quantizers, we will populate
+        the index with the dataset if add_data_on_build == True, otherwise
+        the index is left empty, and the extend method can be used
+        to add new vectors to the index.
+    adaptive_centers : bool, default = False
+        By default (adaptive_centers = False), the cluster centers are
+        trained in `ivf_sq.build`, and never modified in
+        `ivf_sq.extend`. The alternative behavior (adaptive_centers
+        = true) is to update the cluster centers for new data when it is
+        added. In this case, `index.centers()` are always exactly the
+        centroids of the data in the corresponding clusters. The drawback
+        of this behavior is that the centroids depend on the order of
+        adding new data (through the classification of the added data);
+        that is, `index.centers()` "drift" together with the changing
+        distribution of the newly added data.
+    conservative_memory_allocation : bool, default = False
+        By default, the algorithm allocates more space than necessary for
+        individual clusters (`list_data`). This allows to amortize the cost
+        of memory allocation and reduce the number of data copies during
+        repeated calls to `extend` (extending the database).
+        To disable this behavior and use as little GPU memory for the
+        database as possible, set this flag to `True`.
+    """
+
+    def __cinit__(self):
+        cuvsIvfSqIndexParamsCreate(&self.params)
+
+    def __dealloc__(self):
+        if self.params != NULL:
+            check_cuvs(cuvsIvfSqIndexParamsDestroy(self.params))
+
+    def __init__(self, *,
+                 n_lists=1024,
+                 metric="sqeuclidean",
+                 metric_arg=2.0,
+                 kmeans_n_iters=20,
+                 kmeans_trainset_fraction=0.5,
+                 adaptive_centers=False,
+                 add_data_on_build=True,
+                 conservative_memory_allocation=False):
+        self.params.metric = <cuvsDistanceType>DISTANCE_TYPES[metric]
+        self.params.metric_arg = metric_arg
+        self.params.add_data_on_build = add_data_on_build
+        self.params.n_lists = n_lists
+        self.params.kmeans_n_iters = kmeans_n_iters
+        self.params.kmeans_trainset_fraction = kmeans_trainset_fraction
+        self.params.adaptive_centers = adaptive_centers
+        self.params.conservative_memory_allocation = \
+            conservative_memory_allocation
+
+    def get_handle(self):
+        return <size_t> self.params
+
+    @property
+    def metric(self):
+        return DISTANCE_NAMES[self.params.metric]
+
+    @property
+    def metric_arg(self):
+        return self.params.metric_arg
+
+    @property
+    def add_data_on_build(self):
+        return self.params.add_data_on_build
+
+    @property
+    def n_lists(self):
+        return self.params.n_lists
+
+    @property
+    def kmeans_n_iters(self):
+        return self.params.kmeans_n_iters
+
+    @property
+    def kmeans_trainset_fraction(self):
+        return self.params.kmeans_trainset_fraction
+
+    @property
+    def adaptive_centers(self):
+        return self.params.adaptive_centers
+
+    @property
+    def conservative_memory_allocation(self):
+        return self.params.conservative_memory_allocation
+
+
+cdef class Index:
+    """
+    IvfSq index object. This object stores the trained IvfSq index state
+    which can be used to perform nearest neighbors searches.
+    """
+
+    cdef cuvsIvfSqIndex_t index
+    cdef bool trained
+
+    def __cinit__(self):
+        self.trained = False
+        check_cuvs(cuvsIvfSqIndexCreate(&self.index))
+
+    def __dealloc__(self):
+        check_cuvs(cuvsIvfSqIndexDestroy(self.index))
+
+    @property
+    def trained(self):
+        return self.trained
+
+    def __repr__(self):
+        return "Index(type=IvfSq)"
+
+    @property
+    def n_lists(self):
+        """ The number of inverted lists (clusters) """
+        cdef int64_t n_lists = 0
+        cuvsIvfSqIndexGetNLists(self.index, &n_lists)
+        return n_lists
+
+    @property
+    def dim(self):
+        """ dimensionality of the cluster centers """
+        cdef int64_t dim = 0
+        cuvsIvfSqIndexGetDim(self.index, &dim)
+        return dim
+
+    def __len__(self):
+        cdef int64_t size = 0
+        check_cuvs(cuvsIvfSqIndexGetSize(self.index, &size))
+        return size
+
+    @property
+    def centers(self):
+        """ Get the cluster centers corresponding to the lists in the
+        original space """
+        if not self.trained:
+            raise ValueError("Index needs to be built before getting centers")
+
+        output = DeviceTensorView()
+        cdef cydlpack.DLManagedTensor * tensor = \
+            <cydlpack.DLManagedTensor*><size_t>output.get_handle()
+        check_cuvs(cuvsIvfSqIndexGetCenters(self.index, tensor))
+        output.parent = self
+
+        return output
+
+
+@auto_sync_resources
+def build(IndexParams index_params, dataset, resources=None):
+    """
+    Build the IvfSq index from the dataset for efficient search.
+
+    IVF-SQ (Scalar Quantization) combines an IVF coarse quantizer with
+    per-dimension scalar quantization. Each vector's residual is encoded
+    as one byte per dimension, providing ~4x memory reduction vs IVF-Flat
+    with higher recall than IVF-PQ at similar memory budgets.
+
+    Parameters
+    ----------
+    index_params : :py:class:`cuvs.neighbors.ivf_sq.IndexParams`
+    dataset : CUDA array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float32, float16]
+    {resources_docstring}
+
+    Returns
+    -------
+    index: py:class:`cuvs.neighbors.ivf_sq.Index`
+
+    Examples
+    --------
+
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import ivf_sq
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> n_queries = 1000
+    >>> k = 10
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> build_params = ivf_sq.IndexParams(metric="sqeuclidean")
+    >>> index = ivf_sq.build(build_params, dataset)
+    >>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(),
+    ...                                      index, dataset,
+    ...                                      k)
+    >>> distances = cp.asarray(distances)
+    >>> neighbors = cp.asarray(neighbors)
+    """
+
+    dataset_ai = wrap_array(dataset)
+    _check_input_array(dataset_ai, [np.dtype('float32'),
+                                    np.dtype('float16')])
+
+    cdef Index idx = Index()
+    cdef cydlpack.DLManagedTensor* dataset_dlpack = \
+        cydlpack.dlpack_c(dataset_ai)
+    cdef cuvsIvfSqIndexParams* params = index_params.params
+
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+
+    with cuda_interruptible():
+        check_cuvs(cuvsIvfSqBuild(
+            res,
+            params,
+            dataset_dlpack,
+            idx.index
+        ))
+        idx.trained = True
+
+    return idx
+
+
+cdef class SearchParams:
+    """
+    Supplemental parameters to search IVF-SQ index
+
+    Parameters
+    ----------
+    n_probes: int
+        The number of clusters to search.
+    """
+
+    def __cinit__(self):
+        cuvsIvfSqSearchParamsCreate(&self.params)
+
+    def __dealloc__(self):
+        if self.params != NULL:
+            check_cuvs(cuvsIvfSqSearchParamsDestroy(self.params))
+
+    def __init__(self, *, n_probes=20):
+        self.params.n_probes = n_probes
+
+    def get_handle(self):
+        return <size_t> self.params
+
+    @property
+    def n_probes(self):
+        return self.params.n_probes
+
+
+@auto_sync_resources
+@auto_convert_output
+def search(SearchParams search_params,
+           Index index,
+           queries,
+           k,
+           neighbors=None,
+           distances=None,
+           resources=None,
+           filter=None):
+    """
+    Find the k nearest neighbors for each query.
+
+    Parameters
+    ----------
+    search_params : py:class:`cuvs.neighbors.ivf_sq.SearchParams`
+    index : py:class:`cuvs.neighbors.ivf_sq.Index`
+        Trained IvfSq index.
+    queries : CUDA array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float32, float16]
+    k : int
+        The number of neighbors.
+    neighbors : Optional CUDA array interface compliant matrix shape
+                (n_queries, k), dtype int64_t. If supplied, neighbor
+                indices will be written here in-place. (default None)
+    distances : Optional CUDA array interface compliant matrix shape
+                (n_queries, k) If supplied, the distances to the
+                neighbors will be written here in-place. (default None)
+    filter:     Optional cuvs.neighbors.cuvsFilter can be used to filter
+                neighbors based on a given bitset. (default None)
+    {resources_docstring}
+
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import ivf_sq
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> n_queries = 1000
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> # Build the index
+    >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset)
+    >>>
+    >>> # Search using the built index
+    >>> queries = cp.random.random_sample((n_queries, n_features),
+    ...                                   dtype=cp.float32)
+    >>> k = 10
+    >>> search_params = ivf_sq.SearchParams(n_probes=20)
+    >>>
+    >>> distances, neighbors = ivf_sq.search(search_params, index, queries,
+    ...                                     k)
+    """
+    if not index.trained:
+        raise ValueError("Index needs to be built before calling search.")
+
+    queries_cai = wrap_array(queries)
+    _check_input_array(queries_cai, [np.dtype('float32'),
+                                     np.dtype('float16')])
+
+    cdef uint32_t n_queries = queries_cai.shape[0]
+
+    if neighbors is None:
+        neighbors = device_ndarray.empty((n_queries, k), dtype='int64')
+
+    neighbors_cai = wrap_array(neighbors)
+    _check_input_array(neighbors_cai, [np.dtype('int64')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    if distances is None:
+        distances = device_ndarray.empty((n_queries, k), dtype='float32')
+
+    distances_cai = wrap_array(distances)
+    _check_input_array(distances_cai, [np.dtype('float32')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    cdef cuvsIvfSqSearchParams* params = search_params.params
+    cdef cydlpack.DLManagedTensor* queries_dlpack = \
+        cydlpack.dlpack_c(queries_cai)
+    cdef cydlpack.DLManagedTensor* neighbors_dlpack = \
+        cydlpack.dlpack_c(neighbors_cai)
+    cdef cydlpack.DLManagedTensor* distances_dlpack = \
+        cydlpack.dlpack_c(distances_cai)
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+
+    if filter is None:
+        filter = no_filter()
+
+    with cuda_interruptible():
+        check_cuvs(cuvsIvfSqSearchWithFilter(
+            res,
+            params,
+            index.index,
+            queries_dlpack,
+            neighbors_dlpack,
+            distances_dlpack,
+            filter.prefilter
+        ))
+
+    return (distances, neighbors)
+
+
+@auto_sync_resources
+def save(filename, Index index, bool include_dataset=True, resources=None):
+    """
+    Saves the index to a file.
+
+    Saving / loading the index is experimental. The serialization format is
+    subject to change.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file.
+    index : Index
+        Trained IVF-SQ index.
+    {resources_docstring}
+
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import ivf_sq
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> # Build index
+    >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset)
+    >>> # Serialize and deserialize the ivf_sq index built
+    >>> ivf_sq.save("my_index.bin", index)
+    >>> index_loaded = ivf_sq.load("my_index.bin")
+    """
+    cdef string c_filename = filename.encode('utf-8')
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+    check_cuvs(cuvsIvfSqSerialize(res,
+                                  c_filename.c_str(),
+                                  index.index))
+
+
+@auto_sync_resources
+def load(filename, resources=None):
+    """
+    Loads index from file.
+
+    Saving / loading the index is experimental. The serialization format is
+    subject to change, therefore loading an index saved with a previous
+    version of cuvs is not guaranteed to work.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file.
+    {resources_docstring}
+
+    Returns
+    -------
+    index : Index
+
+    """
+    cdef Index idx = Index()
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+    cdef string c_filename = filename.encode('utf-8')
+
+    check_cuvs(cuvsIvfSqDeserialize(
+        res,
+        c_filename.c_str(),
+        idx.index
+    ))
+    idx.trained = True
+    return idx
+
+
+@auto_sync_resources
+def extend(Index index, new_vectors, new_indices, resources=None):
+    """
+    Extend an existing index with new vectors.
+
+    The input array can be either CUDA array interface compliant matrix or
+    array interface compliant matrix in host memory.
+
+
+    Parameters
+    ----------
+    index : ivf_sq.Index
+        Trained ivf_sq object.
+    new_vectors : array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float32, float16]
+    new_indices : array interface compliant vector shape (n_samples)
+        Supported dtype [int64]
+    {resources_docstring}
+
+    Returns
+    -------
+    index: py:class:`cuvs.neighbors.ivf_sq.Index`
+
+    Examples
+    --------
+
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import ivf_sq
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> n_queries = 1000
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset)
+    >>> n_rows = 100
+    >>> more_data = cp.random.random_sample((n_rows, n_features),
+    ...                                     dtype=cp.float32)
+    >>> indices = n_samples + cp.arange(n_rows, dtype=cp.int64)
+    >>> index = ivf_sq.extend(index, more_data, indices)
+    >>> # Search using the built index
+    >>> queries = cp.random.random_sample((n_queries, n_features),
+    ...                                   dtype=cp.float32)
+    >>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(),
+    ...                                      index, queries,
+    ...                                      k=10)
+    """
+
+    new_vectors_ai = wrap_array(new_vectors)
+    _check_input_array(new_vectors_ai,
+                       [np.dtype('float32'), np.dtype('float16')])
+
+    new_indices_ai = wrap_array(new_indices)
+    _check_input_array(new_indices_ai, [np.dtype('int64')])
+    cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
+
+    cdef cydlpack.DLManagedTensor* new_vectors_dlpack = \
+        cydlpack.dlpack_c(new_vectors_ai)
+
+    cdef cydlpack.DLManagedTensor* new_indices_dlpack = \
+        cydlpack.dlpack_c(new_indices_ai)
+
+    with cuda_interruptible():
+        check_cuvs(cuvsIvfSqExtend(
+            res,
+            new_vectors_dlpack,
+            new_indices_dlpack,
+            index.index
+        ))
+
+    return index
diff --git a/python/cuvs/cuvs/tests/test_ivf_sq.py b/python/cuvs/cuvs/tests/test_ivf_sq.py
new file mode 100644
index 0000000000..f4a03fd14b
--- /dev/null
+++ b/python/cuvs/cuvs/tests/test_ivf_sq.py
@@ -0,0 +1,143 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import tempfile
+
+import numpy as np
+import pytest
+from pylibraft.common import device_ndarray
+from sklearn.neighbors import NearestNeighbors
+from sklearn.preprocessing import normalize
+
+from cuvs.neighbors import ivf_sq
+from cuvs.tests.ann_utils import (
+    calc_recall,
+    generate_data,
+    run_filtered_search_test,
+)
+
+
+def run_ivf_sq_build_search_test(
+    n_rows=10000,
+    n_cols=10,
+    n_queries=100,
+    k=10,
+    dtype=np.float32,
+    add_data_on_build=True,
+    metric="euclidean",
+    compare=True,
+    inplace=True,
+    search_params={},
+    serialize=False,
+):
+    dataset = generate_data((n_rows, n_cols), dtype)
+    if metric == "inner_product":
+        dataset = normalize(dataset, norm="l2", axis=1)
+    dataset_device = device_ndarray(dataset)
+
+    build_params = ivf_sq.IndexParams(
+        metric=metric,
+        add_data_on_build=add_data_on_build,
+    )
+
+    index = ivf_sq.build(build_params, dataset_device)
+
+    if serialize:
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            temp_filename = f.name
+        ivf_sq.save(temp_filename, index)
+        index = ivf_sq.load(temp_filename)
+
+    if not add_data_on_build:
+        dataset_1 = dataset[: n_rows // 2, :]
+        dataset_2 = dataset[n_rows // 2 :, :]
+        indices_1 = np.arange(n_rows // 2, dtype=np.int64)
+        indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.int64)
+
+        dataset_1_device = device_ndarray(dataset_1)
+        dataset_2_device = device_ndarray(dataset_2)
+        indices_1_device = device_ndarray(indices_1)
+        indices_2_device = device_ndarray(indices_2)
+        index = ivf_sq.extend(index, dataset_1_device, indices_1_device)
+        index = ivf_sq.extend(index, dataset_2_device, indices_2_device)
+
+    queries = generate_data((n_queries, n_cols), dtype)
+    out_idx = np.zeros((n_queries, k), dtype=np.int64)
+    out_dist = np.zeros((n_queries, k), dtype=np.float32)
+
+    queries_device = device_ndarray(queries)
+    out_idx_device = device_ndarray(out_idx) if inplace else None
+    out_dist_device = device_ndarray(out_dist) if inplace else None
+
+    search_params = ivf_sq.SearchParams(**search_params)
+
+    ret_output = ivf_sq.search(
+        search_params,
+        index,
+        queries_device,
+        k,
+        neighbors=out_idx_device,
+        distances=out_dist_device,
+    )
+
+    if not inplace:
+        out_dist_device, out_idx_device = ret_output
+
+    if not compare:
+        return
+
+    out_idx = out_idx_device.copy_to_host()
+    out_dist = out_dist_device.copy_to_host()
+
+    skl_metric = {
+        "sqeuclidean": "sqeuclidean",
+        "inner_product": "cosine",
+        "cosine": "cosine",
+        "euclidean": "euclidean",
+    }[metric]
+    nn_skl = NearestNeighbors(
+        n_neighbors=k, algorithm="brute", metric=skl_metric
+    )
+    nn_skl.fit(dataset)
+    skl_idx = nn_skl.kneighbors(queries, return_distance=False)
+
+    recall = calc_recall(out_idx, skl_idx)
+    assert recall > 0.7
+
+    centers = index.centers
+    assert centers.shape[0] == build_params.n_lists
+    assert centers.shape[1] == n_cols
+
+
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32])
+@pytest.mark.parametrize(
+    "metric", ["sqeuclidean", "inner_product", "euclidean", "cosine"]
+)
+def test_ivf_sq(inplace, dtype, metric):
+    run_ivf_sq_build_search_test(
+        dtype=dtype,
+        inplace=inplace,
+        metric=metric,
+    )
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float16])
+@pytest.mark.parametrize("serialize", [True, False])
+def test_extend(dtype, serialize):
+    run_ivf_sq_build_search_test(
+        n_rows=10000,
+        n_cols=10,
+        n_queries=100,
+        k=10,
+        metric="sqeuclidean",
+        dtype=dtype,
+        add_data_on_build=False,
+        serialize=serialize,
+    )
+
+
+@pytest.mark.parametrize("sparsity", [0.5, 0.7, 1.0])
+def test_filtered_ivf_sq(sparsity):
+    run_filtered_search_test(ivf_sq, sparsity)
diff --git a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml
index fa2195fc61..3a787f65ab 100644
--- a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml
+++ b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml
@@ -34,6 +34,9 @@ cuvs_ivf_flat:
 cuvs_ivf_pq:
   executable: CUVS_IVF_PQ_ANN_BENCH
   requires_gpu: true
+cuvs_ivf_sq:
+  executable: CUVS_IVF_SQ_ANN_BENCH
+  requires_gpu: true
 cuvs_cagra:
   executable: CUVS_CAGRA_ANN_BENCH
   requires_gpu: true
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py
index 9111bdc3b9..ea2afe351e 100644
--- a/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py
+++ b/python/cuvs_bench/cuvs_bench/config/algos/constraints/__init__.py
@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 
 
@@ -50,6 +50,12 @@ def cuvs_cagra_search(params, build_params, k, batch_size):
     return True
 
 
+def cuvs_ivf_sq_search(params, build_params, k, batch_size):
+    if "nlist" in build_params and "nprobe" in params:
+        return build_params["nlist"] >= params["nprobe"]
+    return True
+
+
 ###############################################################################
 #                              FAISS constraints                              #
 ###############################################################################
diff --git a/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml
new file mode 100644
index 0000000000..adaad54e04
--- /dev/null
+++ b/python/cuvs_bench/cuvs_bench/config/algos/cuvs_ivf_sq.yaml
@@ -0,0 +1,25 @@
+name: cuvs_ivf_sq
+constraints:
+  search: cuvs_bench.config.algos.constraints.cuvs_ivf_sq_search
+groups:
+  base:
+    build:
+      nlist: [1024, 2048, 4096, 8192]
+      ratio: [1, 2]
+      niter: [25]
+    search:
+      nprobe: [1, 5, 10, 20, 50, 100, 200, 500]
+  large:
+    build:
+      nlist: [8192, 16384, 32000, 64000]
+      ratio: [2, 4]
+      niter: [20]
+    search:
+      nprobe: [10, 20, 50, 100, 200, 500, 1000, 2000]
+  test:
+    build:
+      nlist: [1024]
+      ratio: [1]
+      niter: [20]
+    search:
+      nprobe: [1, 5]