diff --git a/c/include/cuvs/cluster/kmeans.h b/c/include/cuvs/cluster/kmeans.h
index 9a3882cb4c..9e091839a1 100644
--- a/c/include/cuvs/cluster/kmeans.h
+++ b/c/include/cuvs/cluster/kmeans.h
@@ -41,8 +41,6 @@ typedef enum {
 
 /**
  * @brief Hyper-parameters for the kmeans algorithm
- * NB: The inertia_check field is kept for ABI compatibility. Removed in cuvsKMeansParams_v2.
- * TODO: CalVer for the replacement: 26.08
  */
 struct cuvsKMeansParams {
   cuvsDistanceType metric;
@@ -95,88 +93,6 @@ struct cuvsKMeansParams {
    */
   int batch_centroids;
 
-  /** Deprecated, ignored. Kept for ABI compatibility. */
-  bool inertia_check;
-
-  /**
-   * Whether to use hierarchical (balanced) kmeans or not
-   */
-  bool hierarchical;
-
-  /**
-   * For hierarchical k-means , defines the number of training iterations
-   */
-  int hierarchical_n_iters;
-
-  /**
-   * Number of samples to process per GPU batch for the batched (host-data) API.
-   * When set to 0, defaults to n_samples (process all at once).
-   */
-  int64_t streaming_batch_size;
-
-  /**
-   * Number of samples to draw for KMeansPlusPlus initialization.
-   * When set to 0, uses heuristic min(3 * n_clusters, n_samples) for host data,
-   * or n_samples for device data.
-   */
-  int64_t init_size;
-};
-
-/**
- * @brief Hyper-parameters for the kmeans algorithm
- * TODO: Remove this after cuvsKMeansParams is replaced in ABI 2.0
- */
- struct cuvsKMeansParams_v2 {
-  cuvsDistanceType metric;
-
-  /**
-   * The number of clusters to form as well as the number of centroids to generate (default:8).
-   */
-  int n_clusters;
-
-  /**
-   * Method for initialization, defaults to k-means++:
-   *  - cuvsKMeansInitMethod::KMeansPlusPlus (k-means++): Use scalable k-means++ algorithm
-   * to select the initial cluster centers.
-   *  - cuvsKMeansInitMethod::Random (random): Choose 'n_clusters' observations (rows) at
-   * random from the input data for the initial centroids.
-   *  - cuvsKMeansInitMethod::Array (ndarray): Use 'centroids' as initial cluster centers.
-   */
-  cuvsKMeansInitMethod init;
-
-  /**
-   * Maximum number of iterations of the k-means algorithm for a single run.
-   */
-  int max_iter;
-
-  /**
-   * Relative tolerance with regards to inertia to declare convergence.
-   */
-  double tol;
-
-  /**
-   * Number of instance k-means algorithm will be run with different seeds.
-   */
-  int n_init;
-
-  /**
-   * Oversampling factor for use in the k-means|| algorithm
-   */
-  double oversampling_factor;
-
-  /**
-   * batch_samples and batch_centroids are used to tile 1NN computation which is
-   * useful to optimize/control the memory footprint
-   * Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0
-   * then don't tile the centroids
-   */
-  int batch_samples;
-
-  /**
-   * if 0 then batch_centroids = n_clusters
-   */
-  int batch_centroids;
-
   /**
    * Whether to use hierarchical (balanced) kmeans or not
    */
@@ -202,14 +118,10 @@ struct cuvsKMeansParams {
 };
 
 typedef struct cuvsKMeansParams* cuvsKMeansParams_t;
-typedef struct cuvsKMeansParams_v2* cuvsKMeansParams_v2_t;
 
 /**
  * @brief Allocate KMeans params, and populate with default values
  *
- * @note In cuVS 26.08 (next ABI major version) this signature will be
- * replaced by cuvsKMeansParamsCreate_v2.
- *
  * @param[in] params cuvsKMeansParams_t to allocate
  * @return cuvsError_t
  */
@@ -218,33 +130,11 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params);
 /**
  * @brief De-allocate KMeans params
  *
- * @note In cuVS 26.08 (next ABI major version) this signature will be
- * replaced by cuvsKMeansParamsDestroy_v2.
- *
  * @param[in] params
  * @return cuvsError_t
  */
 CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);
 
-/**
- * @brief Allocate KMeans params
- *
- * Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2.
- * Will become the unsuffixed cuvsKMeansParamsCreate in cuVS 26.08.
- *
- * @param[in] params cuvsKMeansParams_v2_t to allocate
- * @return cuvsError_t
- */
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params);
-
-/**
- * @brief De-allocate KMeans params allocated by cuvsKMeansParamsCreate_v2.
- *
- * @param[in] params
- * @return cuvsError_t
- */
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params);
-
 /**
  * @brief Type of k-means algorithm.
  */
@@ -270,9 +160,6 @@ typedef enum { CUVS_KMEANS_TYPE_KMEANS = 0, CUVS_KMEANS_TYPE_KMEANS_BALANCED = 1
  *   When X is on the host the data is streamed to the GPU in
  *   batches controlled by params->streaming_batch_size.
  *
- * @note In cuVS 26.08 (next ABI major version) this signature will be
- * replaced by cuvsKMeansFit_v2.
- *
  * @param[in]     res           opaque C handle
  * @param[in]     params        Parameters for KMeans model.
  * @param[in]     X             Training instances to cluster. The data must
@@ -300,45 +187,9 @@ CUVS_EXPORT cuvsError_t cuvsKMeansFit(cuvsResources_t res,
                           double* inertia,
                           int* n_iter);
 
-/**
- * @brief Find clusters with k-means algorithm (v2 params layout).
- *
- * Mirrors cuvsKMeansFit but takes cuvsKMeansParams_v2_t. Will become the
- * unsuffixed cuvsKMeansFit in cuVS 26.08.
- *
- * @param[in]     res           opaque C handle
- * @param[in]     params        Parameters for KMeans model (v2 layout).
- * @param[in]     X             Training instances to cluster. The data must
- *                              be in row-major format. May be on host or
- *                              device memory.
- *                              [dim = n_samples x n_features]
- * @param[in]     sample_weight Optional weights for each observation in X.
- *                              Must be on the same memory space as X.
- *                              [len = n_samples]
- * @param[inout]  centroids     [in] When init is InitMethod::Array, use
- *                              centroids as the initial cluster centers.
- *                              [out] The generated centroids from the
- *                              kmeans algorithm are stored at the address
- *                              pointed by 'centroids'. Must be on device.
- *                              [dim = n_clusters x n_features]
- * @param[out]    inertia       Sum of squared distances of samples to their
- *                              closest cluster center.
- * @param[out]    n_iter        Number of iterations run.
- */
-CUVS_EXPORT cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res,
-                             cuvsKMeansParams_v2_t params,
-                             DLManagedTensor* X,
-                             DLManagedTensor* sample_weight,
-                             DLManagedTensor* centroids,
-                             double* inertia,
-                             int* n_iter);
-
 /**
  * @brief Predict the closest cluster each sample in X belongs to.
  *
- * @note In cuVS 26.08 (next ABI major version) this signature will be
- * replaced by cuvsKMeansPredict_v2.
- *
  * @param[in]     res              opaque C handle
  * @param[in]     params           Parameters for KMeans model.
  * @param[in]     X                New data to predict.
@@ -364,37 +215,6 @@ CUVS_EXPORT cuvsError_t cuvsKMeansPredict(cuvsResources_t res,
                               bool normalize_weight,
                               double* inertia);
 
-/**
- * @brief Predict the closest cluster each sample in X belongs to (v2 params layout).
- *
- * Mirrors cuvsKMeansPredict but takes cuvsKMeansParams_v2_t. Will become the
- * unsuffixed cuvsKMeansPredict in cuVS 26.08.
- *
- * @param[in]     res              opaque C handle
- * @param[in]     params           Parameters for KMeans model (v2 layout).
- * @param[in]     X                New data to predict.
- *                                 [dim = n_samples x n_features]
- * @param[in]     sample_weight    Optional weights for each observation in X.
- *                                 [len = n_samples]
- * @param[in]     centroids        Cluster centroids. The data must be in
- *                                 row-major format.
- *                                 [dim = n_clusters x n_features]
- * @param[in]     normalize_weight True if the weights should be normalized
- * @param[out]    labels           Index of the cluster each sample in X
- *                                 belongs to.
- *                                 [len = n_samples]
- * @param[out]    inertia          Sum of squared distances of samples to
- *                                 their closest cluster center.
- */
-CUVS_EXPORT cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res,
-                                 cuvsKMeansParams_v2_t params,
-                                 DLManagedTensor* X,
-                                 DLManagedTensor* sample_weight,
-                                 DLManagedTensor* centroids,
-                                 DLManagedTensor* labels,
-                                 bool normalize_weight,
-                                 double* inertia);
-
 /**
  * @brief Compute cluster cost
  *
diff --git a/c/src/cluster/kmeans.cpp b/c/src/cluster/kmeans.cpp
index 8e46764ce4..4db59532be 100644
--- a/c/src/cluster/kmeans.cpp
+++ b/c/src/cluster/kmeans.cpp
@@ -16,9 +16,7 @@
 
 namespace {
 
-// The conversions are templated on the C struct type and reused by both API surfaces.
-template <typename ParamsT>
-cuvs::cluster::kmeans::params convert_params(const ParamsT& params)
+cuvs::cluster::kmeans::params convert_params(const cuvsKMeansParams& params)
 {
   auto kmeans_params                = cuvs::cluster::kmeans::params();
   kmeans_params.metric              = static_cast<cuvs::distance::DistanceType>(params.metric);
@@ -35,8 +33,7 @@ cuvs::cluster::kmeans::params convert_params(const ParamsT& params)
   return kmeans_params;
 }
 
-template <typename ParamsT>
-cuvs::cluster::kmeans::balanced_params convert_balanced_params(const ParamsT& params)
+cuvs::cluster::kmeans::balanced_params convert_balanced_params(const cuvsKMeansParams& params)
 {
   auto kmeans_params    = cuvs::cluster::kmeans::balanced_params();
   kmeans_params.metric  = static_cast<cuvs::distance::DistanceType>(params.metric);
@@ -44,9 +41,9 @@ cuvs::cluster::kmeans::balanced_params convert_balanced_params(const ParamsT& pa
   return kmeans_params;
 }
 
-template <typename T, typename ParamsT, typename IdxT = int64_t>
+template <typename T, typename IdxT = int64_t>
 void _fit(cuvsResources_t res,
-          const ParamsT& params,
+          const cuvsKMeansParams& params,
           DLManagedTensor* X_tensor,
           DLManagedTensor* sample_weight_tensor,
           DLManagedTensor* centroids_tensor,
@@ -143,9 +140,9 @@ void _fit(cuvsResources_t res,
   }
 }
 
-template <typename T, typename ParamsT, typename IdxT = int32_t, typename LabelsT = int32_t>
+template <typename T, typename IdxT = int32_t, typename LabelsT = int32_t>
 void _predict(cuvsResources_t res,
-              const ParamsT& params,
+              const cuvsKMeansParams& params,
               DLManagedTensor* X_tensor,
               DLManagedTensor* sample_weight_tensor,
               DLManagedTensor* centroids_tensor,
@@ -240,7 +237,6 @@ extern "C" cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params)
       .oversampling_factor  = cpp_params.oversampling_factor,
       .batch_samples        = cpp_params.batch_samples,
       .batch_centroids      = cpp_params.batch_centroids,
-      .inertia_check        = false,
       .hierarchical         = false,
       .hierarchical_n_iters = static_cast<int>(cpp_balanced_params.n_iters),
       .streaming_batch_size = cpp_params.streaming_batch_size,
@@ -298,79 +294,6 @@ extern "C" cuvsError_t cuvsKMeansPredict(cuvsResources_t res,
   });
 }
 
-extern "C" cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params)
-{
-  return cuvs::core::translate_exceptions([=] {
-    cuvs::cluster::kmeans::params cpp_params;
-    cuvs::cluster::kmeans::balanced_params cpp_balanced_params;
-    *params = new cuvsKMeansParams_v2{
-      .metric               = static_cast<cuvsDistanceType>(cpp_params.metric),
-      .n_clusters           = cpp_params.n_clusters,
-      .init                 = static_cast<cuvsKMeansInitMethod>(cpp_params.init),
-      .max_iter             = cpp_params.max_iter,
-      .tol                  = cpp_params.tol,
-      .n_init               = cpp_params.n_init,
-      .oversampling_factor  = cpp_params.oversampling_factor,
-      .batch_samples        = cpp_params.batch_samples,
-      .batch_centroids      = cpp_params.batch_centroids,
-      .hierarchical         = false,
-      .hierarchical_n_iters = static_cast<int>(cpp_balanced_params.n_iters),
-      .streaming_batch_size = cpp_params.streaming_batch_size,
-      .init_size            = cpp_params.init_size};
-  });
-}
-
-extern "C" cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params)
-{
-  return cuvs::core::translate_exceptions([=] { delete params; });
-}
-
-extern "C" cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res,
-                                        cuvsKMeansParams_v2_t params,
-                                        DLManagedTensor* X,
-                                        DLManagedTensor* sample_weight,
-                                        DLManagedTensor* centroids,
-                                        double* inertia,
-                                        int* n_iter)
-{
-  return cuvs::core::translate_exceptions([=] {
-    auto dataset = X->dl_tensor;
-    if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
-      _fit<float>(res, *params, X, sample_weight, centroids, inertia, n_iter);
-    } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 64) {
-      _fit<double>(res, *params, X, sample_weight, centroids, inertia, n_iter);
-    } else {
-      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
-                dataset.dtype.code,
-                dataset.dtype.bits);
-    }
-  });
-}
-
-extern "C" cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res,
-                                            cuvsKMeansParams_v2_t params,
-                                            DLManagedTensor* X,
-                                            DLManagedTensor* sample_weight,
-                                            DLManagedTensor* centroids,
-                                            DLManagedTensor* labels,
-                                            bool normalize_weight,
-                                            double* inertia)
-{
-  return cuvs::core::translate_exceptions([=] {
-    auto dataset = X->dl_tensor;
-    if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
-      _predict<float>(res, *params, X, sample_weight, centroids, labels, normalize_weight, inertia);
-    } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 64) {
-      _predict<double>(
-        res, *params, X, sample_weight, centroids, labels, normalize_weight, inertia);
-    } else {
-      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
-                dataset.dtype.code,
-                dataset.dtype.bits);
-    }
-  });
-}
-
 extern "C" cuvsError_t cuvsKMeansClusterCost(cuvsResources_t res,
                                              DLManagedTensor* X,
                                              DLManagedTensor* centroids,
diff --git a/c/tests/cluster/kmeans_c.cu b/c/tests/cluster/kmeans_c.cu
index 3c87d035d3..7aa68bed40 100644
--- a/c/tests/cluster/kmeans_c.cu
+++ b/c/tests/cluster/kmeans_c.cu
@@ -48,59 +48,6 @@ int32_t kExpectedLabels[kNSamples]                = {0, 0, 0, 0, 1, 1, 1, 1};
 // 8 points, each at squared distance 0.5 from its cluster mean -> 4.0.
 constexpr double kExpectedInertia = 4.0;
 
-// Type-erased dispatcher to exercise both the v1 and v2 entry points with
-// shared test bodies.
-struct kmeans_api_v1 {
-  using params_t = cuvsKMeansParams_t;
-  static cuvsError_t params_create(params_t* p) { return cuvsKMeansParamsCreate(p); }
-  static cuvsError_t params_destroy(params_t p) { return cuvsKMeansParamsDestroy(p); }
-  static cuvsError_t fit(cuvsResources_t res,
-                         params_t params,
-                         DLManagedTensor* dataset,
-                         DLManagedTensor* centroids,
-                         double* inertia,
-                         int* n_iter)
-  {
-    return cuvsKMeansFit(res, params, dataset, NULL, centroids, inertia, n_iter);
-  }
-  static cuvsError_t predict(cuvsResources_t res,
-                             params_t params,
-                             DLManagedTensor* dataset,
-                             DLManagedTensor* centroids,
-                             DLManagedTensor* labels,
-                             double* inertia)
-  {
-    return cuvsKMeansPredict(
-      res, params, dataset, NULL, centroids, labels, false, inertia);
-  }
-};
-
-struct kmeans_api_v2 {
-  using params_t = cuvsKMeansParams_v2_t;
-  static cuvsError_t params_create(params_t* p) { return cuvsKMeansParamsCreate_v2(p); }
-  static cuvsError_t params_destroy(params_t p) { return cuvsKMeansParamsDestroy_v2(p); }
-  static cuvsError_t fit(cuvsResources_t res,
-                         params_t params,
-                         DLManagedTensor* dataset,
-                         DLManagedTensor* centroids,
-                         double* inertia,
-                         int* n_iter)
-  {
-    return cuvsKMeansFit_v2(res, params, dataset, NULL, centroids, inertia, n_iter);
-  }
-  static cuvsError_t predict(cuvsResources_t res,
-                             params_t params,
-                             DLManagedTensor* dataset,
-                             DLManagedTensor* centroids,
-                             DLManagedTensor* labels,
-                             double* inertia)
-  {
-    return cuvsKMeansPredict_v2(
-      res, params, dataset, NULL, centroids, labels, false, inertia);
-  }
-};
-
-template <typename Api>
 void test_fit_predict()
 {
   raft::handle_t handle;
@@ -122,8 +69,8 @@ void test_fit_predict()
   cuvsResources_t res;
   ASSERT_EQ(cuvsResourcesCreate(&res), CUVS_SUCCESS);
 
-  typename Api::params_t params;
-  ASSERT_EQ(Api::params_create(&params), CUVS_SUCCESS);
+  cuvsKMeansParams_t params;
+  ASSERT_EQ(cuvsKMeansParamsCreate(&params), CUVS_SUCCESS);
   params->n_clusters           = kNClusters;
   params->max_iter             = 100;
   params->tol                  = 1e-6;
@@ -149,8 +96,10 @@ void test_fit_predict()
   double predict_inertia = -1.0;
   double cluster_cost    = -1.0;
 
-  ASSERT_EQ(Api::fit(res, params, &dataset_t, &centroids_t, &inertia, &n_iter), CUVS_SUCCESS);
-  ASSERT_EQ(Api::predict(res, params, &dataset_t, &centroids_t, &labels_t, &predict_inertia),
+  ASSERT_EQ(cuvsKMeansFit(res, params, &dataset_t, nullptr, &centroids_t, &inertia, &n_iter),
+            CUVS_SUCCESS);
+  ASSERT_EQ(cuvsKMeansPredict(
+              res, params, &dataset_t, nullptr, &centroids_t, &labels_t, false, &predict_inertia),
             CUVS_SUCCESS);
   ASSERT_EQ(cuvsKMeansClusterCost(res, &dataset_t, &centroids_t, &cluster_cost), CUVS_SUCCESS);
 
@@ -170,11 +119,10 @@ void test_fit_predict()
   centroids_t.deleter(&centroids_t);
   dataset_t.deleter(&dataset_t);
 
-  ASSERT_EQ(Api::params_destroy(params), CUVS_SUCCESS);
+  ASSERT_EQ(cuvsKMeansParamsDestroy(params), CUVS_SUCCESS);
   ASSERT_EQ(cuvsResourcesDestroy(res), CUVS_SUCCESS);
 }
 
-template <typename Api>
 void test_fit_host()
 {
   raft::handle_t handle;
@@ -189,8 +137,8 @@ void test_fit_host()
   cuvsResources_t res;
   ASSERT_EQ(cuvsResourcesCreate(&res), CUVS_SUCCESS);
 
-  typename Api::params_t params;
-  ASSERT_EQ(Api::params_create(&params), CUVS_SUCCESS);
+  cuvsKMeansParams_t params;
+  ASSERT_EQ(cuvsKMeansParamsCreate(&params), CUVS_SUCCESS);
   params->n_clusters           = kNClusters;
   params->max_iter             = 100;
   params->tol                  = 1e-6;
@@ -211,7 +159,8 @@ void test_fit_host()
   double inertia = -1.0;
   int n_iter     = -1;
 
-  ASSERT_EQ(Api::fit(res, params, &dataset_t, &centroids_t, &inertia, &n_iter), CUVS_SUCCESS);
+  ASSERT_EQ(cuvsKMeansFit(res, params, &dataset_t, nullptr, &centroids_t, &inertia, &n_iter),
+            CUVS_SUCCESS);
 
   ASSERT_TRUE(cuvs::devArrMatchHost(kExpectedCentroids,
                                     centroids_d.data(),
@@ -224,21 +173,15 @@ void test_fit_host()
   centroids_t.deleter(&centroids_t);
   dataset_t.deleter(&dataset_t);
 
-  ASSERT_EQ(Api::params_destroy(params), CUVS_SUCCESS);
+  ASSERT_EQ(cuvsKMeansParamsDestroy(params), CUVS_SUCCESS);
   ASSERT_EQ(cuvsResourcesDestroy(res), CUVS_SUCCESS);
 }
 
 }  // namespace
 
-TEST(KMeansC, FitPredict) { test_fit_predict<kmeans_api_v1>(); }
-// TODO(cuVS 26.08): remove FitPredictV2 once `_v2` is promoted to the
-// unsuffixed ABI -- it will be redundant with FitPredict at that point.
-TEST(KMeansC, FitPredictV2) { test_fit_predict<kmeans_api_v2>(); }
+TEST(KMeansC, FitPredict) { test_fit_predict(); }
 
-TEST(KMeansC, FitHost) { test_fit_host<kmeans_api_v1>(); }
-// TODO(cuVS 26.08): remove FitHostV2 once `_v2` is promoted to the
-// unsuffixed ABI.
-TEST(KMeansC, FitHostV2) { test_fit_host<kmeans_api_v2>(); }
+TEST(KMeansC, FitHost) { test_fit_host(); }
 
 TEST(KMeansC, ParamsCreateDestroy)
 {
@@ -249,16 +192,3 @@ TEST(KMeansC, ParamsCreateDestroy)
   EXPECT_GT(params->max_iter, 0);
   ASSERT_EQ(cuvsKMeansParamsDestroy(params), CUVS_SUCCESS);
 }
-
-// TODO(cuVS 26.08): remove ParamsCreateDestroyV2 once cuvsKMeansParamsCreate_v2
-// / cuvsKMeansParamsDestroy_v2 are promoted to the unsuffixed entry points and
-// the `_v2` symbols are deleted from the public header.
-TEST(KMeansC, ParamsCreateDestroyV2)
-{
-  cuvsKMeansParams_v2_t params = nullptr;
-  ASSERT_EQ(cuvsKMeansParamsCreate_v2(&params), CUVS_SUCCESS);
-  ASSERT_NE(params, nullptr);
-  EXPECT_GT(params->n_clusters, 0);
-  EXPECT_GT(params->max_iter, 0);
-  ASSERT_EQ(cuvsKMeansParamsDestroy_v2(params), CUVS_SUCCESS);
-}
diff --git a/fern/pages/c_api/c-api-cluster-kmeans.md b/fern/pages/c_api/c-api-cluster-kmeans.md
index 90d093d140..e7bfb9a185 100644
--- a/fern/pages/c_api/c-api-cluster-kmeans.md
+++ b/fern/pages/c_api/c-api-cluster-kmeans.md
@@ -32,53 +32,10 @@ typedef enum {
 <a id="cuvskmeansparams"></a>
 ### cuvsKMeansParams
 
-Hyper-parameters for the kmeans algorithm NB: The inertia_check field is kept for ABI compatibility. Removed in cuvsKMeansParams_v2. TODO: CalVer for the replacement: 26.08
+Hyper-parameters for the kmeans algorithm
 
 ```c
 struct cuvsKMeansParams {
-  int n_clusters;
-  cuvsKMeansInitMethod init;
-  int max_iter;
-  double tol;
-  int n_init;
-  double oversampling_factor;
-  int batch_samples;
-  int batch_centroids;
-  bool inertia_check;
-  bool hierarchical;
-  int hierarchical_n_iters;
-  int64_t streaming_batch_size;
-  int64_t init_size;
-  cuvsDistanceType metric;
-};
-```
-
-**Fields**
-
-| Name | Type | Description |
-| --- | --- | --- |
-| `n_clusters` | `int` | The number of clusters to form as well as the number of centroids to generate (default:8). |
-| `init` | [`cuvsKMeansInitMethod`](/api-reference/c-api-cluster-kmeans#cuvskmeansinitmethod) | Method for initialization, defaults to k-means++:<br />- cuvsKMeansInitMethod::KMeansPlusPlus (k-means++): Use scalable k-means++ algorithm to select the initial cluster centers.<br />- cuvsKMeansInitMethod::Random (random): Choose 'n_clusters' observations (rows) at random from the input data for the initial centroids.<br />- cuvsKMeansInitMethod::Array (ndarray): Use 'centroids' as initial cluster centers. |
-| `max_iter` | `int` | Maximum number of iterations of the k-means algorithm for a single run. |
-| `tol` | `double` | Relative tolerance with regards to inertia to declare convergence. |
-| `n_init` | `int` | Number of instance k-means algorithm will be run with different seeds. |
-| `oversampling_factor` | `double` | Oversampling factor for use in the k-means\|\| algorithm |
-| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile 1NN computation which is useful to optimize/control the memory footprint Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 then don't tile the centroids |
-| `batch_centroids` | `int` | if 0 then batch_centroids = n_clusters |
-| `inertia_check` | `bool` | Deprecated, ignored. Kept for ABI compatibility. |
-| `hierarchical` | `bool` | Whether to use hierarchical (balanced) kmeans or not |
-| `hierarchical_n_iters` | `int` | For hierarchical k-means , defines the number of training iterations |
-| `streaming_batch_size` | `int64_t` | Number of samples to process per GPU batch for the batched (host-data) API. When set to 0, defaults to n_samples (process all at once). |
-| `init_size` | `int64_t` | Number of samples to draw for KMeansPlusPlus initialization. When set to 0, uses heuristic min(3 * n_clusters, n_samples) for host data, or n_samples for device data. |
-| `metric` | [`cuvsDistanceType`](/api-reference/c-api-distance-distance#cuvsdistancetype) |  |
-
-<a id="cuvskmeansparams-v2"></a>
-### cuvsKMeansParams_v2
-
-Hyper-parameters for the kmeans algorithm TODO: Remove this after cuvsKMeansParams is replaced in ABI 2.0
-
-```c
-struct cuvsKMeansParams_v2 {
   int n_clusters;
   cuvsKMeansInitMethod init;
   int max_iter;
@@ -122,8 +79,6 @@ Allocate KMeans params, and populate with default values
 CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params);
 ```
 
-**Note:** In cuVS 26.08 (next ABI major version) this signature will be<br />replaced by cuvsKMeansParamsCreate_v2.
-
 **Parameters**
 
 | Name | Direction | Type | Description |
@@ -143,8 +98,6 @@ De-allocate KMeans params
 CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);
 ```
 
-**Note:** In cuVS 26.08 (next ABI major version) this signature will be<br />replaced by cuvsKMeansParamsDestroy_v2.
-
 **Parameters**
 
 | Name | Direction | Type | Description |
@@ -155,46 +108,6 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);
 
 [`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
-<a id="cuvskmeansparamscreate-v2"></a>
-### cuvsKMeansParamsCreate_v2
-
-Allocate KMeans params
-
-```c
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params);
-```
-
-Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2. Will become the unsuffixed cuvsKMeansParamsCreate in cuVS 26.08.
-
-**Parameters**
-
-| Name | Direction | Type | Description |
-| --- | --- | --- | --- |
-| `params` | in | [`cuvsKMeansParams_v2_t*`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) | cuvsKMeansParams_v2_t to allocate |
-
-**Returns**
-
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
-
-<a id="cuvskmeansparamsdestroy-v2"></a>
-### cuvsKMeansParamsDestroy_v2
-
-De-allocate KMeans params allocated by cuvsKMeansParamsCreate_v2.
-
-```c
-CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params);
-```
-
-**Parameters**
-
-| Name | Direction | Type | Description |
-| --- | --- | --- | --- |
-| `params` | in | [`cuvsKMeansParams_v2_t`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) |  |
-
-**Returns**
-
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
-
 <a id="cuvskmeanstype"></a>
 ### cuvsKMeansType
 
@@ -235,8 +148,6 @@ Initial centroids are chosen with k-means++ algorithm. Empty clusters are reinit
 
 X may reside on either host (CPU) or device (GPU) memory. When X is on the host the data is streamed to the GPU in batches controlled by params-&gt;streaming_batch_size.
 
-**Note:** In cuVS 26.08 (next ABI major version) this signature will be<br />replaced by cuvsKMeansFit_v2.
-
 **Parameters**
 
 | Name | Direction | Type | Description |
@@ -253,39 +164,6 @@ X may reside on either host (CPU) or device (GPU) memory. When X is on the host
 
 [`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
-<a id="cuvskmeansfit-v2"></a>
-### cuvsKMeansFit_v2
-
-Find clusters with k-means algorithm (v2 params layout).
-
-```c
-CUVS_EXPORT cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res,
-cuvsKMeansParams_v2_t params,
-DLManagedTensor* X,
-DLManagedTensor* sample_weight,
-DLManagedTensor* centroids,
-double* inertia,
-int* n_iter);
-```
-
-Mirrors cuvsKMeansFit but takes cuvsKMeansParams_v2_t. Will become the unsuffixed cuvsKMeansFit in cuVS 26.08.
-
-**Parameters**
-
-| Name | Direction | Type | Description |
-| --- | --- | --- | --- |
-| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | opaque C handle |
-| `params` | in | [`cuvsKMeansParams_v2_t`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) | Parameters for KMeans model (v2 layout). |
-| `X` | in | `DLManagedTensor*` | Training instances to cluster. The data must be in row-major format. May be on host or device memory. [dim = n_samples x n_features] |
-| `sample_weight` | in | `DLManagedTensor*` | Optional weights for each observation in X. Must be on the same memory space as X. [len = n_samples] |
-| `centroids` | inout | `DLManagedTensor*` | [in] When init is InitMethod::Array, use centroids as the initial cluster centers. [out] The generated centroids from the kmeans algorithm are stored at the address pointed by 'centroids'. Must be on device. [dim = n_clusters x n_features] |
-| `inertia` | out | `double*` | Sum of squared distances of samples to their closest cluster center. |
-| `n_iter` | out | `int*` | Number of iterations run. |
-
-**Returns**
-
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
-
 <a id="cuvskmeanspredict"></a>
 ### cuvsKMeansPredict
 
@@ -302,8 +180,6 @@ bool normalize_weight,
 double* inertia);
 ```
 
-**Note:** In cuVS 26.08 (next ABI major version) this signature will be<br />replaced by cuvsKMeansPredict_v2.
-
 **Parameters**
 
 | Name | Direction | Type | Description |
@@ -321,41 +197,6 @@ double* inertia);
 
 [`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
 
-<a id="cuvskmeanspredict-v2"></a>
-### cuvsKMeansPredict_v2
-
-Predict the closest cluster each sample in X belongs to (v2 params layout).
-
-```c
-CUVS_EXPORT cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res,
-cuvsKMeansParams_v2_t params,
-DLManagedTensor* X,
-DLManagedTensor* sample_weight,
-DLManagedTensor* centroids,
-DLManagedTensor* labels,
-bool normalize_weight,
-double* inertia);
-```
-
-Mirrors cuvsKMeansPredict but takes cuvsKMeansParams_v2_t. Will become the unsuffixed cuvsKMeansPredict in cuVS 26.08.
-
-**Parameters**
-
-| Name | Direction | Type | Description |
-| --- | --- | --- | --- |
-| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | opaque C handle |
-| `params` | in | [`cuvsKMeansParams_v2_t`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) | Parameters for KMeans model (v2 layout). |
-| `X` | in | `DLManagedTensor*` | New data to predict. [dim = n_samples x n_features] |
-| `sample_weight` | in | `DLManagedTensor*` | Optional weights for each observation in X. [len = n_samples] |
-| `centroids` | in | `DLManagedTensor*` | Cluster centroids. The data must be in row-major format. [dim = n_clusters x n_features] |
-| `labels` | out | `DLManagedTensor*` | Index of the cluster each sample in X belongs to. [len = n_samples] |
-| `normalize_weight` | in | `bool` | True if the weights should be normalized |
-| `inertia` | out | `double*` | Sum of squared distances of samples to their closest cluster center. |
-
-**Returns**
-
-[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t)
-
 <a id="cuvskmeansclustercost"></a>
 ### cuvsKMeansClusterCost
 
diff --git a/fern/pages/python_api/python-api-cluster-kmeans.md b/fern/pages/python_api/python-api-cluster-kmeans.md
index 0eea49dd72..a6746025bc 100644
--- a/fern/pages/python_api/python-api-cluster-kmeans.md
+++ b/fern/pages/python_api/python-api-cluster-kmeans.md
@@ -27,7 +27,6 @@ Hyper-parameters for the kmeans algorithm
 | `oversampling_factor` | `double` | Oversampling factor for use in the k-means\|\| algorithm |
 | `batch_samples` | `int` | Number of samples to process in each batch for tiled 1NN computation. Useful to optimize/control memory footprint. Default tile is [batch_samples x n_clusters]. |
 | `batch_centroids` | `int` | Number of centroids to process in each batch. If 0, uses n_clusters. |
-| `inertia_check` | `bool` | Deprecated and ignored. Will be removed in a future release. Inertia-based convergence checking always runs. |
 | `init_size` | `int` | Number of samples to draw for KMeansPlusPlus initialization with host (out-of-core) data. When set to 0, uses the heuristic min(3 * n_clusters, n_samples). Default: 0. |
 | `streaming_batch_size` | `int` | Number of samples to process per GPU batch when fitting with host (numpy) data. When set to 0, defaults to n_samples (process all at once). Only used by the batched (host-data) code path. Reducing streaming_batch_size can help reduce GPU memory pressure but increases overhead as the number of times centroid adjustments are computed increases.<br /><br />Default: 0 (process all data at once). |
 | `hierarchical` | `bool` | Whether to use hierarchical (balanced) kmeans or not |
@@ -36,7 +35,7 @@ Hyper-parameters for the kmeans algorithm
 **Constructor**
 
 ```python
-def __init__(self, *, metric=None, n_clusters=None, init_method=None, max_iter=None, tol=None, n_init=None, oversampling_factor=None, batch_samples=None, batch_centroids=None, inertia_check=None, init_size=None, streaming_batch_size=None, hierarchical=None, hierarchical_n_iters=None)
+def __init__(self, *, metric=None, n_clusters=None, init_method=None, max_iter=None, tol=None, n_init=None, oversampling_factor=None, batch_samples=None, batch_centroids=None, init_size=None, streaming_batch_size=None, hierarchical=None, hierarchical_n_iters=None)
 ```
 
 **Members**
diff --git a/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd b/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd
index 975ef386df..b01b653d23 100644
--- a/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd
+++ b/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd
@@ -22,11 +22,6 @@ cdef extern from "cuvs/cluster/kmeans.h" nogil:
         CUVS_KMEANS_TYPE_KMEANS
         CUVS_KMEANS_TYPE_KMEANS_BALANCED
 
-    # NOTE: The Python binding currently targets the unsuffixed cuvsKMeansParams
-    # ABI (which still carries the deprecated `inertia_check` field). In cuVS
-    # 26.08 this struct/entry-point set will be replaced by the contents of
-    # cuvsKMeansParams_v2 -- once that lands, the `inertia_check` field below
-    # should be deleted.
     ctypedef struct cuvsKMeansParams:
         cuvsDistanceType metric,
         int n_clusters,
@@ -37,7 +32,6 @@ cdef extern from "cuvs/cluster/kmeans.h" nogil:
         double oversampling_factor,
         int batch_samples,
         int batch_centroids,
-        bool inertia_check,
         bool hierarchical,
         int hierarchical_n_iters,
         int64_t streaming_batch_size,
diff --git a/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx b/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx
index 2e9046b4b2..fd38143c3b 100644
--- a/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx
+++ b/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx
@@ -77,10 +77,6 @@ cdef class KMeansParams:
         [batch_samples x n_clusters].
     batch_centroids : int
         Number of centroids to process in each batch. If 0, uses n_clusters.
-    inertia_check : bool
-        Deprecated and ignored. Will be
-        removed in a future release. Inertia-based convergence checking
-        always runs.
     init_size : int
         Number of samples to draw for KMeansPlusPlus initialization with
         host (out-of-core) data. When set to 0, uses the heuristic
@@ -118,7 +114,6 @@ cdef class KMeansParams:
                  oversampling_factor=None,
                  batch_samples=None,
                  batch_centroids=None,
-                 inertia_check=None,
                  init_size=None,
                  streaming_batch_size=None,
                  hierarchical=None,
@@ -142,12 +137,6 @@ cdef class KMeansParams:
             self.params.batch_samples = batch_samples
         if batch_centroids is not None:
             self.params.batch_centroids = batch_centroids
-        if inertia_check is not None:
-            warnings.warn(
-                "KMeansParams `inertia_check` is deprecated and ignored; "
-                "inertia-based convergence checking always runs.",
-                FutureWarning
-            )
         if init_size is not None:
             self.params.init_size = init_size
         if streaming_batch_size is not None:
diff --git a/rust/cuvs-sys/src/bindings.rs b/rust/cuvs-sys/src/bindings.rs
index 0498b77f3a..d985f6a359 100644
--- a/rust/cuvs-sys/src/bindings.rs
+++ b/rust/cuvs-sys/src/bindings.rs
@@ -388,8 +388,6 @@ pub struct cuvsKMeansParams {
     pub batch_samples: ::std::os::raw::c_int,
     #[doc = " if 0 then batch_centroids = n_clusters"]
     pub batch_centroids: ::std::os::raw::c_int,
-    #[doc = " Check inertia during iterations for early convergence."]
-    pub inertia_check: bool,
     #[doc = " Whether to use hierarchical (balanced) kmeans or not"]
     pub hierarchical: bool,
     #[doc = " For hierarchical k-means , defines the number of training iterations"]
@@ -419,8 +417,6 @@ const _: () = {
         [::std::mem::offset_of!(cuvsKMeansParams, batch_samples) - 40usize];
     ["Offset of field: cuvsKMeansParams::batch_centroids"]
         [::std::mem::offset_of!(cuvsKMeansParams, batch_centroids) - 44usize];
-    ["Offset of field: cuvsKMeansParams::inertia_check"]
-        [::std::mem::offset_of!(cuvsKMeansParams, inertia_check) - 48usize];
     ["Offset of field: cuvsKMeansParams::hierarchical"]
-        [::std::mem::offset_of!(cuvsKMeansParams, hierarchical) - 49usize];
+        [::std::mem::offset_of!(cuvsKMeansParams, hierarchical) - 48usize];
     ["Offset of field: cuvsKMeansParams::hierarchical_n_iters"]