diff --git a/c/include/cuvs/cluster/kmeans.h b/c/include/cuvs/cluster/kmeans.h index 9a3882cb4c..9e091839a1 100644 --- a/c/include/cuvs/cluster/kmeans.h +++ b/c/include/cuvs/cluster/kmeans.h @@ -41,8 +41,6 @@ typedef enum { /** * @brief Hyper-parameters for the kmeans algorithm - * NB: The inertia_check field is kept for ABI compatibility. Removed in cuvsKMeansParams_v2. - * TODO: CalVer for the replacement: 26.08 */ struct cuvsKMeansParams { cuvsDistanceType metric; @@ -95,88 +93,6 @@ struct cuvsKMeansParams { */ int batch_centroids; - /** Deprecated, ignored. Kept for ABI compatibility. */ - bool inertia_check; - - /** - * Whether to use hierarchical (balanced) kmeans or not - */ - bool hierarchical; - - /** - * For hierarchical k-means , defines the number of training iterations - */ - int hierarchical_n_iters; - - /** - * Number of samples to process per GPU batch for the batched (host-data) API. - * When set to 0, defaults to n_samples (process all at once). - */ - int64_t streaming_batch_size; - - /** - * Number of samples to draw for KMeansPlusPlus initialization. - * When set to 0, uses heuristic min(3 * n_clusters, n_samples) for host data, - * or n_samples for device data. - */ - int64_t init_size; -}; - -/** - * @brief Hyper-parameters for the kmeans algorithm - * TODO: Remove this after cuvsKMeansParams is replaced in ABI 2.0 - */ - struct cuvsKMeansParams_v2 { - cuvsDistanceType metric; - - /** - * The number of clusters to form as well as the number of centroids to generate (default:8). - */ - int n_clusters; - - /** - * Method for initialization, defaults to k-means++: - * - cuvsKMeansInitMethod::KMeansPlusPlus (k-means++): Use scalable k-means++ algorithm - * to select the initial cluster centers. - * - cuvsKMeansInitMethod::Random (random): Choose 'n_clusters' observations (rows) at - * random from the input data for the initial centroids. - * - cuvsKMeansInitMethod::Array (ndarray): Use 'centroids' as initial cluster centers. 
- */ - cuvsKMeansInitMethod init; - - /** - * Maximum number of iterations of the k-means algorithm for a single run. - */ - int max_iter; - - /** - * Relative tolerance with regards to inertia to declare convergence. - */ - double tol; - - /** - * Number of instance k-means algorithm will be run with different seeds. - */ - int n_init; - - /** - * Oversampling factor for use in the k-means|| algorithm - */ - double oversampling_factor; - - /** - * batch_samples and batch_centroids are used to tile 1NN computation which is - * useful to optimize/control the memory footprint - * Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 - * then don't tile the centroids - */ - int batch_samples; - - /** - * if 0 then batch_centroids = n_clusters - */ - int batch_centroids; - /** * Whether to use hierarchical (balanced) kmeans or not */ @@ -202,14 +118,10 @@ struct cuvsKMeansParams { }; typedef struct cuvsKMeansParams* cuvsKMeansParams_t; -typedef struct cuvsKMeansParams_v2* cuvsKMeansParams_v2_t; /** * @brief Allocate KMeans params, and populate with default values * - * @note In cuVS 26.08 (next ABI major version) this signature will be - * replaced by cuvsKMeansParamsCreate_v2. - * * @param[in] params cuvsKMeansParams_t to allocate * @return cuvsError_t */ @@ -218,33 +130,11 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params); /** * @brief De-allocate KMeans params * - * @note In cuVS 26.08 (next ABI major version) this signature will be - * replaced by cuvsKMeansParamsDestroy_v2. - * * @param[in] params * @return cuvsError_t */ CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params); -/** - * @brief Allocate KMeans params - * - * Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2. - * Will become the unsuffixed cuvsKMeansParamsCreate in cuVS 26.08. 
- * - * @param[in] params cuvsKMeansParams_v2_t to allocate - * @return cuvsError_t - */ -CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params); - -/** - * @brief De-allocate KMeans params allocated by cuvsKMeansParamsCreate_v2. - * - * @param[in] params - * @return cuvsError_t - */ -CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params); - /** * @brief Type of k-means algorithm. */ @@ -270,9 +160,6 @@ typedef enum { CUVS_KMEANS_TYPE_KMEANS = 0, CUVS_KMEANS_TYPE_KMEANS_BALANCED = 1 * When X is on the host the data is streamed to the GPU in * batches controlled by params->streaming_batch_size. * - * @note In cuVS 26.08 (next ABI major version) this signature will be - * replaced by cuvsKMeansFit_v2. - * * @param[in] res opaque C handle * @param[in] params Parameters for KMeans model. * @param[in] X Training instances to cluster. The data must @@ -300,45 +187,9 @@ CUVS_EXPORT cuvsError_t cuvsKMeansFit(cuvsResources_t res, double* inertia, int* n_iter); -/** - * @brief Find clusters with k-means algorithm (v2 params layout). - * - * Mirrors cuvsKMeansFit but takes cuvsKMeansParams_v2_t. Will become the - * unsuffixed cuvsKMeansFit in cuVS 26.08. - * - * @param[in] res opaque C handle - * @param[in] params Parameters for KMeans model (v2 layout). - * @param[in] X Training instances to cluster. The data must - * be in row-major format. May be on host or - * device memory. - * [dim = n_samples x n_features] - * @param[in] sample_weight Optional weights for each observation in X. - * Must be on the same memory space as X. - * [len = n_samples] - * @param[inout] centroids [in] When init is InitMethod::Array, use - * centroids as the initial cluster centers. - * [out] The generated centroids from the - * kmeans algorithm are stored at the address - * pointed by 'centroids'. Must be on device. 
- * [dim = n_clusters x n_features] - * @param[out] inertia Sum of squared distances of samples to their - * closest cluster center. - * @param[out] n_iter Number of iterations run. - */ -CUVS_EXPORT cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res, - cuvsKMeansParams_v2_t params, - DLManagedTensor* X, - DLManagedTensor* sample_weight, - DLManagedTensor* centroids, - double* inertia, - int* n_iter); - /** * @brief Predict the closest cluster each sample in X belongs to. * - * @note In cuVS 26.08 (next ABI major version) this signature will be - * replaced by cuvsKMeansPredict_v2. - * * @param[in] res opaque C handle * @param[in] params Parameters for KMeans model. * @param[in] X New data to predict. @@ -364,37 +215,6 @@ CUVS_EXPORT cuvsError_t cuvsKMeansPredict(cuvsResources_t res, bool normalize_weight, double* inertia); -/** - * @brief Predict the closest cluster each sample in X belongs to (v2 params layout). - * - * Mirrors cuvsKMeansPredict but takes cuvsKMeansParams_v2_t. Will become the - * unsuffixed cuvsKMeansPredict in cuVS 26.08. - * - * @param[in] res opaque C handle - * @param[in] params Parameters for KMeans model (v2 layout). - * @param[in] X New data to predict. - * [dim = n_samples x n_features] - * @param[in] sample_weight Optional weights for each observation in X. - * [len = n_samples] - * @param[in] centroids Cluster centroids. The data must be in - * row-major format. - * [dim = n_clusters x n_features] - * @param[in] normalize_weight True if the weights should be normalized - * @param[out] labels Index of the cluster each sample in X - * belongs to. - * [len = n_samples] - * @param[out] inertia Sum of squared distances of samples to - * their closest cluster center. 
- */ -CUVS_EXPORT cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res, - cuvsKMeansParams_v2_t params, - DLManagedTensor* X, - DLManagedTensor* sample_weight, - DLManagedTensor* centroids, - DLManagedTensor* labels, - bool normalize_weight, - double* inertia); - /** * @brief Compute cluster cost * diff --git a/c/src/cluster/kmeans.cpp b/c/src/cluster/kmeans.cpp index 8e46764ce4..4db59532be 100644 --- a/c/src/cluster/kmeans.cpp +++ b/c/src/cluster/kmeans.cpp @@ -16,9 +16,7 @@ namespace { -// The conversions are templated on the C struct type and reused by both API surfaces. -template -cuvs::cluster::kmeans::params convert_params(const ParamsT& params) +cuvs::cluster::kmeans::params convert_params(const cuvsKMeansParams& params) { auto kmeans_params = cuvs::cluster::kmeans::params(); kmeans_params.metric = static_cast(params.metric); @@ -35,8 +33,7 @@ cuvs::cluster::kmeans::params convert_params(const ParamsT& params) return kmeans_params; } -template -cuvs::cluster::kmeans::balanced_params convert_balanced_params(const ParamsT& params) +cuvs::cluster::kmeans::balanced_params convert_balanced_params(const cuvsKMeansParams& params) { auto kmeans_params = cuvs::cluster::kmeans::balanced_params(); kmeans_params.metric = static_cast(params.metric); @@ -44,9 +41,9 @@ cuvs::cluster::kmeans::balanced_params convert_balanced_params(const ParamsT& pa return kmeans_params; } -template +template void _fit(cuvsResources_t res, - const ParamsT& params, + const cuvsKMeansParams& params, DLManagedTensor* X_tensor, DLManagedTensor* sample_weight_tensor, DLManagedTensor* centroids_tensor, @@ -143,9 +140,9 @@ void _fit(cuvsResources_t res, } } -template +template void _predict(cuvsResources_t res, - const ParamsT& params, + const cuvsKMeansParams& params, DLManagedTensor* X_tensor, DLManagedTensor* sample_weight_tensor, DLManagedTensor* centroids_tensor, @@ -240,7 +237,6 @@ extern "C" cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params) .oversampling_factor = 
cpp_params.oversampling_factor, .batch_samples = cpp_params.batch_samples, .batch_centroids = cpp_params.batch_centroids, - .inertia_check = false, .hierarchical = false, .hierarchical_n_iters = static_cast(cpp_balanced_params.n_iters), .streaming_batch_size = cpp_params.streaming_batch_size, @@ -298,79 +294,6 @@ extern "C" cuvsError_t cuvsKMeansPredict(cuvsResources_t res, }); } -extern "C" cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params) -{ - return cuvs::core::translate_exceptions([=] { - cuvs::cluster::kmeans::params cpp_params; - cuvs::cluster::kmeans::balanced_params cpp_balanced_params; - *params = new cuvsKMeansParams_v2{ - .metric = static_cast(cpp_params.metric), - .n_clusters = cpp_params.n_clusters, - .init = static_cast(cpp_params.init), - .max_iter = cpp_params.max_iter, - .tol = cpp_params.tol, - .n_init = cpp_params.n_init, - .oversampling_factor = cpp_params.oversampling_factor, - .batch_samples = cpp_params.batch_samples, - .batch_centroids = cpp_params.batch_centroids, - .hierarchical = false, - .hierarchical_n_iters = static_cast(cpp_balanced_params.n_iters), - .streaming_batch_size = cpp_params.streaming_batch_size, - .init_size = cpp_params.init_size}; - }); -} - -extern "C" cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params) -{ - return cuvs::core::translate_exceptions([=] { delete params; }); -} - -extern "C" cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res, - cuvsKMeansParams_v2_t params, - DLManagedTensor* X, - DLManagedTensor* sample_weight, - DLManagedTensor* centroids, - double* inertia, - int* n_iter) -{ - return cuvs::core::translate_exceptions([=] { - auto dataset = X->dl_tensor; - if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { - _fit(res, *params, X, sample_weight, centroids, inertia, n_iter); - } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 64) { - _fit(res, *params, X, sample_weight, centroids, inertia, n_iter); - } else { - RAFT_FAIL("Unsupported dataset 
DLtensor dtype: %d and bits: %d", - dataset.dtype.code, - dataset.dtype.bits); - } - }); -} - -extern "C" cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res, - cuvsKMeansParams_v2_t params, - DLManagedTensor* X, - DLManagedTensor* sample_weight, - DLManagedTensor* centroids, - DLManagedTensor* labels, - bool normalize_weight, - double* inertia) -{ - return cuvs::core::translate_exceptions([=] { - auto dataset = X->dl_tensor; - if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { - _predict(res, *params, X, sample_weight, centroids, labels, normalize_weight, inertia); - } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 64) { - _predict( - res, *params, X, sample_weight, centroids, labels, normalize_weight, inertia); - } else { - RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", - dataset.dtype.code, - dataset.dtype.bits); - } - }); -} - extern "C" cuvsError_t cuvsKMeansClusterCost(cuvsResources_t res, DLManagedTensor* X, DLManagedTensor* centroids, diff --git a/c/tests/cluster/kmeans_c.cu b/c/tests/cluster/kmeans_c.cu index 3c87d035d3..7aa68bed40 100644 --- a/c/tests/cluster/kmeans_c.cu +++ b/c/tests/cluster/kmeans_c.cu @@ -48,59 +48,6 @@ int32_t kExpectedLabels[kNSamples] = {0, 0, 0, 0, 1, 1, 1, 1}; // 8 points, each at squared distance 0.5 from its cluster mean -> 4.0. constexpr double kExpectedInertia = 4.0; -// Type-erased dispatcher to exercise both the v1 and v2 entry points with -// shared test bodies. 
-struct kmeans_api_v1 { - using params_t = cuvsKMeansParams_t; - static cuvsError_t params_create(params_t* p) { return cuvsKMeansParamsCreate(p); } - static cuvsError_t params_destroy(params_t p) { return cuvsKMeansParamsDestroy(p); } - static cuvsError_t fit(cuvsResources_t res, - params_t params, - DLManagedTensor* dataset, - DLManagedTensor* centroids, - double* inertia, - int* n_iter) - { - return cuvsKMeansFit(res, params, dataset, NULL, centroids, inertia, n_iter); - } - static cuvsError_t predict(cuvsResources_t res, - params_t params, - DLManagedTensor* dataset, - DLManagedTensor* centroids, - DLManagedTensor* labels, - double* inertia) - { - return cuvsKMeansPredict( - res, params, dataset, NULL, centroids, labels, false, inertia); - } -}; - -struct kmeans_api_v2 { - using params_t = cuvsKMeansParams_v2_t; - static cuvsError_t params_create(params_t* p) { return cuvsKMeansParamsCreate_v2(p); } - static cuvsError_t params_destroy(params_t p) { return cuvsKMeansParamsDestroy_v2(p); } - static cuvsError_t fit(cuvsResources_t res, - params_t params, - DLManagedTensor* dataset, - DLManagedTensor* centroids, - double* inertia, - int* n_iter) - { - return cuvsKMeansFit_v2(res, params, dataset, NULL, centroids, inertia, n_iter); - } - static cuvsError_t predict(cuvsResources_t res, - params_t params, - DLManagedTensor* dataset, - DLManagedTensor* centroids, - DLManagedTensor* labels, - double* inertia) - { - return cuvsKMeansPredict_v2( - res, params, dataset, NULL, centroids, labels, false, inertia); - } -}; - -template void test_fit_predict() { raft::handle_t handle; @@ -122,8 +69,8 @@ void test_fit_predict() cuvsResources_t res; ASSERT_EQ(cuvsResourcesCreate(&res), CUVS_SUCCESS); - typename Api::params_t params; - ASSERT_EQ(Api::params_create(&params), CUVS_SUCCESS); + cuvsKMeansParams_t params; + ASSERT_EQ(cuvsKMeansParamsCreate(&params), CUVS_SUCCESS); params->n_clusters = kNClusters; params->max_iter = 100; params->tol = 1e-6; @@ -149,8 +96,10 @@ void 
test_fit_predict() double predict_inertia = -1.0; double cluster_cost = -1.0; - ASSERT_EQ(Api::fit(res, params, &dataset_t, &centroids_t, &inertia, &n_iter), CUVS_SUCCESS); - ASSERT_EQ(Api::predict(res, params, &dataset_t, &centroids_t, &labels_t, &predict_inertia), + ASSERT_EQ(cuvsKMeansFit(res, params, &dataset_t, NULL, &centroids_t, &inertia, &n_iter), + CUVS_SUCCESS); + ASSERT_EQ(cuvsKMeansPredict( + res, params, &dataset_t, NULL, &centroids_t, &labels_t, false, &predict_inertia), CUVS_SUCCESS); ASSERT_EQ(cuvsKMeansClusterCost(res, &dataset_t, &centroids_t, &cluster_cost), CUVS_SUCCESS); @@ -170,11 +119,10 @@ void test_fit_predict() centroids_t.deleter(&centroids_t); dataset_t.deleter(&dataset_t); - ASSERT_EQ(Api::params_destroy(params), CUVS_SUCCESS); + ASSERT_EQ(cuvsKMeansParamsDestroy(params), CUVS_SUCCESS); ASSERT_EQ(cuvsResourcesDestroy(res), CUVS_SUCCESS); } -template void test_fit_host() { raft::handle_t handle; @@ -189,8 +137,8 @@ void test_fit_host() cuvsResources_t res; ASSERT_EQ(cuvsResourcesCreate(&res), CUVS_SUCCESS); - typename Api::params_t params; - ASSERT_EQ(Api::params_create(&params), CUVS_SUCCESS); + cuvsKMeansParams_t params; + ASSERT_EQ(cuvsKMeansParamsCreate(&params), CUVS_SUCCESS); params->n_clusters = kNClusters; params->max_iter = 100; params->tol = 1e-6; @@ -211,7 +159,8 @@ void test_fit_host() double inertia = -1.0; int n_iter = -1; - ASSERT_EQ(Api::fit(res, params, &dataset_t, &centroids_t, &inertia, &n_iter), CUVS_SUCCESS); + ASSERT_EQ(cuvsKMeansFit(res, params, &dataset_t, NULL, &centroids_t, &inertia, &n_iter), + CUVS_SUCCESS); ASSERT_TRUE(cuvs::devArrMatchHost(kExpectedCentroids, centroids_d.data(), @@ -224,21 +173,15 @@ void test_fit_host() centroids_t.deleter(&centroids_t); dataset_t.deleter(&dataset_t); - ASSERT_EQ(Api::params_destroy(params), CUVS_SUCCESS); + ASSERT_EQ(cuvsKMeansParamsDestroy(params), CUVS_SUCCESS); ASSERT_EQ(cuvsResourcesDestroy(res), CUVS_SUCCESS); } } // namespace -TEST(KMeansC, FitPredict) { test_fit_predict(); } -// TODO(cuVS 26.08): remove 
FitPredictV2 once `_v2` is promoted to the -// unsuffixed ABI -- it will be redundant with FitPredict at that point. -TEST(KMeansC, FitPredictV2) { test_fit_predict(); } +TEST(KMeansC, FitPredict) { test_fit_predict(); } -TEST(KMeansC, FitHost) { test_fit_host(); } -// TODO(cuVS 26.08): remove FitHostV2 once `_v2` is promoted to the -// unsuffixed ABI. -TEST(KMeansC, FitHostV2) { test_fit_host(); } +TEST(KMeansC, FitHost) { test_fit_host(); } TEST(KMeansC, ParamsCreateDestroy) { @@ -249,16 +192,3 @@ TEST(KMeansC, ParamsCreateDestroy) EXPECT_GT(params->max_iter, 0); ASSERT_EQ(cuvsKMeansParamsDestroy(params), CUVS_SUCCESS); } - -// TODO(cuVS 26.08): remove ParamsCreateDestroyV2 once cuvsKMeansParamsCreate_v2 -// / cuvsKMeansParamsDestroy_v2 are promoted to the unsuffixed entry points and -// the `_v2` symbols are deleted from the public header. -TEST(KMeansC, ParamsCreateDestroyV2) -{ - cuvsKMeansParams_v2_t params = nullptr; - ASSERT_EQ(cuvsKMeansParamsCreate_v2(&params), CUVS_SUCCESS); - ASSERT_NE(params, nullptr); - EXPECT_GT(params->n_clusters, 0); - EXPECT_GT(params->max_iter, 0); - ASSERT_EQ(cuvsKMeansParamsDestroy_v2(params), CUVS_SUCCESS); -} diff --git a/fern/pages/c_api/c-api-cluster-kmeans.md b/fern/pages/c_api/c-api-cluster-kmeans.md index 90d093d140..e7bfb9a185 100644 --- a/fern/pages/c_api/c-api-cluster-kmeans.md +++ b/fern/pages/c_api/c-api-cluster-kmeans.md @@ -32,53 +32,10 @@ typedef enum { ### cuvsKMeansParams -Hyper-parameters for the kmeans algorithm NB: The inertia_check field is kept for ABI compatibility. Removed in cuvsKMeansParams_v2. 
TODO: CalVer for the replacement: 26.08 +Hyper-parameters for the kmeans algorithm ```c struct cuvsKMeansParams { - int n_clusters; - cuvsKMeansInitMethod init; - int max_iter; - double tol; - int n_init; - double oversampling_factor; - int batch_samples; - int batch_centroids; - bool inertia_check; - bool hierarchical; - int hierarchical_n_iters; - int64_t streaming_batch_size; - int64_t init_size; - cuvsDistanceType metric; -}; -``` - -**Fields** - -| Name | Type | Description | -| --- | --- | --- | -| `n_clusters` | `int` | The number of clusters to form as well as the number of centroids to generate (default:8). | -| `init` | [`cuvsKMeansInitMethod`](/api-reference/c-api-cluster-kmeans#cuvskmeansinitmethod) | Method for initialization, defaults to k-means++:
- cuvsKMeansInitMethod::KMeansPlusPlus (k-means++): Use scalable k-means++ algorithm to select the initial cluster centers.
- cuvsKMeansInitMethod::Random (random): Choose 'n_clusters' observations (rows) at random from the input data for the initial centroids.
- cuvsKMeansInitMethod::Array (ndarray): Use 'centroids' as initial cluster centers. | -| `max_iter` | `int` | Maximum number of iterations of the k-means algorithm for a single run. | -| `tol` | `double` | Relative tolerance with regards to inertia to declare convergence. | -| `n_init` | `int` | Number of instance k-means algorithm will be run with different seeds. | -| `oversampling_factor` | `double` | Oversampling factor for use in the k-means\|\| algorithm | -| `batch_samples` | `int` | batch_samples and batch_centroids are used to tile 1NN computation which is useful to optimize/control the memory footprint Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0 then don't tile the centroids | -| `batch_centroids` | `int` | if 0 then batch_centroids = n_clusters | -| `inertia_check` | `bool` | Deprecated, ignored. Kept for ABI compatibility. | -| `hierarchical` | `bool` | Whether to use hierarchical (balanced) kmeans or not | -| `hierarchical_n_iters` | `int` | For hierarchical k-means , defines the number of training iterations | -| `streaming_batch_size` | `int64_t` | Number of samples to process per GPU batch for the batched (host-data) API. When set to 0, defaults to n_samples (process all at once). | -| `init_size` | `int64_t` | Number of samples to draw for KMeansPlusPlus initialization. When set to 0, uses heuristic min(3 * n_clusters, n_samples) for host data, or n_samples for device data. 
| -| `metric` | [`cuvsDistanceType`](/api-reference/c-api-distance-distance#cuvsdistancetype) | | - - -### cuvsKMeansParams_v2 - -Hyper-parameters for the kmeans algorithm TODO: Remove this after cuvsKMeansParams is replaced in ABI 2.0 - -```c -struct cuvsKMeansParams_v2 { int n_clusters; cuvsKMeansInitMethod init; int max_iter; @@ -122,8 +79,6 @@ Allocate KMeans params, and populate with default values CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params); ``` -**Note:** In cuVS 26.08 (next ABI major version) this signature will be
replaced by cuvsKMeansParamsCreate_v2. - **Parameters** | Name | Direction | Type | Description | @@ -143,8 +98,6 @@ De-allocate KMeans params CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params); ``` -**Note:** In cuVS 26.08 (next ABI major version) this signature will be
replaced by cuvsKMeansParamsDestroy_v2. - **Parameters** | Name | Direction | Type | Description | @@ -155,46 +108,6 @@ CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params); [`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) - -### cuvsKMeansParamsCreate_v2 - -Allocate KMeans params - -```c -CUVS_EXPORT cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params); -``` - -Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2. Will become the unsuffixed cuvsKMeansParamsCreate in cuVS 26.08. - -**Parameters** - -| Name | Direction | Type | Description | -| --- | --- | --- | --- | -| `params` | in | [`cuvsKMeansParams_v2_t*`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) | cuvsKMeansParams_v2_t to allocate | - -**Returns** - -[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) - - -### cuvsKMeansParamsDestroy_v2 - -De-allocate KMeans params allocated by cuvsKMeansParamsCreate_v2. - -```c -CUVS_EXPORT cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params); -``` - -**Parameters** - -| Name | Direction | Type | Description | -| --- | --- | --- | --- | -| `params` | in | [`cuvsKMeansParams_v2_t`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) | | - -**Returns** - -[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) - ### cuvsKMeansType @@ -235,8 +148,6 @@ Initial centroids are chosen with k-means++ algorithm. Empty clusters are reinit X may reside on either host (CPU) or device (GPU) memory. When X is on the host the data is streamed to the GPU in batches controlled by params->streaming_batch_size. -**Note:** In cuVS 26.08 (next ABI major version) this signature will be
replaced by cuvsKMeansFit_v2. - **Parameters** | Name | Direction | Type | Description | @@ -253,39 +164,6 @@ X may reside on either host (CPU) or device (GPU) memory. When X is on the host [`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) - -### cuvsKMeansFit_v2 - -Find clusters with k-means algorithm (v2 params layout). - -```c -CUVS_EXPORT cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res, -cuvsKMeansParams_v2_t params, -DLManagedTensor* X, -DLManagedTensor* sample_weight, -DLManagedTensor* centroids, -double* inertia, -int* n_iter); -``` - -Mirrors cuvsKMeansFit but takes cuvsKMeansParams_v2_t. Will become the unsuffixed cuvsKMeansFit in cuVS 26.08. - -**Parameters** - -| Name | Direction | Type | Description | -| --- | --- | --- | --- | -| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | opaque C handle | -| `params` | in | [`cuvsKMeansParams_v2_t`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) | Parameters for KMeans model (v2 layout). | -| `X` | in | `DLManagedTensor*` | Training instances to cluster. The data must be in row-major format. May be on host or device memory. [dim = n_samples x n_features] | -| `sample_weight` | in | `DLManagedTensor*` | Optional weights for each observation in X. Must be on the same memory space as X. [len = n_samples] | -| `centroids` | inout | `DLManagedTensor*` | [in] When init is InitMethod::Array, use centroids as the initial cluster centers. [out] The generated centroids from the kmeans algorithm are stored at the address pointed by 'centroids'. Must be on device. [dim = n_clusters x n_features] | -| `inertia` | out | `double*` | Sum of squared distances of samples to their closest cluster center. | -| `n_iter` | out | `int*` | Number of iterations run. 
| - -**Returns** - -[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) - ### cuvsKMeansPredict @@ -302,8 +180,6 @@ bool normalize_weight, double* inertia); ``` -**Note:** In cuVS 26.08 (next ABI major version) this signature will be
replaced by cuvsKMeansPredict_v2. - **Parameters** | Name | Direction | Type | Description | @@ -321,41 +197,6 @@ double* inertia); [`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) - -### cuvsKMeansPredict_v2 - -Predict the closest cluster each sample in X belongs to (v2 params layout). - -```c -CUVS_EXPORT cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res, -cuvsKMeansParams_v2_t params, -DLManagedTensor* X, -DLManagedTensor* sample_weight, -DLManagedTensor* centroids, -DLManagedTensor* labels, -bool normalize_weight, -double* inertia); -``` - -Mirrors cuvsKMeansPredict but takes cuvsKMeansParams_v2_t. Will become the unsuffixed cuvsKMeansPredict in cuVS 26.08. - -**Parameters** - -| Name | Direction | Type | Description | -| --- | --- | --- | --- | -| `res` | in | [`cuvsResources_t`](/api-reference/c-api-core-c-api#cuvsresources-t) | opaque C handle | -| `params` | in | [`cuvsKMeansParams_v2_t`](/api-reference/c-api-cluster-kmeans#cuvskmeansparams-v2) | Parameters for KMeans model (v2 layout). | -| `X` | in | `DLManagedTensor*` | New data to predict. [dim = n_samples x n_features] | -| `sample_weight` | in | `DLManagedTensor*` | Optional weights for each observation in X. [len = n_samples] | -| `centroids` | in | `DLManagedTensor*` | Cluster centroids. The data must be in row-major format. [dim = n_clusters x n_features] | -| `labels` | out | `DLManagedTensor*` | Index of the cluster each sample in X belongs to. [len = n_samples] | -| `normalize_weight` | in | `bool` | True if the weights should be normalized | -| `inertia` | out | `double*` | Sum of squared distances of samples to their closest cluster center. 
| - -**Returns** - -[`CUVS_EXPORT cuvsError_t`](/api-reference/c-api-core-c-api#cuvserror-t) - ### cuvsKMeansClusterCost diff --git a/fern/pages/python_api/python-api-cluster-kmeans.md b/fern/pages/python_api/python-api-cluster-kmeans.md index 0eea49dd72..a6746025bc 100644 --- a/fern/pages/python_api/python-api-cluster-kmeans.md +++ b/fern/pages/python_api/python-api-cluster-kmeans.md @@ -27,7 +27,6 @@ Hyper-parameters for the kmeans algorithm | `oversampling_factor` | `double` | Oversampling factor for use in the k-means\|\| algorithm | | `batch_samples` | `int` | Number of samples to process in each batch for tiled 1NN computation. Useful to optimize/control memory footprint. Default tile is [batch_samples x n_clusters]. | | `batch_centroids` | `int` | Number of centroids to process in each batch. If 0, uses n_clusters. | -| `inertia_check` | `bool` | Deprecated and ignored. Will be removed in a future release. Inertia-based convergence checking always runs. | | `init_size` | `int` | Number of samples to draw for KMeansPlusPlus initialization with host (out-of-core) data. When set to 0, uses the heuristic min(3 * n_clusters, n_samples). Default: 0. | | `streaming_batch_size` | `int` | Number of samples to process per GPU batch when fitting with host (numpy) data. When set to 0, defaults to n_samples (process all at once). Only used by the batched (host-data) code path. Reducing streaming_batch_size can help reduce GPU memory pressure but increases overhead as the number of times centroid adjustments are computed increases.

Default: 0 (process all data at once). | | `hierarchical` | `bool` | Whether to use hierarchical (balanced) kmeans or not | @@ -36,7 +35,7 @@ Hyper-parameters for the kmeans algorithm **Constructor** ```python -def __init__(self, *, metric=None, n_clusters=None, init_method=None, max_iter=None, tol=None, n_init=None, oversampling_factor=None, batch_samples=None, batch_centroids=None, inertia_check=None, init_size=None, streaming_batch_size=None, hierarchical=None, hierarchical_n_iters=None) +def __init__(self, *, metric=None, n_clusters=None, init_method=None, max_iter=None, tol=None, n_init=None, oversampling_factor=None, batch_samples=None, batch_centroids=None, init_size=None, streaming_batch_size=None, hierarchical=None, hierarchical_n_iters=None) ``` **Members** diff --git a/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd b/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd index 975ef386df..b01b653d23 100644 --- a/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd +++ b/python/cuvs/cuvs/cluster/kmeans/kmeans.pxd @@ -22,11 +22,6 @@ cdef extern from "cuvs/cluster/kmeans.h" nogil: CUVS_KMEANS_TYPE_KMEANS CUVS_KMEANS_TYPE_KMEANS_BALANCED - # NOTE: The Python binding currently targets the unsuffixed cuvsKMeansParams - # ABI (which still carries the deprecated `inertia_check` field). In cuVS - # 26.08 this struct/entry-point set will be replaced by the contents of - # cuvsKMeansParams_v2 -- once that lands, the `inertia_check` field below - # should be deleted. 
ctypedef struct cuvsKMeansParams: cuvsDistanceType metric, int n_clusters, @@ -37,7 +32,6 @@ cdef extern from "cuvs/cluster/kmeans.h" nogil: double oversampling_factor, int batch_samples, int batch_centroids, - bool inertia_check, bool hierarchical, int hierarchical_n_iters, int64_t streaming_batch_size, diff --git a/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx b/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx index 2e9046b4b2..fd38143c3b 100644 --- a/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx +++ b/python/cuvs/cuvs/cluster/kmeans/kmeans.pyx @@ -77,10 +77,6 @@ cdef class KMeansParams: [batch_samples x n_clusters]. batch_centroids : int Number of centroids to process in each batch. If 0, uses n_clusters. - inertia_check : bool - Deprecated and ignored. Will be - removed in a future release. Inertia-based convergence checking - always runs. init_size : int Number of samples to draw for KMeansPlusPlus initialization with host (out-of-core) data. When set to 0, uses the heuristic @@ -118,7 +114,6 @@ cdef class KMeansParams: oversampling_factor=None, batch_samples=None, batch_centroids=None, - inertia_check=None, init_size=None, streaming_batch_size=None, hierarchical=None, @@ -142,12 +137,6 @@ cdef class KMeansParams: self.params.batch_samples = batch_samples if batch_centroids is not None: self.params.batch_centroids = batch_centroids - if inertia_check is not None: - warnings.warn( - "KMeansParams `inertia_check` is deprecated and ignored; " - "inertia-based convergence checking always runs.", - FutureWarning - ) if init_size is not None: self.params.init_size = init_size if streaming_batch_size is not None: diff --git a/rust/cuvs-sys/src/bindings.rs b/rust/cuvs-sys/src/bindings.rs index 0498b77f3a..d985f6a359 100644 --- a/rust/cuvs-sys/src/bindings.rs +++ b/rust/cuvs-sys/src/bindings.rs @@ -388,8 +388,6 @@ pub struct cuvsKMeansParams { pub batch_samples: ::std::os::raw::c_int, #[doc = " if 0 then batch_centroids = n_clusters"] pub batch_centroids: ::std::os::raw::c_int, - 
#[doc = " Check inertia during iterations for early convergence."] - pub inertia_check: bool, #[doc = " Whether to use hierarchical (balanced) kmeans or not"] pub hierarchical: bool, #[doc = " For hierarchical k-means , defines the number of training iterations"] @@ -419,8 +417,6 @@ const _: () = { [::std::mem::offset_of!(cuvsKMeansParams, batch_samples) - 40usize]; ["Offset of field: cuvsKMeansParams::batch_centroids"] [::std::mem::offset_of!(cuvsKMeansParams, batch_centroids) - 44usize]; - ["Offset of field: cuvsKMeansParams::inertia_check"] - [::std::mem::offset_of!(cuvsKMeansParams, inertia_check) - 48usize]; ["Offset of field: cuvsKMeansParams::hierarchical"] - [::std::mem::offset_of!(cuvsKMeansParams, hierarchical) - 49usize]; + [::std::mem::offset_of!(cuvsKMeansParams, hierarchical) - 48usize]; ["Offset of field: cuvsKMeansParams::hierarchical_n_iters"]