alibaba · feihongxu0824 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/src/ailego/algorithm/lloyd_cluster.h b/src/ailego/algorithm/lloyd_cluster.h
@@ -16,6 +16,7 @@
 
 #include <algorithm>
 #include <random>
+#include <vector>
 #include <ailego/parallel/lock.h>
 #include <zvec/ailego/parallel/thread_pool.h>
 #include <zvec/ailego/utility/type_helper.h>
@@ -247,7 +248,7 @@ class LloydCluster {
  protected:
   //! Cluster the cache features
   void cluster_cache_features(void) {
-    float scores[BatchCount];
+    std::vector<float> scores(BatchCount);
 
     for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) {
       size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
@@ -258,7 +259,7 @@ class LloydCluster {
       for (size_t j = 0; j != count; j += BatchCount) {
         ContextType::template BatchDistance<1>(centroids_matrix_[j], feature,
                                                centroids_matrix_.dimension(),
-                                               scores);
+                                               scores.data());
 
         for (size_t k = 0; k < BatchCount; ++k) {
           if (scores[k] < nearest_score) {
@@ -271,7 +272,7 @@ class LloydCluster {
       for (size_t j = count, total = centroids_matrix_.count(); j != total;
            ++j) {
         ContextType::Distance(centroids_matrix_[j], feature,
-                              centroids_matrix_.dimension(), scores);
+                              centroids_matrix_.dimension(), scores.data());
 
         if (scores[0] < nearest_score) {
           nearest_score = scores[0];
@@ -295,23 +296,23 @@ class LloydCluster {
       return i < j;
     };
 
-    float nearest_scores[BatchCount];
-    size_t nearest_indexes[BatchCount];
+    std::vector<float> nearest_scores(BatchCount);
+    std::vector<size_t> nearest_indexes(BatchCount);
 
     rows.resize(BatchCount);
     for (size_t i = first * BatchCount; i != last * BatchCount;
          i += BatchCount) {
       size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
       const StoreType *block = feature_matrix_[i];
 
-      std::fill(nearest_indexes, nearest_indexes + BatchCount, 0);
-      std::fill(nearest_scores, nearest_scores + BatchCount,
+      std::fill(nearest_indexes.data(), nearest_indexes.data() + BatchCount, 0);
+      std::fill(nearest_scores.data(), nearest_scores.data() + BatchCount,
                 std::numeric_limits<float>::max());
 
       for (size_t j = 0; j != count; j += BatchCount) {
         ContextType::template BatchDistance<BatchCount>(
             centroids_matrix_[j], block, centroids_matrix_.dimension(),
-            &scores[0]);
+            scores.data());
 
         for (size_t k = 0; k < BatchCount; ++k) {
           const float *start = &scores[k * BatchCount];
@@ -328,7 +329,7 @@ class LloydCluster {
            ++j) {
         ContextType::template BatchDistance<1>(block, centroids_matrix_[j],
                                                centroids_matrix_.dimension(),
-                                               &scores[0]);
+                                               scores.data());
 
         for (size_t k = 0; k < BatchCount; ++k) {
           float score = scores[k];

diff --git a/src/ailego/math/mips_euclidean_distance_matrix.h b/src/ailego/math/mips_euclidean_distance_matrix.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <vector>
 #include <ailego/math/norm2_matrix.h>
 #include <ailego/utility/math_helper.h>
 #include <zvec/ailego/internal/platform.h>
@@ -108,8 +109,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
-    float v2[N];
+    std::vector<float> u2(M);
+    std::vector<float> v2(N);
     for (size_t i = 0; i < M; ++i) {
       const ValueType p_val = p[i];
       u2[i] = static_cast<float>(p_val * p_val);
@@ -161,8 +162,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
-    float v2[N];
+    std::vector<float> u2(M);
+    std::vector<float> v2(N);
     for (size_t i = 0; i < M; ++i) {
       const ValueType p_val = p[i];
       u2[i] = static_cast<float>(p_val * p_val);
@@ -240,7 +241,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
+    std::vector<float> u2(M);
     ValueType q_val = *q++;
     float v2 = static_cast<float>(q_val * q_val);
     for (size_t i = 0; i < M; ++i) {
@@ -274,7 +275,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
+    std::vector<float> u2(M);
     ValueType q_val = *q++;
     float v2 = static_cast<float>(q_val * q_val);
     for (size_t i = 0; i < M; ++i) {
@@ -327,8 +328,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
-    float v2[N];
+    std::vector<float> u2(M);
+    std::vector<float> v2(N);
     const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
     const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
     for (size_t i = 0; i < M; ++i) {
@@ -383,8 +384,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
-    float v2[N];
+    std::vector<float> u2(M);
+    std::vector<float> v2(N);
     const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
     const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
     for (size_t i = 0; i < M; ++i) {
@@ -495,7 +496,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
+    std::vector<float> u2(M);
     uint32_t q_val = *q_it++;
     float v2 = Squared(q_val);
     for (size_t i = 0; i < M; ++i) {
@@ -531,7 +532,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
+    std::vector<float> u2(M);
     uint32_t q_val = *q_it++;
     float v2 = Squared(q_val);
     for (size_t i = 0; i < M; ++i) {
@@ -613,8 +614,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
-    float v2[N];
+    std::vector<float> u2(M);
+    std::vector<float> v2(N);
     const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
     const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
     for (size_t i = 0; i < M; ++i) {
@@ -669,8 +670,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
-    float v2[N];
+    std::vector<float> u2(M);
+    std::vector<float> v2(N);
     const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
     const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
     for (size_t i = 0; i < M; ++i) {
@@ -856,7 +857,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
+    std::vector<float> u2(M);
     uint32_t q_val = *q_it++;
     float v2 = Squared(q_val);
     for (size_t i = 0; i < M; ++i) {
@@ -892,7 +893,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
       return;
     }
 
-    float u2[M];
+    std::vector<float> u2(M);
     uint32_t q_val = *q_it++;
     float v2 = Squared(q_val);
     for (size_t i = 0; i < M; ++i) {

diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl.h b/src/ailego/math_batch/inner_product_distance_batch_impl.h
@@ -53,14 +53,14 @@ compute_one_to_many_avx2_fp32(
     const ValueType *query, const ValueType **ptrs,
     std::array<const ValueType *, dp_batch> &prefetch_ptrs,
     size_t dimensionality, float *results) {
-  __m256 accs[dp_batch];
+  std::vector<__m256> accs(dp_batch);
   for (size_t i = 0; i < dp_batch; ++i) {
     accs[i] = _mm256_setzero_ps();
   }
   size_t dim = 0;
   for (; dim + 8 <= dimensionality; dim += 8) {
     __m256 q = _mm256_loadu_ps(query + dim);
-    __m256 data_regs[dp_batch];
+    std::vector<__m256> data_regs(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       data_regs[i] = _mm256_loadu_ps(ptrs[i] + dim);
     }
@@ -73,13 +73,13 @@ compute_one_to_many_avx2_fp32(
       accs[i] = _mm256_fnmadd_ps(q, data_regs[i], accs[i]);
     }
   }
-  __m128 sum128_regs[dp_batch];
+  std::vector<__m128> sum128_regs(dp_batch);
   for (size_t i = 0; i < dp_batch; ++i) {
     sum128_regs[i] = sum_top_bottom_avx(accs[i]);
   }
   if (dim + 4 <= dimensionality) {
     __m128 q = _mm_loadu_ps(query + dim);
-    __m128 data_regs[dp_batch];
+    std::vector<__m128> data_regs(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       data_regs[i] = _mm_loadu_ps(ptrs[i] + dim);
     }
@@ -95,7 +95,7 @@ compute_one_to_many_avx2_fp32(
   }
   if (dim + 2 <= dimensionality) {
     __m128 q = _mm_setzero_ps();
-    __m128 data_regs[dp_batch];
+    std::vector<__m128> data_regs(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       data_regs[i] = _mm_setzero_ps();
     }

diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h b/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h
@@ -30,7 +30,7 @@ compute_one_to_many_avx512fp16_fp16(
     const ailego::Float16 *query, const ailego::Float16 **ptrs,
     std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
     size_t dimensionality, float *results) {
-  __m512h accs[dp_batch];
+  std::vector<__m512h> accs(dp_batch);
 
   for (size_t i = 0; i < dp_batch; ++i) {
     accs[i] = _mm512_setzero_ph();
@@ -40,7 +40,7 @@ compute_one_to_many_avx512fp16_fp16(
   for (; dim + 32 <= dimensionality; dim += 32) {
     __m512h q = _mm512_loadu_ph(query + dim);
 
-    __m512h data_regs[dp_batch];
+    std::vector<__m512h> data_regs(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       data_regs[i] = _mm512_loadu_ph(ptrs[i] + dim);
     }
@@ -86,7 +86,7 @@ compute_one_to_many_avx512f_fp16(
     const ailego::Float16 *query, const ailego::Float16 **ptrs,
     std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
     size_t dimensionality, float *results) {
-  __m512 accs[dp_batch];
+  std::vector<__m512> accs(dp_batch);
 
   for (size_t i = 0; i < dp_batch; ++i) {
     accs[i] = _mm512_setzero_ps();
@@ -100,8 +100,8 @@ compute_one_to_many_avx512f_fp16(
     __m512 q1 = _mm512_cvtph_ps(_mm512_castsi512_si256(q));
     __m512 q2 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(q, 1));
 
-    __m512 data_regs_1[dp_batch];
-    __m512 data_regs_2[dp_batch];
+    std::vector<__m512> data_regs_1(dp_batch);
+    std::vector<__m512> data_regs_2(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       __m512i m =
           _mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] + dim));
@@ -126,7 +126,7 @@ compute_one_to_many_avx512f_fp16(
     __m512 q = _mm512_cvtph_ps(
         _mm256_loadu_si256(reinterpret_cast<const __m256i *>(query + dim)));
 
-    __m512 data_regs[dp_batch];
+    std::vector<__m512> data_regs(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       data_regs[i] = _mm512_cvtph_ps(
           _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim)));
@@ -136,7 +136,7 @@ compute_one_to_many_avx512f_fp16(
     dim += 16;
   }
 
-  __m256 acc_new[dp_batch];
+  std::vector<__m256> acc_new(dp_batch);
   for (size_t i = 0; i < dp_batch; ++i) {
     acc_new[i] = _mm256_add_ps(
         _mm512_castps512_ps256(accs[i]),
@@ -176,7 +176,7 @@ compute_one_to_many_avx2_fp16(
     const ailego::Float16 *query, const ailego::Float16 **ptrs,
     std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
     size_t dimensionality, float *results) {
-  __m256 accs[dp_batch];
+  std::vector<__m256> accs(dp_batch);
 
   for (size_t i = 0; i < dp_batch; ++i) {
     accs[i] = _mm256_setzero_ps();
@@ -190,8 +190,8 @@ compute_one_to_many_avx2_fp16(
     __m256 q1 = _mm256_cvtph_ps(_mm256_castsi256_si128(q));
     __m256 q2 = _mm256_cvtph_ps(_mm256_extractf128_si256(q, 1));
 
-    __m256 data_regs_1[dp_batch];
-    __m256 data_regs_2[dp_batch];
+    std::vector<__m256> data_regs_1(dp_batch);
+    std::vector<__m256> data_regs_2(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       __m256i m =
           _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim));
@@ -216,7 +216,7 @@ compute_one_to_many_avx2_fp16(
     __m256 q = _mm256_cvtph_ps(
         _mm_loadu_si128(reinterpret_cast<const __m128i *>(query + dim)));
 
-    __m256 data_regs[dp_batch];
+    std::vector<__m256> data_regs(dp_batch);
     for (size_t i = 0; i < dp_batch; ++i) {
       data_regs[i] = _mm256_cvtph_ps(
           _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptrs[i] + dim)));