Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions src/ailego/algorithm/lloyd_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <algorithm>
#include <random>
#include <vector>
#include <ailego/parallel/lock.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/type_helper.h>
Expand Down Expand Up @@ -247,7 +248,7 @@ class LloydCluster {
protected:
//! Cluster the cache features
void cluster_cache_features(void) {
float scores[BatchCount];
std::vector<float> scores(BatchCount);

for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) {
size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
Expand All @@ -258,7 +259,7 @@ class LloydCluster {
for (size_t j = 0; j != count; j += BatchCount) {
ContextType::template BatchDistance<1>(centroids_matrix_[j], feature,
centroids_matrix_.dimension(),
scores);
scores.data());

for (size_t k = 0; k < BatchCount; ++k) {
if (scores[k] < nearest_score) {
Expand All @@ -271,7 +272,7 @@ class LloydCluster {
for (size_t j = count, total = centroids_matrix_.count(); j != total;
++j) {
ContextType::Distance(centroids_matrix_[j], feature,
centroids_matrix_.dimension(), scores);
centroids_matrix_.dimension(), scores.data());

if (scores[0] < nearest_score) {
nearest_score = scores[0];
Expand All @@ -295,23 +296,23 @@ class LloydCluster {
return i < j;
};

float nearest_scores[BatchCount];
size_t nearest_indexes[BatchCount];
std::vector<float> nearest_scores(BatchCount);
std::vector<size_t> nearest_indexes(BatchCount);

rows.resize(BatchCount);
for (size_t i = first * BatchCount; i != last * BatchCount;
i += BatchCount) {
size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
const StoreType *block = feature_matrix_[i];

std::fill(nearest_indexes, nearest_indexes + BatchCount, 0);
std::fill(nearest_scores, nearest_scores + BatchCount,
std::fill(nearest_indexes.data(), nearest_indexes.data() + BatchCount, 0);
std::fill(nearest_scores.data(), nearest_scores.data() + BatchCount,
std::numeric_limits<float>::max());

for (size_t j = 0; j != count; j += BatchCount) {
ContextType::template BatchDistance<BatchCount>(
centroids_matrix_[j], block, centroids_matrix_.dimension(),
&scores[0]);
scores.data());

for (size_t k = 0; k < BatchCount; ++k) {
const float *start = &scores[k * BatchCount];
Expand All @@ -328,7 +329,7 @@ class LloydCluster {
++j) {
ContextType::template BatchDistance<1>(block, centroids_matrix_[j],
centroids_matrix_.dimension(),
&scores[0]);
scores.data());

for (size_t k = 0; k < BatchCount; ++k) {
float score = scores[k];
Expand Down
37 changes: 19 additions & 18 deletions src/ailego/math/mips_euclidean_distance_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#pragma once

#include <vector>
#include <ailego/math/norm2_matrix.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
Expand Down Expand Up @@ -108,8 +109,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
float v2[N];
std::vector<float> u2(M);
std::vector<float> v2(N);
for (size_t i = 0; i < M; ++i) {
const ValueType p_val = p[i];
u2[i] = static_cast<float>(p_val * p_val);
Expand Down Expand Up @@ -161,8 +162,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
float v2[N];
std::vector<float> u2(M);
std::vector<float> v2(N);
for (size_t i = 0; i < M; ++i) {
const ValueType p_val = p[i];
u2[i] = static_cast<float>(p_val * p_val);
Expand Down Expand Up @@ -240,7 +241,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
std::vector<float> u2(M);
ValueType q_val = *q++;
float v2 = static_cast<float>(q_val * q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -274,7 +275,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
std::vector<float> u2(M);
ValueType q_val = *q++;
float v2 = static_cast<float>(q_val * q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -327,8 +328,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
float v2[N];
std::vector<float> u2(M);
std::vector<float> v2(N);
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -383,8 +384,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
float v2[N];
std::vector<float> u2(M);
std::vector<float> v2(N);
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -495,7 +496,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
std::vector<float> u2(M);
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -531,7 +532,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
std::vector<float> u2(M);
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -613,8 +614,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
float v2[N];
std::vector<float> u2(M);
std::vector<float> v2(N);
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -669,8 +670,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
float v2[N];
std::vector<float> u2(M);
std::vector<float> v2(N);
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -856,7 +857,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
std::vector<float> u2(M);
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -892,7 +893,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

float u2[M];
std::vector<float> u2(M);
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down
10 changes: 5 additions & 5 deletions src/ailego/math_batch/inner_product_distance_batch_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ compute_one_to_many_avx2_fp32(
const ValueType *query, const ValueType **ptrs,
std::array<const ValueType *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
__m256 accs[dp_batch];
std::vector<__m256> accs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm256_setzero_ps();
}
size_t dim = 0;
for (; dim + 8 <= dimensionality; dim += 8) {
__m256 q = _mm256_loadu_ps(query + dim);
__m256 data_regs[dp_batch];
std::vector<__m256> data_regs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm256_loadu_ps(ptrs[i] + dim);
}
Expand All @@ -73,13 +73,13 @@ compute_one_to_many_avx2_fp32(
accs[i] = _mm256_fnmadd_ps(q, data_regs[i], accs[i]);
}
}
__m128 sum128_regs[dp_batch];
std::vector<__m128> sum128_regs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
sum128_regs[i] = sum_top_bottom_avx(accs[i]);
}
if (dim + 4 <= dimensionality) {
__m128 q = _mm_loadu_ps(query + dim);
__m128 data_regs[dp_batch];
std::vector<__m128> data_regs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm_loadu_ps(ptrs[i] + dim);
}
Expand All @@ -95,7 +95,7 @@ compute_one_to_many_avx2_fp32(
}
if (dim + 2 <= dimensionality) {
__m128 q = _mm_setzero_ps();
__m128 data_regs[dp_batch];
std::vector<__m128> data_regs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm_setzero_ps();
}
Expand Down
22 changes: 11 additions & 11 deletions src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ compute_one_to_many_avx512fp16_fp16(
const ailego::Float16 *query, const ailego::Float16 **ptrs,
std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
__m512h accs[dp_batch];
std::vector<__m512h> accs(dp_batch);

for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm512_setzero_ph();
Expand All @@ -40,7 +40,7 @@ compute_one_to_many_avx512fp16_fp16(
for (; dim + 32 <= dimensionality; dim += 32) {
__m512h q = _mm512_loadu_ph(query + dim);

__m512h data_regs[dp_batch];
std::vector<__m512h> data_regs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm512_loadu_ph(ptrs[i] + dim);
}
Expand Down Expand Up @@ -86,7 +86,7 @@ compute_one_to_many_avx512f_fp16(
const ailego::Float16 *query, const ailego::Float16 **ptrs,
std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
__m512 accs[dp_batch];
std::vector<__m512> accs(dp_batch);

for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm512_setzero_ps();
Expand All @@ -100,8 +100,8 @@ compute_one_to_many_avx512f_fp16(
__m512 q1 = _mm512_cvtph_ps(_mm512_castsi512_si256(q));
__m512 q2 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(q, 1));

__m512 data_regs_1[dp_batch];
__m512 data_regs_2[dp_batch];
std::vector<__m512> data_regs_1(dp_batch);
std::vector<__m512> data_regs_2(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
__m512i m =
_mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] + dim));
Expand All @@ -126,7 +126,7 @@ compute_one_to_many_avx512f_fp16(
__m512 q = _mm512_cvtph_ps(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(query + dim)));

__m512 data_regs[dp_batch];
std::vector<__m512> data_regs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm512_cvtph_ps(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim)));
Expand All @@ -136,7 +136,7 @@ compute_one_to_many_avx512f_fp16(
dim += 16;
}

__m256 acc_new[dp_batch];
std::vector<__m256> acc_new(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
acc_new[i] = _mm256_add_ps(
_mm512_castps512_ps256(accs[i]),
Expand Down Expand Up @@ -176,7 +176,7 @@ compute_one_to_many_avx2_fp16(
const ailego::Float16 *query, const ailego::Float16 **ptrs,
std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
__m256 accs[dp_batch];
std::vector<__m256> accs(dp_batch);

for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm256_setzero_ps();
Expand All @@ -190,8 +190,8 @@ compute_one_to_many_avx2_fp16(
__m256 q1 = _mm256_cvtph_ps(_mm256_castsi256_si128(q));
__m256 q2 = _mm256_cvtph_ps(_mm256_extractf128_si256(q, 1));

__m256 data_regs_1[dp_batch];
__m256 data_regs_2[dp_batch];
std::vector<__m256> data_regs_1(dp_batch);
std::vector<__m256> data_regs_2(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
__m256i m =
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim));
Expand All @@ -216,7 +216,7 @@ compute_one_to_many_avx2_fp16(
__m256 q = _mm256_cvtph_ps(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(query + dim)));

__m256 data_regs[dp_batch];
std::vector<__m256> data_regs(dp_batch);
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm256_cvtph_ps(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(ptrs[i] + dim)));
Expand Down
Loading