From a59ac1716dd0eb415174a0d916a77f728f407570 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Sat, 7 Mar 2026 02:55:37 +0800 Subject: [PATCH 1/4] upd vector to array for ip --- src/ailego/algorithm/lloyd_cluster.h | 8 ++-- .../math/mips_euclidean_distance_matrix.h | 38 +++++++++---------- .../inner_product_distance_batch_impl.h | 10 ++--- .../inner_product_distance_batch_impl_fp16.h | 22 +++++------ .../inner_product_distance_batch_impl_int8.h | 28 +++++++------- 5 files changed, 53 insertions(+), 53 deletions(-) diff --git a/src/ailego/algorithm/lloyd_cluster.h b/src/ailego/algorithm/lloyd_cluster.h index 454a63c7..423693eb 100644 --- a/src/ailego/algorithm/lloyd_cluster.h +++ b/src/ailego/algorithm/lloyd_cluster.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include @@ -248,7 +248,7 @@ class LloydCluster { protected: //! Cluster the cache features void cluster_cache_features(void) { - std::vector scores(BatchCount); + std::array scores; for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) { size_t count = centroids_matrix_.count() / BatchCount * BatchCount; @@ -296,8 +296,8 @@ class LloydCluster { return i < j; }; - std::vector nearest_scores(BatchCount); - std::vector nearest_indexes(BatchCount); + std::array nearest_scores; + std::array nearest_indexes; rows.resize(BatchCount); for (size_t i = first * BatchCount; i != last * BatchCount; diff --git a/src/ailego/math/mips_euclidean_distance_matrix.h b/src/ailego/math/mips_euclidean_distance_matrix.h index 34e9ee02..34b1a7a1 100644 --- a/src/ailego/math/mips_euclidean_distance_matrix.h +++ b/src/ailego/math/mips_euclidean_distance_matrix.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include @@ -109,8 +109,8 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); - std::vector v2(N); + std::array u2; + std::array v2; for (size_t i = 0; i < M; ++i) { const ValueType p_val = p[i]; u2[i] = static_cast(p_val * p_val); @@ -162,8 +162,8 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); - std::vector v2(N); + std::array u2; + std::array v2; for (size_t i = 0; i < M; ++i) { const ValueType p_val = p[i]; u2[i] = static_cast(p_val * p_val); @@ -241,7 +241,7 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); + std::array u2; ValueType q_val = *q++; float v2 = static_cast(q_val * q_val); for (size_t i = 0; i < M; ++i) { @@ -275,7 +275,7 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); + std::array u2; ValueType q_val = *q++; float v2 = static_cast(q_val * q_val); for (size_t i = 0; i < M; ++i) { @@ -328,8 +328,8 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); - std::vector v2(N); + std::array u2; + std::array v2; const uint32_t *p_it = reinterpret_cast(p); const uint32_t *q_it = reinterpret_cast(q); for (size_t i = 0; i < M; ++i) { @@ -384,8 +384,8 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); - std::vector v2(N); + std::array u2; + std::array v2; const uint32_t *p_it = reinterpret_cast(p); const uint32_t *q_it = reinterpret_cast(q); for (size_t i = 0; i < M; ++i) { @@ -496,7 +496,7 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); + std::array u2; uint32_t q_val = *q_it++; float v2 = Squared(q_val); for (size_t i = 0; i < M; ++i) { @@ -532,7 +532,7 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); + std::array u2; uint32_t q_val = *q_it++; float v2 = Squared(q_val); for (size_t i = 0; i < M; ++i) { @@ -614,8 +614,8 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); - std::vector v2(N); + std::array u2; + std::array v2; const uint32_t *p_it = reinterpret_cast(p); const uint32_t *q_it = reinterpret_cast(q); for (size_t i = 0; i < M; ++i) { @@ -670,8 +670,8 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); - std::vector v2(N); + std::array u2; + std::array v2; const uint32_t *p_it = reinterpret_cast(p); const uint32_t *q_it = reinterpret_cast(q); for (size_t i = 0; i < M; ++i) { @@ -857,7 +857,7 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); + std::array u2; uint32_t q_val = *q_it++; float v2 = Squared(q_val); for (size_t i = 0; i < M; ++i) { @@ -893,7 +893,7 @@ struct MipsSquaredEuclideanDistanceMatrix< return; } - std::vector u2(M); + std::array u2; uint32_t q_val = *q_it++; float v2 = Squared(q_val); for (size_t i = 0; i < M; ++i) { diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl.h b/src/ailego/math_batch/inner_product_distance_batch_impl.h index d3ad6f0d..8dd8504d 100644 --- a/src/ailego/math_batch/inner_product_distance_batch_impl.h +++ b/src/ailego/math_batch/inner_product_distance_batch_impl.h @@ -53,14 +53,14 @@ compute_one_to_many_avx2_fp32( const ValueType *query, const ValueType **ptrs, std::array &prefetch_ptrs, size_t dimensionality, float *results) { - std::vector<__m256> accs(dp_batch); + std::array<__m256, dp_batch> accs; for (size_t i = 0; i < dp_batch; ++i) { accs[i] = _mm256_setzero_ps(); } size_t dim = 0; for (; dim + 8 <= dimensionality; dim += 8) { __m256 q = _mm256_loadu_ps(query + dim); - std::vector<__m256> data_regs(dp_batch); + std::array<__m256, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm256_loadu_ps(ptrs[i] + dim); } @@ -73,13 +73,13 @@ compute_one_to_many_avx2_fp32( accs[i] = _mm256_fnmadd_ps(q, data_regs[i], accs[i]); } } - std::vector<__m128> sum128_regs(dp_batch); + std::array<__m128, dp_batch> sum128_regs; for (size_t i = 0; i < dp_batch; ++i) { sum128_regs[i] = sum_top_bottom_avx(accs[i]); } if (dim + 4 <= dimensionality) { __m128 q = _mm_loadu_ps(query + dim); - std::vector<__m128> data_regs(dp_batch); + std::array<__m128, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm_loadu_ps(ptrs[i] + dim); } @@ -95,7 +95,7 @@ compute_one_to_many_avx2_fp32( } if (dim + 2 <= dimensionality) { __m128 q = _mm_setzero_ps(); - std::vector<__m128> data_regs(dp_batch); + std::array<__m128, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm_setzero_ps(); } diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h b/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h index 0ff65f8d..b1aa7f5b 100644 --- a/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h +++ b/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h @@ -30,7 +30,7 @@ compute_one_to_many_avx512fp16_fp16( const ailego::Float16 *query, const ailego::Float16 **ptrs, std::array &prefetch_ptrs, size_t dimensionality, float *results) { - std::vector<__m512h> accs(dp_batch); + std::array<__m512h, dp_batch> accs; for (size_t i = 0; i < dp_batch; ++i) { accs[i] = _mm512_setzero_ph(); @@ -40,7 +40,7 @@ compute_one_to_many_avx512fp16_fp16( for (; dim + 32 <= dimensionality; dim += 32) { __m512h q = _mm512_loadu_ph(query + dim); - std::vector<__m512h> data_regs(dp_batch); + std::array<__m512h, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm512_loadu_ph(ptrs[i] + dim); } @@ -86,7 +86,7 @@ compute_one_to_many_avx512f_fp16( const ailego::Float16 *query, const ailego::Float16 **ptrs, std::array &prefetch_ptrs, size_t dimensionality, float *results) { - std::vector<__m512> accs(dp_batch); + std::array<__m512, dp_batch> accs; for (size_t i = 0; i < dp_batch; ++i) { accs[i] = _mm512_setzero_ps(); @@ -100,8 +100,8 @@ compute_one_to_many_avx512f_fp16( __m512 q1 = _mm512_cvtph_ps(_mm512_castsi512_si256(q)); __m512 q2 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(q, 1)); - std::vector<__m512> data_regs_1(dp_batch); - std::vector<__m512> data_regs_2(dp_batch); + std::array<__m512, dp_batch> data_regs_1; + std::array<__m512, dp_batch> data_regs_2; for (size_t i = 0; i < dp_batch; ++i) { __m512i m = _mm512_loadu_si512(reinterpret_cast(ptrs[i] + dim)); @@ -126,7 +126,7 @@ compute_one_to_many_avx512f_fp16( __m512 q = _mm512_cvtph_ps( _mm256_loadu_si256(reinterpret_cast(query + dim))); - std::vector<__m512> data_regs(dp_batch); + std::array<__m512, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm512_cvtph_ps( _mm256_loadu_si256(reinterpret_cast(ptrs[i] + dim))); @@ -136,7 +136,7 @@ compute_one_to_many_avx512f_fp16( dim += 16; } - std::vector<__m256> acc_new(dp_batch); + std::array<__m256, dp_batch> acc_new; for (size_t i = 0; i < dp_batch; ++i) { acc_new[i] = _mm256_add_ps( _mm512_castps512_ps256(accs[i]), @@ -176,7 +176,7 @@ compute_one_to_many_avx2_fp16( const ailego::Float16 *query, const ailego::Float16 **ptrs, std::array &prefetch_ptrs, size_t dimensionality, float *results) { - std::vector<__m256> accs(dp_batch); + std::array<__m256, dp_batch> accs; for (size_t i = 0; i < dp_batch; ++i) { accs[i] = _mm256_setzero_ps(); @@ -190,8 +190,8 @@ compute_one_to_many_avx2_fp16( __m256 q1 = _mm256_cvtph_ps(_mm256_castsi256_si128(q)); __m256 q2 = _mm256_cvtph_ps(_mm256_extractf128_si256(q, 1)); - std::vector<__m256> data_regs_1(dp_batch); - std::vector<__m256> data_regs_2(dp_batch); + std::array<__m256, dp_batch> data_regs_1; + std::array<__m256, dp_batch> data_regs_2; for (size_t i = 0; i < dp_batch; ++i) { __m256i m = _mm256_loadu_si256(reinterpret_cast(ptrs[i] + dim)); @@ -216,7 +216,7 @@ compute_one_to_many_avx2_fp16( __m256 q = _mm256_cvtph_ps( _mm_loadu_si128(reinterpret_cast(query + dim))); - std::vector<__m256> data_regs(dp_batch); + std::array<__m256, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm256_cvtph_ps( _mm_loadu_si128(reinterpret_cast(ptrs[i] + dim))); diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h b/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h index 3c3721fb..2b1c612d 100644 --- a/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h +++ b/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h @@ -55,7 +55,7 @@ static void compute_one_to_many_avx512_vnni_int8( const int8_t *query, const int8_t **ptrs, std::array &prefetch_ptrs, size_t dimensionality, float *results) { - std::vector<__m512i> accs(dp_batch); + std::array<__m512i, dp_batch> accs; for (size_t i = 0; i < dp_batch; ++i) { accs[i] = _mm512_setzero_si512(); } @@ -63,7 +63,7 @@ static void compute_one_to_many_avx512_vnni_int8( for (; dim + 64 <= dimensionality; dim += 64) { __m512i q = _mm512_loadu_si512(reinterpret_cast(query + dim)); - std::vector<__m512i> data_regs(dp_batch); + std::array<__m512i, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm512_loadu_si512(reinterpret_cast(ptrs[i] + dim)); @@ -100,12 +100,12 @@ static void compute_one_to_many_avx512_vnni_int8( // const int8_t *query, const int8_t **ptrs, // std::array &prefetch_ptrs, size_t // dimensionality, float *results) { -// std::vector<__m512i> accs(dp_batch); +// std::array<__m512i, dp_batch> accs; // size_t dim = 0; // for (; dim + 64 <= dimensionality; dim += 64) { // __m512i q = // _mm512_loadu_si512(reinterpret_cast(query + dim)); -// std::vector<__m512i> data_regs(dp_batch); +// std::array<__m512i, dp_batch> data_regs; // for (size_t i = 0; i < dp_batch; ++i) { // data_regs[i] = // _mm512_loadu_si512(reinterpret_cast(ptrs[i] + @@ -118,16 +118,16 @@ static void compute_one_to_many_avx512_vnni_int8( // } // __m512i q_lo = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(q, 0)); // __m512i q_hi = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(q, 1)); -// std::vector<__m512i> data_lo(dp_batch); -// std::vector<__m512i> data_hi(dp_batch); +// std::array<__m512i, dp_batch> data_lo; +// std::array<__m512i, dp_batch> data_hi; // for (size_t i = 0; i < dp_batch; ++i) { // data_lo[i] = // _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(data_regs[i], 0)); // data_hi[i] = // _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(data_regs[i], 1)); // } -// std::vector<__m512i> prod_lo(dp_batch); -// std::vector<__m512i> prod_hi(dp_batch); +// std::array<__m512i, dp_batch> prod_lo; +// std::array<__m512i, dp_batch> prod_hi; // for (size_t i = 0; i < dp_batch; ++i) { // prod_lo[i] = _mm512_madd_epi16(q_lo, data_lo[i]); // prod_hi[i] = _mm512_madd_epi16(q_hi, data_hi[i]); @@ -163,14 +163,14 @@ compute_one_to_many_avx2_int8( const int8_t *query, const int8_t **ptrs, std::array &prefetch_ptrs, size_t dimensionality, float *results) { - std::vector<__m256i> accs(dp_batch); + std::array<__m256i, dp_batch> accs; for (size_t i = 0; i < dp_batch; ++i) { accs[i] = _mm256_setzero_si256(); } size_t dim = 0; for (; dim + 32 <= dimensionality; dim += 32) { __m256i q = _mm256_loadu_si256((const __m256i *)(query + dim)); - std::vector<__m256i> data_regs(dp_batch); + std::array<__m256i, dp_batch> data_regs; for (size_t i = 0; i < dp_batch; ++i) { data_regs[i] = _mm256_loadu_si256((const __m256i *)(ptrs[i] + dim)); } @@ -181,15 +181,15 @@ compute_one_to_many_avx2_int8( } __m256i q_lo = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(q)); __m256i q_hi = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(q, 1)); - std::vector<__m256i> data_lo(dp_batch); - std::vector<__m256i> data_hi(dp_batch); + std::array<__m256i, dp_batch> data_lo; + std::array<__m256i, dp_batch> data_hi; for (size_t i = 0; i < dp_batch; ++i) { data_lo[i] = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(data_regs[i])); data_hi[i] = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(data_regs[i], 1)); } - std::vector<__m256i> prod_lo(dp_batch); - std::vector<__m256i> prod_hi(dp_batch); + std::array<__m256i, dp_batch> prod_lo; + std::array<__m256i, dp_batch> prod_hi; for (size_t i = 0; i < dp_batch; ++i) { prod_lo[i] = _mm256_madd_epi16(q_lo, data_lo[i]); prod_hi[i] = _mm256_madd_epi16(q_hi, data_hi[i]); From ef8e8c9d167a74760431bbc559d514c8935836ce Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Sat, 7 Mar 2026 03:20:21 +0800 Subject: [PATCH 2/4] clang-format --- src/ailego/algorithm/lloyd_cluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ailego/algorithm/lloyd_cluster.h b/src/ailego/algorithm/lloyd_cluster.h index 423693eb..1b28eeb9 100644 --- a/src/ailego/algorithm/lloyd_cluster.h +++ b/src/ailego/algorithm/lloyd_cluster.h @@ -15,8 +15,8 @@ #pragma once #include -#include #include +#include #include #include #include From f7bc0934e713cdbb81034b3aa0b62267348d6d6c Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Sat, 7 Mar 2026 03:23:54 +0800 Subject: [PATCH 3/4] fix --- src/ailego/algorithm/lloyd_cluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ailego/algorithm/lloyd_cluster.h b/src/ailego/algorithm/lloyd_cluster.h index 1b28eeb9..d889271c 100644 --- a/src/ailego/algorithm/lloyd_cluster.h +++ b/src/ailego/algorithm/lloyd_cluster.h @@ -286,7 +286,7 @@ class LloydCluster { //! Cluster the matrix features void cluster_matrix_features(size_t first, size_t last) { - std::vector scores(BatchCount * BatchCount, 0); + std::array scores; ContainerType rows(centroids_matrix_.dimension()); auto comp = [](float i, float j) { From ed3e92350dc78d23c9ec989803fbf917da41d367 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Sat, 7 Mar 2026 03:29:20 +0800 Subject: [PATCH 4/4] fix --- src/ailego/math_batch/inner_product_distance_batch_impl.h | 2 +- src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h | 2 +- src/ailego/math_batch/inner_product_distance_batch_impl_int8.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl.h b/src/ailego/math_batch/inner_product_distance_batch_impl.h index 8dd8504d..d15a747e 100644 --- a/src/ailego/math_batch/inner_product_distance_batch_impl.h +++ b/src/ailego/math_batch/inner_product_distance_batch_impl.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h b/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h index b1aa7f5b..183369d7 100644 --- a/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h +++ b/src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h b/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h index 2b1c612d..0e236641 100644 --- a/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h +++ b/src/ailego/math_batch/inner_product_distance_batch_impl_int8.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include