Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/ailego/algorithm/lloyd_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
#pragma once

#include <algorithm>
#include <array>
#include <random>
#include <vector>
#include <ailego/parallel/lock.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/type_helper.h>
Expand Down Expand Up @@ -248,7 +248,7 @@ class LloydCluster {
protected:
//! Cluster the cache features
void cluster_cache_features(void) {
std::vector<float> scores(BatchCount);
std::array<float, BatchCount> scores;

for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) {
size_t count = centroids_matrix_.count() / BatchCount * BatchCount;
Expand Down Expand Up @@ -286,7 +286,7 @@ class LloydCluster {

//! Cluster the matrix features
void cluster_matrix_features(size_t first, size_t last) {
std::vector<float> scores(BatchCount * BatchCount, 0);
std::array<float, BatchCount * BatchCount> scores;
ContainerType rows(centroids_matrix_.dimension());

auto comp = [](float i, float j) {
Expand All @@ -296,8 +296,8 @@ class LloydCluster {
return i < j;
};

std::vector<float> nearest_scores(BatchCount);
std::vector<size_t> nearest_indexes(BatchCount);
std::array<float, BatchCount> nearest_scores;
std::array<size_t, BatchCount> nearest_indexes;

rows.resize(BatchCount);
for (size_t i = first * BatchCount; i != last * BatchCount;
Expand Down
38 changes: 19 additions & 19 deletions src/ailego/math/mips_euclidean_distance_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#pragma once

#include <vector>
#include <array>
#include <ailego/math/norm2_matrix.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
Expand Down Expand Up @@ -109,8 +109,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::vector<float> v2(N);
std::array<float, M> u2;
std::array<float, N> v2;
for (size_t i = 0; i < M; ++i) {
const ValueType p_val = p[i];
u2[i] = static_cast<float>(p_val * p_val);
Expand Down Expand Up @@ -162,8 +162,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::vector<float> v2(N);
std::array<float, M> u2;
std::array<float, N> v2;
for (size_t i = 0; i < M; ++i) {
const ValueType p_val = p[i];
u2[i] = static_cast<float>(p_val * p_val);
Expand Down Expand Up @@ -241,7 +241,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::array<float, M> u2;
ValueType q_val = *q++;
float v2 = static_cast<float>(q_val * q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -275,7 +275,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::array<float, M> u2;
ValueType q_val = *q++;
float v2 = static_cast<float>(q_val * q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -328,8 +328,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::vector<float> v2(N);
std::array<float, M> u2;
std::array<float, N> v2;
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -384,8 +384,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::vector<float> v2(N);
std::array<float, M> u2;
std::array<float, N> v2;
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -496,7 +496,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::array<float, M> u2;
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -532,7 +532,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::array<float, M> u2;
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -614,8 +614,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::vector<float> v2(N);
std::array<float, M> u2;
std::array<float, N> v2;
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -670,8 +670,8 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::vector<float> v2(N);
std::array<float, M> u2;
std::array<float, N> v2;
const uint32_t *p_it = reinterpret_cast<const uint32_t *>(p);
const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -857,7 +857,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::array<float, M> u2;
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down Expand Up @@ -893,7 +893,7 @@ struct MipsSquaredEuclideanDistanceMatrix<
return;
}

std::vector<float> u2(M);
std::array<float, M> u2;
uint32_t q_val = *q_it++;
float v2 = Squared(q_val);
for (size_t i = 0; i < M; ++i) {
Expand Down
12 changes: 6 additions & 6 deletions src/ailego/math_batch/inner_product_distance_batch_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#pragma once

#include <vector>
#include <array>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
Expand Down Expand Up @@ -53,14 +53,14 @@ compute_one_to_many_avx2_fp32(
const ValueType *query, const ValueType **ptrs,
std::array<const ValueType *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
std::vector<__m256> accs(dp_batch);
std::array<__m256, dp_batch> accs;
for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm256_setzero_ps();
}
size_t dim = 0;
for (; dim + 8 <= dimensionality; dim += 8) {
__m256 q = _mm256_loadu_ps(query + dim);
std::vector<__m256> data_regs(dp_batch);
std::array<__m256, dp_batch> data_regs;
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm256_loadu_ps(ptrs[i] + dim);
}
Expand All @@ -73,13 +73,13 @@ compute_one_to_many_avx2_fp32(
accs[i] = _mm256_fnmadd_ps(q, data_regs[i], accs[i]);
}
}
std::vector<__m128> sum128_regs(dp_batch);
std::array<__m128, dp_batch> sum128_regs;
for (size_t i = 0; i < dp_batch; ++i) {
sum128_regs[i] = sum_top_bottom_avx(accs[i]);
}
if (dim + 4 <= dimensionality) {
__m128 q = _mm_loadu_ps(query + dim);
std::vector<__m128> data_regs(dp_batch);
std::array<__m128, dp_batch> data_regs;
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm_loadu_ps(ptrs[i] + dim);
}
Expand All @@ -95,7 +95,7 @@ compute_one_to_many_avx2_fp32(
}
if (dim + 2 <= dimensionality) {
__m128 q = _mm_setzero_ps();
std::vector<__m128> data_regs(dp_batch);
std::array<__m128, dp_batch> data_regs;
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm_setzero_ps();
}
Expand Down
24 changes: 12 additions & 12 deletions src/ailego/math_batch/inner_product_distance_batch_impl_fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#pragma once

#include <vector>
#include <array>
#include <ailego/math/matrix_utility.i>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
Expand All @@ -30,7 +30,7 @@ compute_one_to_many_avx512fp16_fp16(
const ailego::Float16 *query, const ailego::Float16 **ptrs,
std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
std::vector<__m512h> accs(dp_batch);
std::array<__m512h, dp_batch> accs;

for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm512_setzero_ph();
Expand All @@ -40,7 +40,7 @@ compute_one_to_many_avx512fp16_fp16(
for (; dim + 32 <= dimensionality; dim += 32) {
__m512h q = _mm512_loadu_ph(query + dim);

std::vector<__m512h> data_regs(dp_batch);
std::array<__m512h, dp_batch> data_regs;
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm512_loadu_ph(ptrs[i] + dim);
}
Expand Down Expand Up @@ -86,7 +86,7 @@ compute_one_to_many_avx512f_fp16(
const ailego::Float16 *query, const ailego::Float16 **ptrs,
std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
std::vector<__m512> accs(dp_batch);
std::array<__m512, dp_batch> accs;

for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm512_setzero_ps();
Expand All @@ -100,8 +100,8 @@ compute_one_to_many_avx512f_fp16(
__m512 q1 = _mm512_cvtph_ps(_mm512_castsi512_si256(q));
__m512 q2 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(q, 1));

std::vector<__m512> data_regs_1(dp_batch);
std::vector<__m512> data_regs_2(dp_batch);
std::array<__m512, dp_batch> data_regs_1;
std::array<__m512, dp_batch> data_regs_2;
for (size_t i = 0; i < dp_batch; ++i) {
__m512i m =
_mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] + dim));
Expand All @@ -126,7 +126,7 @@ compute_one_to_many_avx512f_fp16(
__m512 q = _mm512_cvtph_ps(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(query + dim)));

std::vector<__m512> data_regs(dp_batch);
std::array<__m512, dp_batch> data_regs;
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm512_cvtph_ps(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim)));
Expand All @@ -136,7 +136,7 @@ compute_one_to_many_avx512f_fp16(
dim += 16;
}

std::vector<__m256> acc_new(dp_batch);
std::array<__m256, dp_batch> acc_new;
for (size_t i = 0; i < dp_batch; ++i) {
acc_new[i] = _mm256_add_ps(
_mm512_castps512_ps256(accs[i]),
Expand Down Expand Up @@ -176,7 +176,7 @@ compute_one_to_many_avx2_fp16(
const ailego::Float16 *query, const ailego::Float16 **ptrs,
std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
size_t dimensionality, float *results) {
std::vector<__m256> accs(dp_batch);
std::array<__m256, dp_batch> accs;

for (size_t i = 0; i < dp_batch; ++i) {
accs[i] = _mm256_setzero_ps();
Expand All @@ -190,8 +190,8 @@ compute_one_to_many_avx2_fp16(
__m256 q1 = _mm256_cvtph_ps(_mm256_castsi256_si128(q));
__m256 q2 = _mm256_cvtph_ps(_mm256_extractf128_si256(q, 1));

std::vector<__m256> data_regs_1(dp_batch);
std::vector<__m256> data_regs_2(dp_batch);
std::array<__m256, dp_batch> data_regs_1;
std::array<__m256, dp_batch> data_regs_2;
for (size_t i = 0; i < dp_batch; ++i) {
__m256i m =
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim));
Expand All @@ -216,7 +216,7 @@ compute_one_to_many_avx2_fp16(
__m256 q = _mm256_cvtph_ps(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(query + dim)));

std::vector<__m256> data_regs(dp_batch);
std::array<__m256, dp_batch> data_regs;
for (size_t i = 0; i < dp_batch; ++i) {
data_regs[i] = _mm256_cvtph_ps(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(ptrs[i] + dim)));
Expand Down
Loading