diff --git a/src/avx2-32bit-half.hpp b/src/avx2-32bit-half.hpp index 3646979..9a72f0d 100644 --- a/src/avx2-32bit-half.hpp +++ b/src/avx2-32bit-half.hpp @@ -84,7 +84,7 @@ struct avx2_half_vector { return _mm_mask_i32gather_epi32( src, (const int *)base, index, mask, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]); } @@ -237,7 +237,7 @@ struct avx2_half_vector { return _mm_mask_i32gather_epi32( src, (const int *)base, index, mask, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]); } @@ -421,7 +421,7 @@ struct avx2_half_vector { return _mm_mask_i32gather_ps( src, (const float *)base, index, _mm_castsi128_ps(mask), scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]); } diff --git a/src/avx2-64bit-qsort.hpp b/src/avx2-64bit-qsort.hpp index 2af97d6..78bea58 100644 --- a/src/avx2-64bit-qsort.hpp +++ b/src/avx2-64bit-qsort.hpp @@ -99,7 +99,7 @@ struct avx2_vector { return _mm256_mask_i32gather_epi64( src, (const long long int *)base, index, mask, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]); } @@ -269,7 +269,7 @@ struct avx2_vector { return _mm256_mask_i32gather_epi64( src, (const long long int *)base, index, mask, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]); } @@ -499,7 +499,7 @@ struct avx2_vector { scale); ; } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]); } diff --git a/src/avx512-64bit-common.h b/src/avx512-64bit-common.h index f27a31f..71599e8 100644 --- a/src/avx512-64bit-common.h +++ b/src/avx512-64bit-common.h @@ -99,7 +99,7 @@ struct ymm_vector { { return _mm256_mmask_i32gather_ps(src, mask, index, base, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[7]], arr[ind[6]], @@ -293,7 +293,7 @@ struct ymm_vector { { return _mm256_mmask_i32gather_epi32(src, mask, index, base, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[7]], arr[ind[6]], @@ -481,7 +481,7 @@ struct ymm_vector { { return _mm256_mmask_i32gather_epi32(src, mask, index, base, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[7]], arr[ind[6]], @@ -680,7 +680,7 @@ struct zmm_vector { { return _mm512_mask_i32gather_epi64(src, mask, index, base, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[7]], arr[ind[6]], @@ -843,7 +843,7 @@ struct zmm_vector { { return _mm512_mask_i32gather_epi64(src, mask, index, base, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[7]], arr[ind[6]], @@ -1062,7 +1062,7 @@ struct zmm_vector { { return _mm512_mask_i32gather_pd(src, mask, index, base, scale); } - static reg_t i64gather(type_t *arr, arrsize_t *ind) + static reg_t i64gather(const type_t *arr, arrsize_t *ind) { return set(arr[ind[7]], arr[ind[6]], diff --git a/src/xss-common-argsort.h b/src/xss-common-argsort.h index 3e80083..1bec821 100644 --- a/src/xss-common-argsort.h +++ b/src/xss-common-argsort.h @@ -11,8 +11,11 @@ #include template -X86_SIMD_SORT_INLINE void std_argselect_withnan( - T *arr, arrsize_t *arg, arrsize_t k, arrsize_t left, arrsize_t right) +X86_SIMD_SORT_INLINE void std_argselect_withnan(const T *arr, + arrsize_t *arg, + arrsize_t k, + arrsize_t left, + arrsize_t right) { std::nth_element(arg + left, arg + k, @@ -32,8 +35,10 @@ X86_SIMD_SORT_INLINE void std_argselect_withnan( /* argsort using std::sort */ template -X86_SIMD_SORT_INLINE void -std_argsort_withnan(T *arr, arrsize_t *arg, arrsize_t left, arrsize_t right) +X86_SIMD_SORT_INLINE void std_argsort_withnan(const T *arr, + arrsize_t *arg, + arrsize_t left, + arrsize_t right) { std::sort(arg + left, arg + right, @@ -53,7 +58,7 @@ std_argsort_withnan(T *arr, arrsize_t *arg, arrsize_t left, arrsize_t right) /* argsort using std::sort */ template X86_SIMD_SORT_INLINE void -std_argsort(T *arr, arrsize_t *arg, arrsize_t left, arrsize_t right) +std_argsort(const T *arr, arrsize_t *arg, arrsize_t left, arrsize_t right) { std::sort(arg + left, arg + right, @@ -172,7 +177,7 @@ X86_SIMD_SORT_INLINE int32_t partition_vec(type_t *arg, * last element that is less than equal to the pivot. */ template -X86_SIMD_SORT_INLINE arrsize_t argpartition(type_t *arr, +X86_SIMD_SORT_INLINE arrsize_t argpartition(const type_t *arr, arrsize_t *arg, arrsize_t left, arrsize_t right, @@ -291,7 +296,7 @@ template -X86_SIMD_SORT_INLINE arrsize_t argpartition_unrolled(type_t *arr, +X86_SIMD_SORT_INLINE arrsize_t argpartition_unrolled(const type_t *arr, arrsize_t *arg, arrsize_t left, arrsize_t right, @@ -422,7 +427,7 @@ X86_SIMD_SORT_INLINE arrsize_t argpartition_unrolled(type_t *arr, } template -X86_SIMD_SORT_INLINE type_t get_pivot_64bit(type_t *arr, +X86_SIMD_SORT_INLINE type_t get_pivot_64bit(const type_t *arr, arrsize_t *arg, const arrsize_t left, const arrsize_t right) @@ -468,7 +473,7 @@ X86_SIMD_SORT_INLINE type_t get_pivot_64bit(type_t *arr, } template -X86_SIMD_SORT_INLINE void argsort_(type_t *arr, +X86_SIMD_SORT_INLINE void argsort_(const type_t *arr, arrsize_t *arg, arrsize_t left, arrsize_t right, @@ -549,7 +554,7 @@ X86_SIMD_SORT_INLINE void argsort_(type_t *arr, } template -X86_SIMD_SORT_INLINE void argselect_(type_t *arr, +X86_SIMD_SORT_INLINE void argselect_(const type_t *arr, arrsize_t *arg, arrsize_t pos, arrsize_t left, @@ -590,7 +595,7 @@ template typename half_vector> -X86_SIMD_SORT_INLINE void xss_argsort(T *arr, +X86_SIMD_SORT_INLINE void xss_argsort(const T *arr, arrsize_t *arg, arrsize_t arrsize, bool hasnan = false, @@ -669,29 +674,25 @@ X86_SIMD_SORT_INLINE void xss_argsort(T *arr, } template -X86_SIMD_SORT_INLINE void avx512_argsort(T *arr, +X86_SIMD_SORT_INLINE void avx512_argsort(const T *arr, arrsize_t *arg, arrsize_t arrsize, bool hasnan = false, bool descending = false) { - // Safe: argsort never mutates arr; const is dropped only for SIMD type instantiation - using base_t = std::remove_const_t; - xss_argsort( - const_cast(arr), arg, arrsize, hasnan, descending); + xss_argsort( + arr, arg, arrsize, hasnan, descending); } template -X86_SIMD_SORT_INLINE void avx2_argsort(T *arr, +X86_SIMD_SORT_INLINE void avx2_argsort(const T *arr, arrsize_t *arg, arrsize_t arrsize, bool hasnan = false, bool descending = false) { - // Safe: argsort never mutates arr; const is dropped only for SIMD type instantiation - using base_t = std::remove_const_t; - xss_argsort( - const_cast(arr), arg, arrsize, hasnan, descending); + xss_argsort( + arr, arg, arrsize, hasnan, descending); } /* argselect methods for 32-bit and 64-bit dtypes */ @@ -700,7 +701,7 @@ template typename half_vector> -X86_SIMD_SORT_INLINE void xss_argselect(T *arr, +X86_SIMD_SORT_INLINE void xss_argselect(const T *arr, arrsize_t *arg, arrsize_t k, arrsize_t arrsize, @@ -735,29 +736,24 @@ X86_SIMD_SORT_INLINE void xss_argselect(T *arr, } template -X86_SIMD_SORT_INLINE void avx512_argselect(T *arr, +X86_SIMD_SORT_INLINE void avx512_argselect(const T *arr, arrsize_t *arg, arrsize_t k, arrsize_t arrsize, bool hasnan = false) { - // Safe: argselect never mutates arr; const is dropped only for SIMD type instantiation - using base_t = std::remove_const_t; - xss_argselect( - const_cast(arr), arg, k, arrsize, hasnan); + xss_argselect(arr, arg, k, arrsize, hasnan); } template -X86_SIMD_SORT_INLINE void avx2_argselect(T *arr, +X86_SIMD_SORT_INLINE void avx2_argselect(const T *arr, arrsize_t *arg, arrsize_t k, arrsize_t arrsize, bool hasnan = false) { - // Safe: argselect never mutates arr; const is dropped only for SIMD type instantiation - using base_t = std::remove_const_t; - xss_argselect( - const_cast(arr), arg, k, arrsize, hasnan); + xss_argselect( + arr, arg, k, arrsize, hasnan); } #endif // XSS_COMMON_ARGSORT diff --git a/src/xss-common-qsort.h b/src/xss-common-qsort.h index 73b947a..2bf7ca6 100644 --- a/src/xss-common-qsort.h +++ b/src/xss-common-qsort.h @@ -79,7 +79,7 @@ X86_SIMD_SORT_INLINE arrsize_t replace_nan_with_inf(T *arr, arrsize_t size) } template -X86_SIMD_SORT_INLINE bool array_has_nan(type_t *arr, arrsize_t size) +X86_SIMD_SORT_INLINE bool array_has_nan(const type_t *arr, arrsize_t size) { using opmask_t = typename vtype::opmask_t; using reg_t = typename vtype::reg_t; diff --git a/src/xss-network-keyvaluesort.hpp b/src/xss-network-keyvaluesort.hpp index 771d7a3..1e5cd5d 100644 --- a/src/xss-network-keyvaluesort.hpp +++ b/src/xss-network-keyvaluesort.hpp @@ -208,7 +208,7 @@ bitonic_fullmerge_n_vec(typename keyType::reg_t *keys, template X86_SIMD_SORT_INLINE void -argsort_n_vec(typename keyType::type_t *keys, arrsize_t *indices, int N) +argsort_n_vec(const typename keyType::type_t *keys, arrsize_t *indices, int N) { using kreg_t = typename keyType::reg_t; using ireg_t = typename indexType::reg_t; @@ -354,7 +354,7 @@ X86_SIMD_SORT_INLINE void kvsort_n_vec(typename keyType::type_t *keys, template X86_SIMD_SORT_INLINE void -argsort_n(typename keyType::type_t *keys, arrsize_t *indices, int N) +argsort_n(const typename keyType::type_t *keys, arrsize_t *indices, int N) { static_assert(keyType::numlanes == indexType::numlanes, "invalid pairing of value/index types");