From f90235ced1112468630c740ca112c4c512d148a8 Mon Sep 17 00:00:00 2001 From: ckormanyos Date: Fri, 29 May 2026 09:09:40 +0200 Subject: [PATCH 1/2] Update wide integer --- ref_app/src/math/wide_integer/uintwide_t.h | 1134 ++++++++++---------- 1 file changed, 545 insertions(+), 589 deletions(-) diff --git a/ref_app/src/math/wide_integer/uintwide_t.h b/ref_app/src/math/wide_integer/uintwide_t.h index 622be0b84..4243121b8 100644 --- a/ref_app/src/math/wide_integer/uintwide_t.h +++ b/ref_app/src/math/wide_integer/uintwide_t.h @@ -24,6 +24,10 @@ #define WIDE_INTEGER_AVR __AVR__ // NOLINT(cppcoreguidelines-macro-usage) #endif + #if defined(WIDE_INTEGER_MSVC) + //#define WIDE_INTEGER_HAS_LIMB_TYPE_UINT64 + #endif + #if defined(WIDE_INTEGER_MSVC) && defined(WIDE_INTEGER_HAS_LIMB_TYPE_UINT64) #include <__msvc_int128.hpp> #endif @@ -678,6 +682,8 @@ using const_reverse_iterator = ::math::wide_integer::detail::iterator_detail::reverse_iterator; #endif + static_assert(std::is_integral::value, "Error: the value_type of dynamic_array must be a built-in integral"); + // Constructors. 
constexpr dynamic_array() = delete; @@ -706,18 +712,18 @@ constexpr dynamic_array(const dynamic_array& other) : elem_count(other.size()) { - allocator_type my_alloc; - if(elem_count > static_cast(UINT8_C(0))) { + allocator_type my_alloc; + elems = std::allocator_traits::allocate(my_alloc, elem_count); - } - #if defined(WIDE_INTEGER_NAMESPACE) - WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::copy_unsafe(other.elems, other.elems + elem_count, elems); - #else - ::math::wide_integer::detail::copy_unsafe(other.elems, other.elems + elem_count, elems); - #endif + #if defined(WIDE_INTEGER_NAMESPACE) + WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::copy_unsafe(other.elems, other.elems + elem_count, elems); + #else + ::math::wide_integer::detail::copy_unsafe(other.elems, other.elems + elem_count, elems); + #endif + } } template @@ -726,37 +732,36 @@ const allocator_type& alloc_in = allocator_type()) : elem_count(static_cast(last - first)) { - allocator_type my_alloc(alloc_in); - if(elem_count > static_cast(UINT8_C(0))) { - elems = std::allocator_traits::allocate(my_alloc, elem_count); - } + allocator_type my_alloc(alloc_in); - #if defined(WIDE_INTEGER_NAMESPACE) - WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::copy_unsafe(first, last, elems); - #else - ::math::wide_integer::detail::copy_unsafe(first, last, elems); - #endif + elems = std::allocator_traits::allocate(my_alloc, elem_count); + #if defined(WIDE_INTEGER_NAMESPACE) + WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::copy_unsafe(first, last, elems); + #else + ::math::wide_integer::detail::copy_unsafe(first, last, elems); + #endif + } } constexpr dynamic_array(std::initializer_list lst, const allocator_type& alloc_in = allocator_type()) : elem_count(lst.size()) { - allocator_type my_alloc(alloc_in); - if(elem_count > static_cast(UINT8_C(0))) { + allocator_type my_alloc(alloc_in); + elems = std::allocator_traits::allocate(my_alloc, elem_count); - } - #if defined(WIDE_INTEGER_NAMESPACE) - 
WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::copy_unsafe(lst.begin(), lst.end(), elems); - #else - ::math::wide_integer::detail::copy_unsafe(lst.begin(), lst.end(), elems); - #endif + #if defined(WIDE_INTEGER_NAMESPACE) + WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::copy_unsafe(lst.begin(), lst.end(), elems); + #else + ::math::wide_integer::detail::copy_unsafe(lst.begin(), lst.end(), elems); + #endif + } } // Move constructor. @@ -775,19 +780,13 @@ { using local_allocator_traits_type = std::allocator_traits; - allocator_type my_alloc; - - auto p = begin(); // NOLINT(llvm-qualified-auto,readability-qualified-auto) - - while(p != end()) - { - local_allocator_traits_type::destroy(my_alloc, p); + allocator_type my_alloc { }; - ++p; - } - - // Destroy the elements and deallocate the range. + // Deallocate the range of *this. local_allocator_traits_type::deallocate(my_alloc, elems, elem_count); + + elem_count = static_cast(UINT8_C(0)); + elems = nullptr; } } @@ -821,13 +820,21 @@ // Move assignment operator. constexpr auto operator=(dynamic_array&& other) noexcept -> dynamic_array& { - #if defined(WIDE_INTEGER_NAMESPACE) - WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::swap_unsafe(elem_count, other.elem_count); - WIDE_INTEGER_NAMESPACE::math::wide_integer::detail::swap_unsafe(elems, other.elems); - #else - ::math::wide_integer::detail::swap_unsafe(elem_count, other.elem_count); - ::math::wide_integer::detail::swap_unsafe(elems, other.elems); - #endif + if(!empty()) + { + using local_allocator_traits_type = std::allocator_traits; + + allocator_type my_alloc { }; + + // Deallocate the range of *this. 
+ local_allocator_traits_type::deallocate(my_alloc, elems, elem_count); + } + + elem_count = other.elem_count; + elems = other.elems; + + other.elem_count = static_cast(UINT8_C(0)); + other.elems = nullptr; return *this; } @@ -1439,8 +1446,7 @@ const bool IsSignedLeft, const bool IsSignedRight> constexpr auto divmod(const uintwide_t& a, - const uintwide_t& b, - std::enable_if_t<((!IsSignedLeft) && (!IsSignedRight)), int>* p_nullparam = nullptr) -> std::pair, uintwide_t>; + const uintwide_t& b) -> std::enable_if_t<((!IsSignedLeft) && (!IsSignedRight)), std::pair, uintwide_t>>; template constexpr auto divmod(const uintwide_t& a, - const uintwide_t& b, - std::enable_if_t<(IsSignedLeft || IsSignedRight), int>* p_nullparam = nullptr) -> std::pair, uintwide_t>; + const uintwide_t& b) -> std::enable_if_t<(IsSignedLeft || IsSignedRight), std::pair, uintwide_t>>; template::value_type>::digits == std::numeric_limits::digits> const* = nullptr> - constexpr - auto import_bits(uintwide_t& val, - ForwardIterator first, - ForwardIterator last, - unsigned chunk_size = static_cast(UINT8_C(0)), - bool msv_first = true) -> uintwide_t&; + const bool IsSigned> + constexpr auto import_bits(uintwide_t& val, + ForwardIterator first, + ForwardIterator last, + unsigned chunk_size = static_cast(UINT8_C(0)), + bool msv_first = true) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits, uintwide_t&>; template::value_type>::digits == std::numeric_limits::digits)> const* = nullptr> - constexpr - auto import_bits(uintwide_t& val, - ForwardIterator first, - ForwardIterator last, - unsigned chunk_size = static_cast(UINT8_C(0)), - bool msv_first = true) -> uintwide_t&; + const bool IsSigned> + constexpr auto import_bits(uintwide_t& val, + ForwardIterator first, + ForwardIterator last, + unsigned chunk_size = static_cast(UINT8_C(0)), + bool msv_first = true) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits), uintwide_t&>; template::value_type>::digits == 
std::numeric_limits::digits> const* = nullptr> - constexpr - auto export_bits(const uintwide_t& val, - OutputIterator out, - unsigned chunk_size, - bool msv_first = true) -> OutputIterator; + const bool IsSigned> + constexpr auto export_bits(const uintwide_t& val, + OutputIterator out, + unsigned chunk_size, + bool msv_first = true) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits, OutputIterator>; template::value_type>::digits == std::numeric_limits::digits)> const* = nullptr> - constexpr - auto export_bits(const uintwide_t& val, - OutputIterator out, - unsigned chunk_size, - bool msv_first = true) -> OutputIterator; + const bool IsSigned> + constexpr auto export_bits(const uintwide_t& val, + OutputIterator out, + unsigned chunk_size, + bool msv_first = true) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits), OutputIterator>; #if(__cplusplus >= 201703L) } // namespace math::wide_integer @@ -1617,36 +1614,27 @@ explicit constexpr fixed_dynamic_array(const typename base_class_type::size_type size_in = MySize, const typename base_class_type::value_type& value_in = typename base_class_type::value_type(), const typename base_class_type::allocator_type& alloc_in = typename base_class_type::allocator_type()) - : base_class_type(MySize, typename base_class_type::value_type(), alloc_in) + : base_class_type(MySize, value_in, alloc_in) { - detail::fill_unsafe(base_class_type::begin(), - base_class_type::begin() + (detail::min_unsafe)(MySize, static_cast(size_in)), - value_in); + static_cast(size_in); } - constexpr fixed_dynamic_array(const fixed_dynamic_array& other_array) = default; + constexpr fixed_dynamic_array(const fixed_dynamic_array&) = default; - constexpr fixed_dynamic_array(fixed_dynamic_array&& other_array) noexcept = default; + constexpr fixed_dynamic_array(fixed_dynamic_array&&) noexcept = default; constexpr fixed_dynamic_array(std::initializer_list lst) - : base_class_type(MySize) - { - 
detail::copy_unsafe(lst.begin(), - lst.begin() + (detail::min_unsafe)(static_cast(lst.size()), MySize), - base_class_type::begin()); - } + : base_class_type(lst.begin(), + lst.begin() + (detail::min_unsafe)(static_cast(lst.size()), MySize)) { } - constexpr auto operator=(const fixed_dynamic_array& other_array) -> fixed_dynamic_array& = default; + //constexpr + ~fixed_dynamic_array() override = default; - constexpr auto operator=(fixed_dynamic_array&& other_array) noexcept -> fixed_dynamic_array& = default; - }; + constexpr auto operator=(const fixed_dynamic_array&) -> fixed_dynamic_array& = default; - struct allocator_dummy_unsafe - { - constexpr allocator_dummy_unsafe() = default; + constexpr auto operator=(fixed_dynamic_array&&) noexcept -> fixed_dynamic_array& = default; }; - template class fixed_static_array final : public detail::array_detail::array(MySize)> @@ -1654,6 +1642,11 @@ private: using base_class_type = detail::array_detail::array(MySize)>; + struct allocator_dummy_unsafe + { + constexpr allocator_dummy_unsafe() = default; + }; + public: using size_type = size_t; using value_type = typename base_class_type::value_type; @@ -2066,15 +2059,8 @@ Width2 / static_cast(std::numeric_limits::digits) ); - static constexpr size_t number_of_limbs_karatsuba_threshold = - static_cast - ( - static_cast - ( - static_cast(UINT8_C(128)) - + static_cast(UINT8_C(1)) - ) - ); + static constexpr size_t number_of_limbs_karatsuba_threshold { static_cast(128U) }; + static constexpr size_t number_of_limbs_schoolbook_fallback { static_cast(24U) }; // Verify that the Width2 template parameter (mirrored with my_width2): // * Is equal to 2^n times 1...63.
@@ -2161,7 +2147,6 @@ constexpr uintwide_t(const SignedIntegralType v, // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) std::enable_if_t<( std::is_integral::value && std::is_signed ::value)>* p_nullparam = nullptr) - : values(number_of_limbs) { static_cast(p_nullparam == nullptr); @@ -2248,7 +2233,7 @@ // Copy-like constructor from the other signed-ness type. template const* = nullptr> + std::enable_if_t<(RePhraseIsSigned != IsSigned)>* = nullptr> constexpr uintwide_t(const uintwide_t& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : values(other.values) { } @@ -2256,7 +2241,7 @@ // (but has the same limb type) and possibly a different signed-ness. template const* = nullptr> + std::enable_if_t<(Width2 < OtherWidth2)>* = nullptr> explicit constexpr uintwide_t(const uintwide_t& v) { using other_wide_integer_type = uintwide_t; @@ -2345,9 +2330,8 @@ constexpr auto operator=(const uintwide_t&) -> uintwide_t& = default; // LCOV_EXCL_LINE // Assignment operator from the other signed-ness type. - template const* = nullptr> - constexpr auto operator=(const uintwide_t& other) -> uintwide_t& + template + constexpr auto operator=(const uintwide_t& other) -> std::enable_if_t<(RePhraseIsSigned != IsSigned), uintwide_t&> // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) { values = other.values; @@ -2358,9 +2342,8 @@ constexpr auto operator=(uintwide_t&& other) noexcept -> uintwide_t& = default; // LCOV_EXCL_LINE // Trivial move assignment operator from the other signed-ness type. 
- template const* = nullptr> - constexpr auto operator=(uintwide_t&& other) -> uintwide_t& + template + constexpr auto operator=(uintwide_t&& other) -> std::enable_if_t<(IsSigned != RePhraseIsSigned), uintwide_t&> // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) { values = static_cast(other.values); @@ -2493,11 +2476,11 @@ { if(this == &other) { - eval_mul_unary(*this, uintwide_t(other)); // NOLINT(performance-unnecessary-copy-initialization) + eval_mul_unary(uintwide_t(other)); // NOLINT(performance-unnecessary-copy-initialization) } else { - eval_mul_unary(*this, other); + eval_mul_unary(other); } return *this; @@ -2536,29 +2519,7 @@ { // Unary division function. - const auto numer_was_neg = is_neg(*this); - const auto denom_was_neg = is_neg(other); - - if(numer_was_neg || denom_was_neg) - { - using local_unsigned_wide_type = uintwide_t; - - local_unsigned_wide_type a(*this); - local_unsigned_wide_type b(other); - - if(numer_was_neg) { a.negate(); } - if(denom_was_neg) { b.negate(); } - - a.eval_divide_knuth(b); - - if(numer_was_neg != denom_was_neg) { a.negate(); } - - values = a.values; - } - else - { - eval_divide_knuth(other); - } + eval_div_unary(other); } return *this; @@ -2573,36 +2534,8 @@ else { // Unary modulus function. - const auto numer_was_neg = is_neg(*this); - const auto denom_was_neg = is_neg(other); - - if(numer_was_neg || denom_was_neg) - { - using local_unsigned_wide_type = uintwide_t; - - local_unsigned_wide_type a(*this); - local_unsigned_wide_type b(other); - - if(numer_was_neg) { a.negate(); } - if(denom_was_neg) { b.negate(); } - - local_unsigned_wide_type remainder_unsigned { }; - - a.eval_divide_knuth(b, &remainder_unsigned); - - // The sign of the remainder follows the sign of the denominator. 
- if(numer_was_neg) { remainder_unsigned.negate(); } - - values = remainder_unsigned.values; - } - else - { - uintwide_t remainder { }; - - eval_divide_knuth(other, &remainder); - values = remainder.values; - } + eval_mod_unary(other); } return *this; @@ -3120,18 +3053,16 @@ return wr_string_is_ok; } - template const* = nullptr> - WIDE_INTEGER_NODISCARD constexpr auto compare(const uintwide_t& other) const -> std::int_fast8_t + template + WIDE_INTEGER_NODISCARD constexpr auto compare(const uintwide_t& other) const -> std::enable_if_t<(!RePhraseIsSigned), std::int_fast8_t> { return compare_ranges(values.cbegin(), other.values.cbegin(), uintwide_t::number_of_limbs); } - template const* = nullptr> - WIDE_INTEGER_NODISCARD constexpr auto compare(const uintwide_t& other) const -> std::int_fast8_t + template + WIDE_INTEGER_NODISCARD constexpr auto compare(const uintwide_t& other) const -> std::enable_if_t { auto n_result = std::int_fast8_t { }; @@ -3250,15 +3181,13 @@ } template - static constexpr auto is_neg(const uintwide_t&, // NOLINT(hicpp-named-parameter,readability-named-parameter) - std::enable_if_t<(!RePhraseIsSigned), int>* = nullptr) -> bool // NOLINT(hicpp-named-parameter,readability-named-parameter) + static constexpr auto is_neg(const uintwide_t&) -> std::enable_if_t<(!RePhraseIsSigned), bool> // NOLINT(hicpp-named-parameter,readability-named-parameter) { return false; } template - static constexpr auto is_neg(const uintwide_t& a, // NOLINT(hicpp-named-parameter,readability-named-parameter) - std::enable_if_t* = nullptr) -> bool // NOLINT(hicpp-named-parameter,readability-named-parameter) + static constexpr auto is_neg(const uintwide_t& a) -> std::enable_if_t // NOLINT(hicpp-named-parameter,readability-named-parameter) { return (static_cast(static_cast(a.values.back() >> static_cast(std::numeric_limits::limb_type>::digits - 1)) & 1U) != 0U); } @@ -3542,17 +3471,7 @@ const bool OtherIsSignedLeft, const bool OtherIsSignedRight> friend constexpr auto 
divmod(const uintwide_t& a, // NOLINT(readability-redundant-declaration) - const uintwide_t& b, - std::enable_if_t<((!OtherIsSignedLeft) && (!OtherIsSignedRight)), int>* p_nullparam) -> std::pair, uintwide_t>; - - template - friend constexpr auto divmod(const uintwide_t& a, // NOLINT(readability-redundant-declaration) - const uintwide_t& b, - std::enable_if_t<((!OtherIsSignedLeft) && (!OtherIsSignedRight)), int>* p_nullparam) -> std::pair, uintwide_t>; + const uintwide_t& b) -> std::enable_if_t<((!OtherIsSignedLeft) && (!OtherIsSignedRight)), std::pair, uintwide_t>>; template friend constexpr auto divmod(const uintwide_t& a, // NOLINT(readability-redundant-declaration) - const uintwide_t& b, - std::enable_if_t<(OtherIsSignedLeft || OtherIsSignedRight), int>* p_nullparam) -> std::pair, uintwide_t>; + const uintwide_t& b) -> std::enable_if_t<(OtherIsSignedLeft || OtherIsSignedRight), std::pair, uintwide_t>>; #if (defined(__cpp_lib_to_chars) && (__cpp_lib_to_chars >= 201611L)) template(other_rep)) { } - template const* = nullptr> + template static constexpr auto extract_hex_digits(uintwide_t& tu, char* pstr, - const bool is_uppercase) -> unsigned_fast_type + const bool is_uppercase) -> std::enable_if_t<(!RePhraseIsSigned), unsigned_fast_type> { constexpr auto mask = static_cast(UINT8_C(0xF)); @@ -3766,12 +3683,8 @@ #endif template - static constexpr auto eval_mul_unary( uintwide_t& u, - const uintwide_t& v, - std::enable_if_t<((OtherWidth2 / std::numeric_limits::digits) < number_of_limbs_karatsuba_threshold)>* p_nullparam = nullptr) -> void + constexpr auto eval_mul_unary(const uintwide_t& other) -> std::enable_if_t<((OtherWidth2 / std::numeric_limits::digits) < number_of_limbs_karatsuba_threshold), void> { - static_cast(p_nullparam == nullptr); - // Unary multiplication function using schoolbook multiplication, // but we only need to retain the low half of the n*n algorithm. // In other words, this is an n*n->n bit multiplication. 
@@ -3789,22 +3702,18 @@ }; eval_multiply_n_by_n_to_lo_part(result.begin(), - u.values.cbegin(), - v.values.cbegin(), + values.cbegin(), + other.values.cbegin(), local_other_number_of_limbs); detail::copy_unsafe(result.cbegin(), detail::advance_and_point(result.cbegin(), local_other_number_of_limbs), - u.values.begin()); + values.begin()); } template - static constexpr auto eval_mul_unary( uintwide_t& u, - const uintwide_t& v, - std::enable_if_t<((OtherWidth2 / std::numeric_limits::digits) >= number_of_limbs_karatsuba_threshold)>* p_nullparam = nullptr) -> void + constexpr auto eval_mul_unary(const uintwide_t& other) -> std::enable_if_t<((OtherWidth2 / std::numeric_limits::digits) >= number_of_limbs_karatsuba_threshold), void> { - static_cast(p_nullparam == nullptr); - // Unary multiplication function using Karatsuba multiplication. constexpr auto local_number_of_limbs = uintwide_t::number_of_limbs; @@ -3836,14 +3745,103 @@ storage_array_type t { }; eval_multiply_kara_n_by_n_to_2n(result.begin(), - u.values.cbegin(), - v.values.cbegin(), + values.cbegin(), + other.values.cbegin(), local_number_of_limbs, t.begin()); detail::copy_unsafe(result.cbegin(), result.cbegin() + local_number_of_limbs, - u.values.begin()); + values.begin()); + } + + template + constexpr auto eval_div_unary(const uintwide_t& other) -> std::enable_if_t<(!RePhraseIsSigned), void> + { + // Unary division function. + + eval_divide_knuth(other); + } + + template + constexpr auto eval_div_unary(const uintwide_t& other) -> std::enable_if_t + { + // Unary division function. 
+ + const auto numer_was_neg = is_neg(*this); + const auto denom_was_neg = is_neg(other); + + if(numer_was_neg || denom_was_neg) + { + using local_unsigned_wide_type = uintwide_t; + + local_unsigned_wide_type a(*this); + local_unsigned_wide_type b(other); + + if(numer_was_neg) { a.negate(); } + if(denom_was_neg) { b.negate(); } + + a.eval_divide_knuth(b); + + if(numer_was_neg != denom_was_neg) { a.negate(); } + + values = a.values; + } + else + { + eval_divide_knuth(other); + } + } + + template + constexpr auto eval_mod_unary(const uintwide_t& other) -> std::enable_if_t<(!RePhraseIsSigned), void> + { + // Unary modulus function. + + using local_unsigned_wide_type = uintwide_t; + + local_unsigned_wide_type remainder { }; + + eval_divide_knuth(other, &remainder); + + values = remainder.values; + } + + template + constexpr auto eval_mod_unary(const uintwide_t& other) -> std::enable_if_t + { + // Unary modulus function. + + const auto numer_was_neg = is_neg(*this); + const auto denom_was_neg = is_neg(other); + + if(numer_was_neg || denom_was_neg) + { + using local_unsigned_wide_type = uintwide_t; + + local_unsigned_wide_type a(*this); + local_unsigned_wide_type b(other); + + if(numer_was_neg) { a.negate(); } + if(denom_was_neg) { b.negate(); } + + local_unsigned_wide_type remainder { }; + + a.eval_divide_knuth(b, &remainder); + + // The sign of the remainder follows the sign of the numerator (dividend).
+ if(numer_was_neg) { remainder.negate(); } + + values = remainder.values; + } + else + { + uintwide_t remainder { }; + + eval_divide_knuth(other, &remainder); + + values = remainder.values; + } } template::number_of_limbs == 4U)> const* = nullptr> + const size_t RePhraseWidth2 = Width2> static constexpr auto eval_multiply_n_by_n_to_lo_part( ResultIterator r, InputIteratorLeft a, InputIteratorRight b, - const unsigned_fast_type count) -> void + const unsigned_fast_type count) -> std::enable_if_t<(uintwide_t::number_of_limbs == 4U), void> { static_cast(count); @@ -4087,12 +4084,11 @@ template::number_of_limbs == static_cast(UINT32_C(8)))> const* = nullptr> + const size_t RePhraseWidth2 = Width2> static constexpr auto eval_multiply_n_by_n_to_lo_part( ResultIterator r, InputIteratorLeft a, InputIteratorRight b, - const unsigned_fast_type count) -> void + const unsigned_fast_type count) -> std::enable_if_t<(uintwide_t::number_of_limbs == static_cast(UINT32_C(8))), void> { static_cast(count); @@ -4385,16 +4381,15 @@ template::number_of_limbs != static_cast(UINT32_C(4))) - #if defined(WIDE_INTEGER_HAS_MUL_8_BY_8_UNROLL) - && (uintwide_t::number_of_limbs != static_cast(UINT32_C(8))) - #endif - )> const* = nullptr> + const size_t RePhraseWidth2 = Width2> static constexpr auto eval_multiply_n_by_n_to_lo_part( ResultIterator r, InputIteratorLeft a, InputIteratorRight b, - const unsigned_fast_type count) -> void + const unsigned_fast_type count) -> std::enable_if_t<( (uintwide_t::number_of_limbs != static_cast(UINT32_C(4))) + #if defined(WIDE_INTEGER_HAS_MUL_8_BY_8_UNROLL) + && (uintwide_t::number_of_limbs != static_cast(UINT32_C(8))) + #endif + ), void> { static_assert ( @@ -4699,7 +4694,8 @@ const unsigned_fast_type n, InputIteratorTemp t) -> void { - if(n <= static_cast(UINT32_C(48))) + // Small-size fallback: use schoolbook full 2n multiplication. 
+ if(n <= static_cast(number_of_limbs_schoolbook_fallback)) { static_cast(t); @@ -4707,6 +4703,7 @@ } else { + // Sanity: types must have equal limb widths. static_assert ( (std::numeric_limits::value_type>::digits == std::numeric_limits::value_type>::digits) @@ -4715,8 +4712,6 @@ "Error: Internals require same widths for left-right-result limb_types at the moment" ); - using local_limb_type = typename detail::iterator_detail::iterator_traits::value_type; - using result_difference_type = typename detail::iterator_detail::iterator_traits::difference_type; using left_difference_type = typename detail::iterator_detail::iterator_traits::difference_type; using right_difference_type = typename detail::iterator_detail::iterator_traits::difference_type; @@ -4757,40 +4752,54 @@ const auto nh = static_cast(n / 2U); + // Input halves. const InputIteratorLeft a0 = detail::advance_and_point(a, static_cast(0)); const InputIteratorLeft a1 = detail::advance_and_point(a, static_cast(nh)); const InputIteratorRight b0 = detail::advance_and_point(b, static_cast(0)); const InputIteratorRight b1 = detail::advance_and_point(b, static_cast(nh)); - ResultIterator r0 = detail::advance_and_point(r, static_cast(0)); - ResultIterator r1 = detail::advance_and_point(r, static_cast(nh)); - ResultIterator r2 = detail::advance_and_point(r, static_cast(n)); - ResultIterator r3 = detail::advance_and_point(r, static_cast(static_cast(n) + static_cast(nh))); - - InputIteratorTemp t0 = detail::advance_and_point(t, static_cast(0)); - InputIteratorTemp t1 = detail::advance_and_point(t, static_cast(nh)); - InputIteratorTemp t2 = detail::advance_and_point(t, static_cast(n)); - InputIteratorTemp t4 = detail::advance_and_point(t, static_cast(static_cast(n) + static_cast(n))); - - // Step 1 - // a1*b1 -> r2 - // a0*b0 -> r0 - // r -> t0 + // Result partitions: + // r0 -> r[0 .. 2*nh-1] (low) + // r1 -> r[nh .. nh + n - 1] (middle overlap region) + // r2 -> r[n .. 2*n - 1] (high) + // r3 -> r[n + nh .. 
2*n - 1] (upper carry area) + ResultIterator r0 = detail::advance_and_point(r, static_cast(0)); + ResultIterator r1 = detail::advance_and_point(r, static_cast(nh)); + ResultIterator r2 = detail::advance_and_point(r, static_cast(n)); + ResultIterator r3 = detail::advance_and_point(r, static_cast(static_cast(n) + static_cast(nh))); + + // Temporary layout inside t: + // t0 -> t[0 .. nh-1] (temp for |a1-a0|) + // t1 -> t[nh .. 2*nh-1] (temp for |b0-b1|) + // t2 -> t[n .. n + 2*nh - 1] (temp area for copies/products) + // t4 -> t[2*n .. ...] (workspace for deeper recursion) + InputIteratorTemp t0 = detail::advance_and_point(t, static_cast(0)); + InputIteratorTemp t1 = detail::advance_and_point(t, static_cast(nh)); + InputIteratorTemp t2 = detail::advance_and_point(t, static_cast(n)); + InputIteratorTemp t4 = detail::advance_and_point(t, static_cast(static_cast(n) + static_cast(n))); + + // Step 1: compute a1*b1 -> r2 and a0*b0 -> r0 (full 2*nh each). eval_multiply_kara_n_by_n_to_2n(r2, a1, b1, nh, t0); eval_multiply_kara_n_by_n_to_2n(r0, a0, b0, nh, t0); - detail::copy_unsafe(r0, detail::advance_and_point(r0, static_cast(static_cast(n) * static_cast(2U))), t0); - // Step 2 - // r1 -> r1 + a1*b1 - // r1 -> r1 + a0*b0 - auto carry = static_cast(eval_add_n(r1, r1, t2, n)); - eval_multiply_kara_propagate_carry(r3, nh, carry); - carry = static_cast(eval_add_n(r1, r1, t0, n)); - eval_multiply_kara_propagate_carry(r3, nh, carry); + // Keep a copy of a0*b0 (2*nh limbs) in t2 for use in middle accumulation. + detail::copy_unsafe(r0, detail::advance_and_point(r0, static_cast(static_cast(nh) * static_cast(2U))), t2); - // Step 3 - // |a1-a0| -> t0 + // Step 2: r1 += a1*b1 (upper), then r1 += a0*b0 (lower) + { + // Add a1*b1 into r1 (n limbs). + const auto carry1 = eval_add_n(r1, r1, r2, n); + eval_multiply_kara_propagate_carry(r3, nh, static_cast::value_type>(carry1)); + } + + { + // Add a0*b0 copy (in t2) into r1 (n limbs). 
+ const auto carry2 = eval_add_n(r1, r1, t2, n); + eval_multiply_kara_propagate_carry(r3, nh, static_cast::value_type>(carry2)); + } + + // Step 3: compute |a1 - a0| -> t0, note sign. const auto cmp_result_a1a0 = compare_ranges(a1, a0, nh); if(cmp_result_a1a0 == static_cast(INT8_C(1))) @@ -4801,9 +4810,13 @@ { static_cast(eval_subtract_n(t0, a0, a1, nh)); } + else + { + // Handle zero difference. + detail::fill_unsafe(t0, detail::advance_and_point(t0, static_cast(nh)), static_cast::value_type>(UINT8_C(0))); + } - // Step 4 - // |b0-b1| -> t1 + // Step 4: compute |b0 - b1| -> t1, note sign. const auto cmp_result_b0b1 = compare_ranges(b0, b1, nh); if(cmp_result_b0b1 == static_cast(INT8_C(1))) @@ -4814,26 +4827,30 @@ { static_cast(eval_subtract_n(t1, b1, b0, nh)); } + else + { + // Handle zero difference. + detail::fill_unsafe(t1, detail::advance_and_point(t1, static_cast(nh)), static_cast::value_type>(UINT8_C(0))); + } - // Step 5 - // |a1-a0|*|b0-b1| -> t2 + // Step 5: compute t2 = |a1-a0| * |b0-b1| (2*nh limbs) using karatsuba recursively. eval_multiply_kara_n_by_n_to_2n(t2, t0, t1, nh, t4); - // Step 6 - // either r1 += |a1-a0|*|b0-b1| - // or r1 -= |a1-a0|*|b0-b1| - if(static_cast(cmp_result_a1a0 * cmp_result_b0b1) == static_cast(INT8_C(1))) - { - carry = eval_add_n(r1, r1, t2, n); + // Step 6: add or subtract t2 into r1 depending on sign. 
+ const auto sign_prod = static_cast(cmp_result_a1a0 * cmp_result_b0b1); - eval_multiply_kara_propagate_carry(r3, nh, carry); + if(sign_prod == static_cast(INT8_C(1))) + { + const auto carry3 = eval_add_n(r1, r1, t2, n); + eval_multiply_kara_propagate_carry(r3, nh, static_cast::value_type>(carry3)); } - else if(static_cast(cmp_result_a1a0 * cmp_result_b0b1) == static_cast(INT8_C(-1))) + else if(sign_prod == static_cast(INT8_C(-1))) { const auto has_borrow = eval_subtract_n(r1, r1, t2, n); - - eval_multiply_kara_propagate_borrow(r3, nh, has_borrow); + eval_multiply_kara_propagate_borrow(r3, nh, static_cast(has_borrow)); } + + // Result r (2*n limbs) is now populated: low part in r0, middle corrected in r1, high in r2. } } @@ -4890,6 +4907,14 @@ *remainder = uintwide_t(static_cast(UINT8_C(0))); } } + else if(static_cast(v_offset + static_cast(1U)) == static_cast(number_of_limbs)) + { + // The denominator has one single limb. + // Use a one-dimensional division algorithm. + const limb_type short_denominator = *other.values.cbegin(); + + eval_divide_by_single_limb(short_denominator, u_offset, remainder); + } else { const auto result_of_compare_left_with_right = compare(other); @@ -4926,306 +4951,273 @@ } } - template constexpr auto eval_divide_knuth_core(const unsigned_fast_type u_offset, // NOLINT(readability-function-cognitive-complexity) const unsigned_fast_type v_offset, const uintwide_t& other, - uintwide_t* remainder, - std::enable_if_t<(RePhraseWidth2 > static_cast(std::numeric_limits::digits)), int>* p_nullparam = nullptr) -> void + uintwide_t* remainder) -> void { - static_cast(p_nullparam); - using local_uint_index_type = unsigned_fast_type; - if(static_cast(v_offset + static_cast(1U)) == static_cast(number_of_limbs)) - { - // The denominator has one single limb. - // Use a one-dimensional division algorithm. 
- const limb_type short_denominator = *other.values.cbegin(); - - eval_divide_by_single_limb(short_denominator, u_offset, remainder); - } - else - { - // We will now use the Knuth long division algorithm. + // We will now use the Knuth long division algorithm. - // Compute the normalization factor d. - const auto d = - static_cast - ( - static_cast(static_cast(UINT8_C(1)) << static_cast(std::numeric_limits::digits)) - / static_cast(static_cast(*detail::advance_and_point(other.values.cbegin(), static_cast(static_cast(number_of_limbs - 1U) - v_offset))) + static_cast(1U)) - ); + // Compute the normalization factor d. + const auto d = + static_cast + ( + static_cast(static_cast(UINT8_C(1)) << static_cast(std::numeric_limits::digits)) + / static_cast(static_cast(*detail::advance_and_point(other.values.cbegin(), static_cast(static_cast(number_of_limbs - 1U) - v_offset))) + static_cast(1U)) + ); - // Step D1(b), normalize u -> u * d = uu. - // Step D1(c): normalize v -> v * d = vv. + // Step D1(b), normalize u -> u * d = uu. + // Step D1(c): normalize v -> v * d = vv. 
- using uu_array_type = - std::conditional_t::value, - detail::fixed_static_array , - detail::fixed_dynamic_array::value, - std::allocator, - AllocatorType>>::template rebind_alloc>>; + using uu_array_type = + std::conditional_t::value, + detail::fixed_static_array , + detail::fixed_dynamic_array::value, + std::allocator, + AllocatorType>>::template rebind_alloc>>; - uu_array_type uu { }; + uu_array_type uu { }; - representation_type - vv - { - static_cast(number_of_limbs), - static_cast(UINT8_C(0)), - typename representation_type::allocator_type() // LCOV_EXCL_LINE - }; - - if(d > static_cast(UINT8_C(1))) + representation_type + vv { - { - const auto num_limbs_minus_u_ofs = - static_cast - ( - static_cast(number_of_limbs) - u_offset - ); - - const auto u_carry = - eval_multiply_1d - ( - uu.begin(), - values.cbegin(), - d, - static_cast(num_limbs_minus_u_ofs) - ); + static_cast(number_of_limbs), + static_cast(UINT8_C(0)), + typename representation_type::allocator_type() // LCOV_EXCL_LINE + }; - *(uu.begin() + num_limbs_minus_u_ofs) = u_carry; - } + if(d > static_cast(UINT8_C(1))) + { + { + const auto num_limbs_minus_u_ofs = + static_cast + ( + static_cast(number_of_limbs) - u_offset + ); - static_cast - ( + const auto u_carry = eval_multiply_1d ( - vv.begin(), - other.values.cbegin(), + uu.begin(), + values.cbegin(), d, - static_cast(number_of_limbs - v_offset) - ) - ); + static_cast(num_limbs_minus_u_ofs) + ); + + *(uu.begin() + num_limbs_minus_u_ofs) = u_carry; } - else - { - detail::copy_unsafe(values.cbegin(), values.cend(), uu.begin()); - *(uu.begin() + static_cast(static_cast(number_of_limbs) - u_offset)) = static_cast(UINT8_C(0)); + static_cast + ( + eval_multiply_1d + ( + vv.begin(), + other.values.cbegin(), + d, + static_cast(number_of_limbs - v_offset) + ) + ); + } + else + { + detail::copy_unsafe(values.cbegin(), values.cend(), uu.begin()); - vv = other.values; - } + *(uu.begin() + static_cast(static_cast(number_of_limbs) - u_offset)) = 
static_cast(UINT8_C(0)); - // Step D2: Initialize j. - // Step D7: Loop on j from m to 0. + vv = other.values; + } - const auto n = static_cast (number_of_limbs - v_offset); - const auto m = static_cast(static_cast(number_of_limbs - u_offset) - n); - const auto vj0 = static_cast(static_cast(n - static_cast(UINT8_C(1)))); + // Step D2: Initialize j. + // Step D7: Loop on j from m to 0. - auto vv_at_vj0_it = detail::advance_and_point(vv.cbegin(), static_cast(vj0)); // NOLINT(llvm-qualified-auto,readability-qualified-auto) + const auto n = static_cast (number_of_limbs - v_offset); + const auto m = static_cast(static_cast(number_of_limbs - u_offset) - n); + const auto vj0 = static_cast(static_cast(n - static_cast(UINT8_C(1)))); - const auto vv_at_vj0 = *vv_at_vj0_it--; - const auto vv_at_vj0_minus_one = *vv_at_vj0_it; + auto vv_at_vj0_it = detail::advance_and_point(vv.cbegin(), static_cast(vj0)); // NOLINT(llvm-qualified-auto,readability-qualified-auto) - auto values_at_m_minus_j_it = detail::advance_and_point(values.begin(), static_cast(m)); // NOLINT(llvm-qualified-auto,readability-qualified-auto) + const auto vv_at_vj0 = *vv_at_vj0_it--; + const auto vv_at_vj0_minus_one = *vv_at_vj0_it; - for(auto j = static_cast(UINT8_C(0)); j <= m; ++j) // NOLINT(altera-id-dependent-backward-branch) - { - // Step D3 [Calculate q_hat]. - // if u[j] == v[j0] - // set q_hat = b - 1 - // else - // set q_hat = (u[j] * b + u[j + 1]) / v[1] + auto values_at_m_minus_j_it = detail::advance_and_point(values.begin(), static_cast(m)); // NOLINT(llvm-qualified-auto,readability-qualified-auto) + + for(auto j = static_cast(UINT8_C(0)); j <= m; ++j) // NOLINT(altera-id-dependent-backward-branch) + { + // Step D3 [Calculate q_hat]. 
+ // if u[j] == v[j0] + // set q_hat = b - 1 + // else + // set q_hat = (u[j] * b + u[j + 1]) / v[1] - const auto uj = static_cast(static_cast(static_cast(static_cast(number_of_limbs + 1U) - 1U) - u_offset) - j); - const auto u_j_j1 = static_cast(static_cast(static_cast(*(uu.cbegin() + static_cast(uj))) << static_cast(std::numeric_limits::digits)) + *(uu.cbegin() + static_cast(uj - 1U))); + const auto uj = static_cast(static_cast(static_cast(static_cast(number_of_limbs + 1U) - 1U) - u_offset) - j); + const auto u_j_j1 = static_cast(static_cast(static_cast(*(uu.cbegin() + static_cast(uj))) << static_cast(std::numeric_limits::digits)) + *(uu.cbegin() + static_cast(uj - 1U))); - auto q_hat = - static_cast + auto q_hat = + static_cast + ( + (*(uu.cbegin() + static_cast(uj)) == vv_at_vj0) + ? (std::numeric_limits::max)() + : static_cast(u_j_j1 / vv_at_vj0) + ); + + // Decrease q_hat if necessary. + // This means that q_hat must be decreased if the + // expression [(u[uj] * b + u[uj - 1] - q_hat * v[vj0 - 1]) * b] + // exceeds the range of uintwide_t. + + { + const auto u_j_minus_2 = + *detail::advance_and_point(uu.cbegin(), static_cast(uj - 2U)); + + auto t = + static_cast ( - (*(uu.cbegin() + static_cast(uj)) == vv_at_vj0) - ? (std::numeric_limits::max)() - : static_cast(u_j_j1 / vv_at_vj0) + u_j_j1 - static_cast(q_hat * static_cast(vv_at_vj0)) ); - // Decrease q_hat if necessary. - // This means that q_hat must be decreased if the - // expression [(u[uj] * b + u[uj - 1] - q_hat * v[vj0 - 1]) * b] - // exceeds the range of uintwide_t. 
- + while(true) { - const auto u_j_minus_2 = - *detail::advance_and_point(uu.cbegin(), static_cast(uj - 2U)); + const bool t_overflow = + (detail::make_hi(t) != static_cast(UINT8_C(0))); - auto t = + const auto lhs = static_cast ( - u_j_j1 - static_cast(q_hat * static_cast(vv_at_vj0)) + static_cast(vv_at_vj0_minus_one) * q_hat ); - while(true) - { - const bool t_overflow = - (detail::make_hi(t) != static_cast(UINT8_C(0))); - - const auto lhs = - static_cast - ( - static_cast(vv_at_vj0_minus_one) * q_hat - ); - - const auto rhs = - static_cast - ( - static_cast(t << static_cast(std::numeric_limits::digits)) - + u_j_minus_2 - ); - - if(t_overflow || (lhs <= rhs)) - { - break; - } + const auto rhs = + static_cast + ( + static_cast(t << static_cast(std::numeric_limits::digits)) + + u_j_minus_2 + ); - t = static_cast(t + vv_at_vj0); - --q_hat; + if(t_overflow || (lhs <= rhs)) + { + break; } + + t = static_cast(t + vv_at_vj0); + --q_hat; } + } - { - // Step D4: Multiply and subtract. - // Replace u[j, ... j + n] by u[j, ... j + n] - q_hat * v[1, ... n]. + { + // Step D4: Multiply and subtract. + // Replace u[j, ... j + n] by u[j, ... j + n] - q_hat * v[1, ... n]. - // Set nv = q_hat * (v[1, ... n]). - uu_array_type nv { }; + // Set nv = q_hat * (v[1, ... n]). 
+ uu_array_type nv { }; - *(nv.begin() + static_cast(n)) = eval_multiply_1d(nv.begin(), vv.cbegin(), q_hat, n); + *(nv.begin() + static_cast(n)) = eval_multiply_1d(nv.begin(), vv.cbegin(), q_hat, n); - const auto has_borrow = - eval_subtract_n + const auto has_borrow = + eval_subtract_n + ( + detail::advance_and_point(uu.begin(), static_cast(static_cast(uj - n))), + detail::advance_and_point(uu.cbegin(), static_cast(static_cast(uj - n))), + nv.cbegin(), + static_cast ( - detail::advance_and_point(uu.begin(), static_cast(static_cast(uj - n))), - detail::advance_and_point(uu.cbegin(), static_cast(static_cast(uj - n))), - nv.cbegin(), - static_cast - ( - static_cast(n + static_cast(UINT8_C(1))) - ) - ); + static_cast(n + static_cast(UINT8_C(1))) + ) + ); - // Step D5: Test the remainder. - // Set the result value: Set result.m_data[m - j] = q_hat. - // Use the condition (u[j] < 0), in other words if the borrow - // is non-zero, then step D6 needs to be carried out. + // Step D5: Test the remainder. + // Set the result value: Set result.m_data[m - j] = q_hat. + // Use the condition (u[j] < 0), in other words if the borrow + // is non-zero, then step D6 needs to be carried out. - if(has_borrow) - { - --q_hat; + if(has_borrow) + { + --q_hat; - // Step D6: Add back. - // Add v[1, ... n] back to u[j, ... j + n], - // and decrease the result by 1. + // Step D6: Add back. + // Add v[1, ... n] back to u[j, ... j + n], + // and decrease the result by 1. - static_cast - ( - eval_add_n(uu.begin() + static_cast(static_cast(uj - n)), - detail::advance_and_point(uu.cbegin(), static_cast(static_cast(uj - n))), - vv.cbegin(), - static_cast(n)) - ); - } + static_cast + ( + eval_add_n(uu.begin() + static_cast(static_cast(uj - n)), + detail::advance_and_point(uu.cbegin(), static_cast(static_cast(uj - n))), + vv.cbegin(), + static_cast(n)) + ); } + } - // Get the result data. - *values_at_m_minus_j_it = static_cast(q_hat); + // Get the result data. 
+ *values_at_m_minus_j_it = static_cast(q_hat); - if(j < m) - { - --values_at_m_minus_j_it; - } + if(j < m) + { + --values_at_m_minus_j_it; } + } - // Clear the data elements that have not - // been computed in the division algorithm. - { - const auto m_plus_one = - static_cast - ( - static_cast(m) + static_cast(UINT8_C(1)) - ); + // Clear the data elements that have not + // been computed in the division algorithm. + { + const auto m_plus_one = + static_cast + ( + static_cast(m) + static_cast(UINT8_C(1)) + ); - detail::fill_unsafe(detail::advance_and_point(values.begin(), m_plus_one), values.end(), static_cast(UINT8_C(0))); - } + detail::fill_unsafe(detail::advance_and_point(values.begin(), m_plus_one), values.end(), static_cast(UINT8_C(0))); + } - if(remainder != nullptr) + if(remainder != nullptr) + { + auto rl_it_fwd = // NOLINT(llvm-qualified-auto,readability-qualified-auto) + detail::advance_and_point(remainder->values.begin(), static_cast(n)); + + if(d == static_cast(UINT8_C(1))) + { + detail::copy_unsafe(uu.cbegin(), + detail::advance_and_point(uu.cbegin(), static_cast(static_cast(number_of_limbs - v_offset))), + remainder->values.begin()); + } + else { - auto rl_it_fwd = // NOLINT(llvm-qualified-auto,readability-qualified-auto) - detail::advance_and_point(remainder->values.begin(), static_cast(n)); + auto previous_u = static_cast(UINT8_C(0)); - if(d == static_cast(UINT8_C(1))) - { - detail::copy_unsafe(uu.cbegin(), - detail::advance_and_point(uu.cbegin(), static_cast(static_cast(number_of_limbs - v_offset))), - remainder->values.begin()); - } - else - { - auto previous_u = static_cast(UINT8_C(0)); + auto rl_it_rev = static_cast(rl_it_fwd); - auto rl_it_rev = static_cast(rl_it_fwd); + auto ul = + static_cast + ( + static_cast + ( + number_of_limbs + - static_cast(v_offset + static_cast(UINT8_C(1))) + ) + ); - auto ul = - static_cast + for( ; rl_it_rev != remainder->values.rend(); ++rl_it_rev, --ul) // NOLINT(altera-id-dependent-backward-branch) + { + const 
auto t = + static_cast ( - static_cast - ( - number_of_limbs - - static_cast(v_offset + static_cast(UINT8_C(1))) - ) + *(uu.cbegin() + static_cast(ul)) + + static_cast + ( + static_cast(previous_u) << static_cast(std::numeric_limits::digits) + ) ); - for( ; rl_it_rev != remainder->values.rend(); ++rl_it_rev, --ul) // NOLINT(altera-id-dependent-backward-branch) - { - const auto t = - static_cast - ( - *(uu.cbegin() + static_cast(ul)) - + static_cast - ( - static_cast(previous_u) << static_cast(std::numeric_limits::digits) - ) - ); - - *rl_it_rev = static_cast(static_cast(t / d)); - previous_u = static_cast(static_cast(t - static_cast(static_cast(d) * *rl_it_rev))); - } + *rl_it_rev = static_cast(static_cast(t / d)); + previous_u = static_cast(static_cast(t - static_cast(static_cast(d) * *rl_it_rev))); } - - detail::fill_unsafe(rl_it_fwd, remainder->values.end(), static_cast(UINT8_C(0))); } - } - } - - template - constexpr auto eval_divide_knuth_core(const unsigned_fast_type u_offset, - const unsigned_fast_type v_offset, - const uintwide_t& other, - uintwide_t* remainder, - std::enable_if_t<(RePhraseWidth2 <= static_cast(std::numeric_limits::digits)), int>* p_nullparam = nullptr) -> void - { - static_cast(p_nullparam); - static_cast(v_offset); - - // The denominator has one single limb. - // Use a one-dimensional division algorithm. - const auto short_denominator = static_cast(*other.values.cbegin()); - - eval_divide_by_single_limb(short_denominator, u_offset, remainder); + detail::fill_unsafe(rl_it_fwd, remainder->values.end(), static_cast(UINT8_C(0))); + } } template @@ -5538,55 +5530,29 @@ // Define some convenient unsigned wide integer types. 
using uint64_t = uintwide_t(UINT32_C( 64)), std::uint32_t>; - using int64_t = uintwide_t(UINT32_C( 64)), std::uint16_t, void, true>; - #if defined(WIDE_INTEGER_HAS_LIMB_TYPE_UINT64) - using uint128_t = uintwide_t(UINT32_C( 128)), std::uint64_t>; - using uint256_t = uintwide_t(UINT32_C( 256)), std::uint64_t>; - using uint512_t = uintwide_t(UINT32_C( 512)), std::uint64_t>; - using uint1024_t = uintwide_t(UINT32_C( 1024)), std::uint64_t>; - using uint2048_t = uintwide_t(UINT32_C( 2048)), std::uint64_t>; - using uint4096_t = uintwide_t(UINT32_C( 4096)), std::uint64_t>; - using uint8192_t = uintwide_t(UINT32_C( 8192)), std::uint64_t>; - using uint16384_t = uintwide_t(UINT32_C(16384)), std::uint64_t>; - using uint32768_t = uintwide_t(UINT32_C(32768)), std::uint64_t>; - using uint65536_t = uintwide_t(UINT32_C(65536)), std::uint64_t>; - - using int128_t = uintwide_t(UINT32_C( 128)), std::uint64_t, void, true>; - using int256_t = uintwide_t(UINT32_C( 256)), std::uint64_t, void, true>; - using int512_t = uintwide_t(UINT32_C( 512)), std::uint64_t, void, true>; - using int1024_t = uintwide_t(UINT32_C( 1024)), std::uint64_t, void, true>; - using int2048_t = uintwide_t(UINT32_C( 2048)), std::uint64_t, void, true>; - using int4096_t = uintwide_t(UINT32_C( 4096)), std::uint64_t, void, true>; - using int8192_t = uintwide_t(UINT32_C( 8192)), std::uint64_t, void, true>; - using int16384_t = uintwide_t(UINT32_C(16384)), std::uint64_t, void, true>; - using int32768_t = uintwide_t(UINT32_C(32768)), std::uint64_t, void, true>; - using int65536_t = uintwide_t(UINT32_C(65536)), std::uint64_t, void, true>; - - #else - - using uint128_t = uintwide_t(UINT32_C( 128)), std::uint32_t>; - using uint256_t = uintwide_t(UINT32_C( 256)), std::uint32_t>; - using uint512_t = uintwide_t(UINT32_C( 512)), std::uint32_t>; - using uint1024_t = uintwide_t(UINT32_C( 1024)), std::uint32_t>; - using uint2048_t = uintwide_t(UINT32_C( 2048)), std::uint32_t>; - using uint4096_t = uintwide_t(UINT32_C( 4096)), 
std::uint32_t>; - using uint8192_t = uintwide_t(UINT32_C( 8192)), std::uint32_t>; - using uint16384_t = uintwide_t(UINT32_C(16384)), std::uint32_t>; - using uint32768_t = uintwide_t(UINT32_C(32768)), std::uint32_t>; - using uint65536_t = uintwide_t(UINT32_C(65536)), std::uint32_t>; - - using int128_t = uintwide_t(UINT32_C( 128)), std::uint32_t, void, true>; - using int256_t = uintwide_t(UINT32_C( 256)), std::uint32_t, void, true>; - using int512_t = uintwide_t(UINT32_C( 512)), std::uint32_t, void, true>; - using int1024_t = uintwide_t(UINT32_C( 1024)), std::uint32_t, void, true>; - using int2048_t = uintwide_t(UINT32_C( 2048)), std::uint32_t, void, true>; - using int4096_t = uintwide_t(UINT32_C( 4096)), std::uint32_t, void, true>; - using int8192_t = uintwide_t(UINT32_C( 8192)), std::uint32_t, void, true>; - using int16384_t = uintwide_t(UINT32_C(16384)), std::uint32_t, void, true>; - using int32768_t = uintwide_t(UINT32_C(32768)), std::uint32_t, void, true>; - using int65536_t = uintwide_t(UINT32_C(65536)), std::uint32_t, void, true>; - - #endif + using int64_t = uintwide_t(UINT32_C( 64)), std::uint32_t, void, true>; + + using uint128_t = uintwide_t(UINT32_C( 128)), uint_defaultlimb_t>; + using uint256_t = uintwide_t(UINT32_C( 256)), uint_defaultlimb_t>; + using uint512_t = uintwide_t(UINT32_C( 512)), uint_defaultlimb_t>; + using uint1024_t = uintwide_t(UINT32_C( 1024)), uint_defaultlimb_t>; + using uint2048_t = uintwide_t(UINT32_C( 2048)), uint_defaultlimb_t>; + using uint4096_t = uintwide_t(UINT32_C( 4096)), uint_defaultlimb_t>; + using uint8192_t = uintwide_t(UINT32_C( 8192)), uint_defaultlimb_t>; + using uint16384_t = uintwide_t(UINT32_C(16384)), uint_defaultlimb_t>; + using uint32768_t = uintwide_t(UINT32_C(32768)), uint_defaultlimb_t>; + using uint65536_t = uintwide_t(UINT32_C(65536)), uint_defaultlimb_t>; + + using int128_t = uintwide_t(UINT32_C( 128)), uint_defaultlimb_t, void, true>; + using int256_t = uintwide_t(UINT32_C( 256)), uint_defaultlimb_t, void, 
true>; + using int512_t = uintwide_t(UINT32_C( 512)), uint_defaultlimb_t, void, true>; + using int1024_t = uintwide_t(UINT32_C( 1024)), uint_defaultlimb_t, void, true>; + using int2048_t = uintwide_t(UINT32_C( 2048)), uint_defaultlimb_t, void, true>; + using int4096_t = uintwide_t(UINT32_C( 4096)), uint_defaultlimb_t, void, true>; + using int8192_t = uintwide_t(UINT32_C( 8192)), uint_defaultlimb_t, void, true>; + using int16384_t = uintwide_t(UINT32_C(16384)), uint_defaultlimb_t, void, true>; + using int32768_t = uintwide_t(UINT32_C(32768)), uint_defaultlimb_t, void, true>; + using int65536_t = uintwide_t(UINT32_C(65536)), uint_defaultlimb_t, void, true>; #if !defined(WIDE_INTEGER_DISABLE_TRIVIAL_COPY_AND_STD_LAYOUT_CHECKS) static_assert(std::is_trivially_copyable::value, "uintwide_t must be trivially copyable."); @@ -6691,11 +6657,8 @@ const bool IsSignedLeft, const bool IsSignedRight> constexpr auto divmod(const uintwide_t& a, - const uintwide_t& b, - std::enable_if_t<((!IsSignedLeft) && (!IsSignedRight)), int>* p_nullparam) -> std::pair, uintwide_t> + const uintwide_t& b) -> std::enable_if_t<((!IsSignedLeft) && (!IsSignedRight)), std::pair, uintwide_t>> { - static_cast(p_nullparam); - using local_unsigned_wide_type = uintwide_t; local_unsigned_wide_type ua(a); @@ -6716,11 +6679,8 @@ const bool IsSignedLeft, const bool IsSignedRight> constexpr auto divmod(const uintwide_t& a, - const uintwide_t& b, - std::enable_if_t<(IsSignedLeft || IsSignedRight), int>* p_nullparam) -> std::pair, uintwide_t> + const uintwide_t& b) -> std::enable_if_t<(IsSignedLeft || IsSignedRight), std::pair, uintwide_t>> { - static_cast(p_nullparam); - using local_unsigned_wide_type = uintwide_t; using local_unknown_signedness_left_type = uintwide_t; @@ -6729,8 +6689,11 @@ const auto numer_was_neg = local_unknown_signedness_left_type::is_neg(a); const auto denom_was_neg = local_unknown_signedness_right_type::is_neg(b); - local_unsigned_wide_type ua((!numer_was_neg) ? 
a : -a); - const local_unsigned_wide_type ub((!denom_was_neg) ? b : -b); + local_unsigned_wide_type ua(a); + local_unsigned_wide_type ub(b); + + if(numer_was_neg) { ua.negate(); } + if(denom_was_neg) { ub.negate(); } local_unsigned_wide_type ur { }; @@ -6748,8 +6711,9 @@ if(numer_was_neg == denom_was_neg) { - result.first = local_unknown_signedness_left_type(ua); - result.second = (!numer_was_neg) ? local_unknown_signedness_right_type(ur) : -local_unknown_signedness_right_type(ur); + result = divmod_result_pair_type { ua, ur }; + + if(numer_was_neg) { result.second.negate(); } } else { @@ -7494,14 +7458,12 @@ const size_t Width2, typename LimbType, typename AllocatorType, - const bool IsSigned, - std::enable_if_t::value_type>::digits == std::numeric_limits::digits> const*> - constexpr - auto import_bits(uintwide_t& val, - ForwardIterator first, - ForwardIterator last, - unsigned chunk_size, - bool msv_first) -> uintwide_t& + const bool IsSigned> + constexpr auto import_bits(uintwide_t& val, + ForwardIterator first, + ForwardIterator last, + unsigned chunk_size, + bool msv_first) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits, uintwide_t&> { // This subroutine implements limb-by-limb import of bit-chunks. // This template specialization is intended for full chunk sizes, @@ -7620,14 +7582,12 @@ const size_t Width2, typename LimbType, typename AllocatorType, - const bool IsSigned, - std::enable_if_t::value_type>::digits == std::numeric_limits::digits)> const*> - constexpr - auto import_bits(uintwide_t& val, - ForwardIterator first, - ForwardIterator last, - unsigned chunk_size, - bool msv_first) -> uintwide_t& + const bool IsSigned> + constexpr auto import_bits(uintwide_t& val, + ForwardIterator first, + ForwardIterator last, + unsigned chunk_size, + bool msv_first) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits), uintwide_t&> { // This subroutine implements limb-by-limb import of bit-chunks. 
// This template specialization is intended for non-full chunk sizes, @@ -7712,13 +7672,11 @@ const size_t Width2, typename LimbType, typename AllocatorType, - const bool IsSigned, - std::enable_if_t::value_type>::digits == std::numeric_limits::digits> const*> - constexpr - auto export_bits(const uintwide_t& val, - OutputIterator out, - unsigned chunk_size, - bool msv_first) -> OutputIterator + const bool IsSigned> + constexpr auto export_bits(const uintwide_t& val, + OutputIterator out, + unsigned chunk_size, + bool msv_first) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits, OutputIterator> { // This subroutine implements limb-by-limb export of bit-chunks. // This template specialization is intended for full chunk sizes, @@ -7828,13 +7786,11 @@ const size_t Width2, typename LimbType, typename AllocatorType, - const bool IsSigned, - std::enable_if_t::value_type>::digits == std::numeric_limits::digits)> const*> - constexpr - auto export_bits(const uintwide_t& val, - OutputIterator out, - unsigned chunk_size, - bool msv_first) -> OutputIterator + const bool IsSigned> + constexpr auto export_bits(const uintwide_t& val, + OutputIterator out, + unsigned chunk_size, + bool msv_first) -> std::enable_if_t::value_type>::digits == std::numeric_limits::digits), OutputIterator> { // This subroutine implements limb-by-limb export of bit-chunks. 
// This template specialization is intended for non-full chunk sizes, From d0ae0688ab424244be06f49c5baa56f0e7716734 Mon Sep 17 00:00:00 2001 From: ckormanyos Date: Fri, 29 May 2026 11:18:27 +0200 Subject: [PATCH 2/2] Try repair warning on riscv compiler --- ref_app/src/math/wide_integer/uintwide_t.h | 50 ++++++++++++---------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/ref_app/src/math/wide_integer/uintwide_t.h b/ref_app/src/math/wide_integer/uintwide_t.h index 4243121b8..0713b91d1 100644 --- a/ref_app/src/math/wide_integer/uintwide_t.h +++ b/ref_app/src/math/wide_integer/uintwide_t.h @@ -1740,6 +1740,9 @@ template constexpr auto msb_helper(const UnsignedIntegralType& u) -> unsigned_fast_type; + template<> + constexpr auto msb_helper(const std::uint64_t& u) -> unsigned_fast_type; + template<> constexpr auto msb_helper(const std::uint32_t& u) -> unsigned_fast_type; @@ -2425,28 +2428,14 @@ constexpr auto operator+=(const uintwide_t& other) -> uintwide_t& { - if(this == &other) - { - // Unary addition function. - const auto carry = eval_add_n(values.begin(), // LCOV_EXCL_LINE - values.cbegin(), - other.values.cbegin(), - static_cast(number_of_limbs), - static_cast(UINT8_C(0))); + // Unary addition function. + const auto carry = eval_add_n(values.begin(), + values.cbegin(), + other.values.cbegin(), + static_cast(number_of_limbs), + static_cast(UINT8_C(0))); - static_cast(carry); - } - else - { - // Unary addition function. 
- const auto carry = eval_add_n(values.begin(), - values.cbegin(), - other.values.cbegin(), - static_cast(number_of_limbs), - static_cast(UINT8_C(0))); - - static_cast(carry); - } + static_cast(carry); return *this; } @@ -5979,6 +5968,18 @@ return static_cast((detail::max_unsafe)(static_cast(INT8_C(0)), i)); } + template<> + constexpr auto msb_helper(const std::uint64_t& u) -> unsigned_fast_type + { + constexpr unsigned thirty_two { unsigned { UINT8_C(32) } }; + + const std::uint32_t hi_dword { static_cast(u >> thirty_two) }; + + const bool hi_dword_is_nonzero { (hi_dword != std::uint32_t { UINT8_C(0) }) }; + + return (hi_dword_is_nonzero ? static_cast(msb_helper(hi_dword) + thirty_two) : msb_helper(static_cast(u))); + } + template<> constexpr auto msb_helper(const std::uint32_t& u) -> unsigned_fast_type { @@ -6711,7 +6712,12 @@ if(numer_was_neg == denom_was_neg) { - result = divmod_result_pair_type { ua, ur }; + result = + divmod_result_pair_type + { + local_unknown_signedness_left_type(ua), + local_unknown_signedness_right_type(ur) + }; if(numer_was_neg) { result.second.negate(); } }