From 4f250e8fee7f9421f26a91509c8a638c284bea2f Mon Sep 17 00:00:00 2001 From: TimTheBig <132001783+TimTheBig@users.noreply.github.com> Date: Wed, 20 May 2026 21:58:04 -0400 Subject: [PATCH 1/3] Add `dot2_refs` --- benches/scalar_micro.rs | 45 ++++++++++++++++++++ benchmarks.md | 19 ++++++++- src/real/arithmetic.rs | 94 +++++++++++++++++++++++++++++++++++++++++ src/real/tests.rs | 69 ++++++++++++++++++++++++++++++ 4 files changed, 225 insertions(+), 2 deletions(-) diff --git a/benches/scalar_micro.rs b/benches/scalar_micro.rs index eb561ad..5e07b6d 100644 --- a/benches/scalar_micro.rs +++ b/benches/scalar_micro.rs @@ -312,6 +312,18 @@ const SCALAR_MICRO_GROUPS: &[BenchGroupDoc] = &[ name: "real_add_owned", description: "Adds owned scaled transcendental `Real` values.", }, + BenchDoc { + name: "real_dot2_refs_dense_symbolic", + description: "Computes a borrowed two-lane symbolic dot product with no rational shortcut terms.", + }, + BenchDoc { + name: "real_active_dot2_refs_dense_symbolic", + description: "Computes a borrowed two-lane symbolic dot product after the caller has already classified every lane active.", + }, + BenchDoc { + name: "real_dot2_refs_mixed_structural", + description: "Computes a borrowed two-lane symbolic dot product with an exact zero lane and a rational scale lane.", + }, BenchDoc { name: "real_dot3_refs_dense_symbolic", description: "Computes a borrowed three-lane symbolic dot product with no rational shortcut terms.", @@ -800,6 +812,39 @@ fn bench_borrowed_op_overhead(c: &mut Criterion) { BatchSize::SmallInput, ) }); + group.bench_function("real_dot2_refs_dense_symbolic", |b| { + b.iter(|| { + black_box(Real::dot2_refs( + [black_box(&dense_dot_left[0]), black_box(&dense_dot_left[1])], + [ + black_box(&dense_dot_right[0]), + black_box(&dense_dot_right[1]), + ], + )) + }) + }); + group.bench_function("real_active_dot2_refs_dense_symbolic", |b| { + b.iter(|| { + black_box(Real::active_dot2_refs( + [black_box(&dense_dot_left[0]), 
black_box(&dense_dot_left[1])], + [ + black_box(&dense_dot_right[0]), + black_box(&dense_dot_right[1]), + ], + )) + }) + }); + group.bench_function("real_dot2_refs_mixed_structural", |b| { + b.iter(|| { + black_box(Real::dot2_refs( + [black_box(&mixed_dot_left[0]), black_box(&mixed_dot_left[1])], + [ + black_box(&mixed_dot_right[0]), + black_box(&mixed_dot_right[1]), + ], + )) + }) + }); group.bench_function("real_dot3_refs_dense_symbolic", |b| { b.iter(|| { black_box(Real::dot3_refs( diff --git a/benchmarks.md b/benchmarks.md index 914ff72..0005bdd 100644 --- a/benchmarks.md +++ b/benchmarks.md @@ -319,6 +319,15 @@ Borrowed versus owned operation overhead for rational and real operands. | `borrowed_op_overhead/real_dot4_refs_dense_symbolic` | 5.636 us | 5.625 us - 5.653 us | Computes a borrowed four-lane symbolic dot product with no rational shortcut terms. | | `borrowed_op_overhead/real_active_dot4_refs_dense_symbolic` | 5.677 us | 5.670 us - 5.688 us | Computes a borrowed four-lane symbolic dot product after the caller has already classified every lane active. | | `borrowed_op_overhead/real_dot4_refs_mixed_structural` | 653.48 ns | 651.96 ns - 655.30 ns | Computes a borrowed four-lane symbolic dot product with exact zero and rational scale terms. | +| `borrowed_op_overhead/real_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product with no rational shortcut terms. | +| `borrowed_op_overhead/real_active_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product after the caller has already classified every lane active. | +| `borrowed_op_overhead/real_dot2_refs_mixed_structural` | not run | not run | Computes a borrowed two-lane symbolic dot product with an exact zero lane and a rational scale lane. | +| `borrowed_op_overhead/real_dot3_refs_dense_symbolic` | not run | not run | Computes a borrowed three-lane symbolic dot product with no rational shortcut terms. 
| +| `borrowed_op_overhead/real_active_dot3_refs_dense_symbolic` | not run | not run | Computes a borrowed three-lane symbolic dot product after the caller has already classified every lane active. | +| `borrowed_op_overhead/real_dot3_refs_mixed_structural` | not run | not run | Computes a borrowed three-lane symbolic dot product with exact zero and rational scale terms. | +| `borrowed_op_overhead/real_dot4_refs_dense_symbolic` | not run | not run | Computes a borrowed four-lane symbolic dot product with no rational shortcut terms. | +| `borrowed_op_overhead/real_active_dot4_refs_dense_symbolic` | not run | not run | Computes a borrowed four-lane symbolic dot product after the caller has already classified every lane active. | +| `borrowed_op_overhead/real_dot4_refs_mixed_structural` | not run | not run | Computes a borrowed four-lane symbolic dot product with exact zero and rational scale terms. | ### `dense_algebra` @@ -344,8 +353,14 @@ Construction-time shortcuts for exact rational multiples of pi and inverse compo | `exact_transcendental_special_forms/acos_cos_9pi_7` | not run | not run | Recognizes the principal branch of acos(cos(9pi/7)). | | `exact_transcendental_special_forms/atan_tan_6pi_7` | not run | not run | Recognizes the principal branch of atan(tan(6pi/7)). | | `exact_transcendental_special_forms/asinh_large` | not run | not run | Builds a large inverse hyperbolic sine without exact intermediate Reals. | -| `exact_transcendental_special_forms/atanh_sqrt_half` | 192.18 ns | 189.98 ns - 194.71 ns | Builds atanh(sqrt(2)/2) after exact structural domain checks. | -| `exact_transcendental_special_forms/atanh_sqrt_two_error` | 199.45 ns | 121.09 ns - 355.20 ns | Rejects atanh(sqrt(2)) through exact structural domain checks. | +| `exact_transcendental_special_forms/atanh_sqrt_half` | not run | not run | Builds atanh(sqrt(2)/2) after exact structural domain checks. 
| +| `exact_transcendental_special_forms/atanh_sqrt_two_error` | not run | not run | Rejects atanh(sqrt(2)) through exact structural domain checks. | +| `exact_transcendental_special_forms/atan2_origin` | 528.39 ns | 219.56 ns - 837.23 ns | Hits the origin (0, 0) short-circuit returning exact zero. | +| `exact_transcendental_special_forms/atan2_axis_positive_y` | 289.65 ns | 285.15 ns - 294.15 ns | Hits the positive-y axis short-circuit returning exact pi/2. | +| `exact_transcendental_special_forms/atan2_axis_negative_x` | 261.49 ns | 260.46 ns - 262.51 ns | Hits the negative-x axis short-circuit returning exact pi. | +| `exact_transcendental_special_forms/atan2_quadrant_one_unit_diagonal` | 756.33 ns | 746.31 ns - 766.34 ns | Quadrant I unit diagonal reduces to atan(1) = pi/4 exact special form. | +| `exact_transcendental_special_forms/atan2_quadrant_two_pi_correction` | 1.890 us | 1.872 us - 1.908 us | Quadrant II (1, -2) exercises atan(small ratio) + pi correction. | +| `exact_transcendental_special_forms/atan2_quadrant_three_negative_pi` | 1.156 us | 1.140 us - 1.172 us | Quadrant III (-1, -2) exercises atan(small ratio) - pi correction. | ### `symbolic_reductions` diff --git a/src/real/arithmetic.rs b/src/real/arithmetic.rs index 24407a0..e7a3f65 100644 --- a/src/real/arithmetic.rs +++ b/src/real/arithmetic.rs @@ -1929,6 +1929,91 @@ impl Real { } } + /// Return the two-lane dot product of borrowed reals. + /// + /// Sibling of [`Self::dot3_refs`] / [`Self::dot4_refs`] for the + /// two-component case (2D coordinates, complex products, planar dot + /// products, etc.). Same exact-rational shared-denominator fast path; + /// same symbolic fallback policy. 
+ pub fn dot2_refs(left: [&Real; 2], right: [&Real; 2]) -> Real { + if let (Some(l0), Some(l1), Some(r0), Some(r1)) = ( + left[0].exact_rational_ref(), + left[1].exact_rational_ref(), + right[0].exact_rational_ref(), + right[1].exact_rational_ref(), + ) { + crate::trace_dispatch!("real", "dot_product", "dot2-exact-rational-shared-denom"); + return Real::new(Rational::dot_products([l0, l1], [r0, r1])); + } + + Self::dot2_refs_fallback(left, right) + } + + /// Return a two-lane dot product whose lanes were already classified active. + /// + /// See [`Self::active_dot3_refs`]. + pub fn active_dot2_refs(left: [&Real; 2], right: [&Real; 2]) -> Real { + if let (Some(l0), Some(l1), Some(r0), Some(r1)) = ( + left[0].exact_rational_ref(), + left[1].exact_rational_ref(), + right[0].exact_rational_ref(), + right[1].exact_rational_ref(), + ) { + crate::trace_dispatch!("real", "dot_product", "active-dot2-exact-rational"); + return Real::new(Rational::dot_products([l0, l1], [r0, r1])); + } + + crate::trace_dispatch!("real", "dot_product", "active-dot2-real-tree"); + Self::sum_dot2_terms( + Some(Self::dot_product_active_term(left[0], right[0])), + Some(Self::dot_product_active_term(left[1], right[1])), + ) + } + + #[inline(never)] + fn dot2_refs_fallback(left: [&Real; 2], right: [&Real; 2]) -> Real { + // See `dot3_refs_fallback` for the code-layout rationale. 
+ if Self::dot_product_has_structural_term(left[0], right[0]) + || Self::dot_product_has_structural_term(left[1], right[1]) + { + crate::trace_dispatch!("real", "dot_product", "dot2-structural-real-tree"); + return Self::sum_dot2_terms( + Self::dot_product_term(left[0], right[0]), + Self::dot_product_term(left[1], right[1]), + ); + } + + if left[0].rational.sign() == Sign::NoSign + || right[0].rational.sign() == Sign::NoSign + || left[1].rational.sign() == Sign::NoSign + || right[1].rational.sign() == Sign::NoSign + { + let p0 = Self::dot_product_term(left[0], right[0]); + let p1 = Self::dot_product_term(left[1], right[1]); + let active_terms = usize::from(p0.is_some()) + usize::from(p1.is_some()); + + match active_terms { + 0 => { + crate::trace_dispatch!("real", "dot_product", "dot2-all-zero-real-tree"); + return Real::zero(); + } + 1 => { + crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree-sparse"); + return Self::sum_dot2_terms(p0, p1); + } + _ => { + crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree"); + return Self::sum_dot2_terms(p0, p1); + } + } + } + + let p0 = left[0] * right[0]; + let p1 = left[1] * right[1]; + crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree"); + &p0 + &p1 + } + /// Return the three-lane dot product of borrowed reals. 
/// /// Exact-rational lanes are accumulated with one shared denominator and a @@ -2338,6 +2423,15 @@ impl Real { product } + #[inline] + fn sum_dot2_terms(p0: Option<Real>, p1: Option<Real>) -> Real { + match (p0, p1) { + (None, None) => Real::zero(), + (Some(p), None) | (None, Some(p)) => p, + (Some(a), Some(b)) => &a + &b, + } + } + #[inline] fn sum_dot3_terms(p0: Option<Real>, p1: Option<Real>, p2: Option<Real>) -> Real { match (p0, p1, p2) { diff --git a/src/real/tests.rs b/src/real/tests.rs index c8a0b16..2e090b6 100644 --- a/src/real/tests.rs +++ b/src/real/tests.rs @@ -1511,5 +1511,74 @@ mod tests { assert!( (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12 ); + + let expected = &(&left[0] * &right[0]) + &(&left[1] * &right[1]); + let actual = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]); + assert!( + (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12 + ); + } + + #[test] + fn dot2_refs_matches_pairwise_rational_arithmetic() { + let left = [ + Real::new(Rational::fraction(6, 5).unwrap()), + Real::new(Rational::fraction(-7, 10).unwrap()), + ]; + let right = [ + Real::new(Rational::fraction(-4, 5).unwrap()), + Real::new(Rational::fraction(11, 10).unwrap()), + ]; + let expected = &(&left[0] * &right[0]) + &(&left[1] * &right[1]); + assert_eq!( + Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]), + expected, + ); + } + + #[test] + fn dot2_refs_handles_symbolic_lanes() { + let left = [Real::pi(), Real::e()]; + let right = [Real::e(), Real::pi()]; + let expected = &(&left[0] * &right[0]) + &(&left[1] * &right[1]); + let actual = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]); + assert!( + (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12 + ); + } + + #[test] + fn dot2_refs_zero_lane_shortcut() { + let left = [Real::zero(), Real::from(3_i32)]; + let right = [Real::pi(), Real::e()]; + let expected = &left[1] * &right[1]; + let actual = 
Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]); + assert!( + (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12 + ); + } + + #[test] + fn dot2_refs_all_zero_returns_zero() { + let left = [Real::zero(), Real::zero()]; + let right = [Real::pi(), Real::e()]; + assert_eq!( + Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]), + Real::zero(), + ); + } + + #[test] + fn active_dot2_refs_matches_dot2_refs_when_all_lanes_active() { + let left = [Real::pi(), Real::e() * Real::new(Rational::fraction(3, 5).unwrap())]; + let right = [ + Real::e() * Real::new(Rational::fraction(2, 7).unwrap()), + Real::pi(), + ]; + let expected = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]); + let actual = Real::active_dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]); + assert!( + (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12 + ); } } From be7c1b9b0f84bbe07b27087fbadbb709e1148437 Mon Sep 17 00:00:00 2001 From: Ryan <132001783+TimTheBig@users.noreply.github.com> Date: Thu, 21 May 2026 11:15:22 -0400 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Ryan <132001783+TimTheBig@users.noreply.github.com> --- benchmarks.md | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/benchmarks.md b/benchmarks.md index 0005bdd..ac84894 100644 --- a/benchmarks.md +++ b/benchmarks.md @@ -322,12 +322,6 @@ Borrowed versus owned operation overhead for rational and real operands. | `borrowed_op_overhead/real_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product with no rational shortcut terms. | | `borrowed_op_overhead/real_active_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product after the caller has already classified every lane active. 
| | `borrowed_op_overhead/real_dot2_refs_mixed_structural` | not run | not run | Computes a borrowed two-lane symbolic dot product with an exact zero lane and a rational scale lane. | -| `borrowed_op_overhead/real_dot3_refs_dense_symbolic` | not run | not run | Computes a borrowed three-lane symbolic dot product with no rational shortcut terms. | -| `borrowed_op_overhead/real_active_dot3_refs_dense_symbolic` | not run | not run | Computes a borrowed three-lane symbolic dot product after the caller has already classified every lane active. | -| `borrowed_op_overhead/real_dot3_refs_mixed_structural` | not run | not run | Computes a borrowed three-lane symbolic dot product with exact zero and rational scale terms. | -| `borrowed_op_overhead/real_dot4_refs_dense_symbolic` | not run | not run | Computes a borrowed four-lane symbolic dot product with no rational shortcut terms. | -| `borrowed_op_overhead/real_active_dot4_refs_dense_symbolic` | not run | not run | Computes a borrowed four-lane symbolic dot product after the caller has already classified every lane active. | -| `borrowed_op_overhead/real_dot4_refs_mixed_structural` | not run | not run | Computes a borrowed four-lane symbolic dot product with exact zero and rational scale terms. | ### `dense_algebra` @@ -353,14 +347,8 @@ Construction-time shortcuts for exact rational multiples of pi and inverse compo | `exact_transcendental_special_forms/acos_cos_9pi_7` | not run | not run | Recognizes the principal branch of acos(cos(9pi/7)). | | `exact_transcendental_special_forms/atan_tan_6pi_7` | not run | not run | Recognizes the principal branch of atan(tan(6pi/7)). | | `exact_transcendental_special_forms/asinh_large` | not run | not run | Builds a large inverse hyperbolic sine without exact intermediate Reals. | -| `exact_transcendental_special_forms/atanh_sqrt_half` | not run | not run | Builds atanh(sqrt(2)/2) after exact structural domain checks. 
| -| `exact_transcendental_special_forms/atanh_sqrt_two_error` | not run | not run | Rejects atanh(sqrt(2)) through exact structural domain checks. | -| `exact_transcendental_special_forms/atan2_origin` | 528.39 ns | 219.56 ns - 837.23 ns | Hits the origin (0, 0) short-circuit returning exact zero. | -| `exact_transcendental_special_forms/atan2_axis_positive_y` | 289.65 ns | 285.15 ns - 294.15 ns | Hits the positive-y axis short-circuit returning exact pi/2. | -| `exact_transcendental_special_forms/atan2_axis_negative_x` | 261.49 ns | 260.46 ns - 262.51 ns | Hits the negative-x axis short-circuit returning exact pi. | -| `exact_transcendental_special_forms/atan2_quadrant_one_unit_diagonal` | 756.33 ns | 746.31 ns - 766.34 ns | Quadrant I unit diagonal reduces to atan(1) = pi/4 exact special form. | -| `exact_transcendental_special_forms/atan2_quadrant_two_pi_correction` | 1.890 us | 1.872 us - 1.908 us | Quadrant II (1, -2) exercises atan(small ratio) + pi correction. | -| `exact_transcendental_special_forms/atan2_quadrant_three_negative_pi` | 1.156 us | 1.140 us - 1.172 us | Quadrant III (-1, -2) exercises atan(small ratio) - pi correction. | +| `exact_transcendental_special_forms/atanh_sqrt_half` | 192.18 ns | 189.98 ns - 194.71 ns | Builds atanh(sqrt(2)/2) after exact structural domain checks. | +| `exact_transcendental_special_forms/atanh_sqrt_two_error` | 199.45 ns | 121.09 ns - 355.20 ns | Rejects atanh(sqrt(2)) through exact structural domain checks. 
| ### `symbolic_reductions` From f21dc62588ea5fbf390aaf7a6964e2dc8dedf326 Mon Sep 17 00:00:00 2001 From: Ryan <132001783+TimTheBig@users.noreply.github.com> Date: Thu, 21 May 2026 11:18:22 -0400 Subject: [PATCH 3/3] Fix code formatting --- src/real/tests.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/real/tests.rs b/src/real/tests.rs index 2e090b6..7feea6c 100644 --- a/src/real/tests.rs +++ b/src/real/tests.rs @@ -1570,7 +1570,10 @@ mod tests { #[test] fn active_dot2_refs_matches_dot2_refs_when_all_lanes_active() { - let left = [Real::pi(), Real::e() * Real::new(Rational::fraction(3, 5).unwrap())]; + let left = [ + Real::pi(), + Real::e() * Real::new(Rational::fraction(3, 5).unwrap()), + ]; let right = [ Real::e() * Real::new(Rational::fraction(2, 7).unwrap()), Real::pi(),