Add two-lane dot-product entry points for `Real`.

Introduces `Real::dot2_refs` and `Real::active_dot2_refs` (plus the private
`dot2_refs_fallback` / `sum_dot2_terms` helpers), three Criterion benches,
their `benchmarks.md` rows (timings still "not run"), and unit tests.

diff --git a/benches/scalar_micro.rs b/benches/scalar_micro.rs
index eb561ad..5e07b6d 100644
--- a/benches/scalar_micro.rs
+++ b/benches/scalar_micro.rs
@@ -312,6 +312,18 @@ const SCALAR_MICRO_GROUPS: &[BenchGroupDoc] = &[
                 name: "real_add_owned",
                 description: "Adds owned scaled transcendental `Real` values.",
             },
+            BenchDoc {
+                name: "real_dot2_refs_dense_symbolic",
+                description: "Computes a borrowed two-lane symbolic dot product with no rational shortcut terms.",
+            },
+            BenchDoc {
+                name: "real_active_dot2_refs_dense_symbolic",
+                description: "Computes a borrowed two-lane symbolic dot product after the caller has already classified every lane active.",
+            },
+            BenchDoc {
+                name: "real_dot2_refs_mixed_structural",
+                description: "Computes a borrowed two-lane symbolic dot product with an exact zero lane and a rational scale lane.",
+            },
             BenchDoc {
                 name: "real_dot3_refs_dense_symbolic",
                 description: "Computes a borrowed three-lane symbolic dot product with no rational shortcut terms.",
@@ -800,6 +812,39 @@ fn bench_borrowed_op_overhead(c: &mut Criterion) {
             BatchSize::SmallInput,
         )
     });
+    group.bench_function("real_dot2_refs_dense_symbolic", |b| {
+        b.iter(|| {
+            black_box(Real::dot2_refs(
+                [black_box(&dense_dot_left[0]), black_box(&dense_dot_left[1])],
+                [
+                    black_box(&dense_dot_right[0]),
+                    black_box(&dense_dot_right[1]),
+                ],
+            ))
+        })
+    });
+    group.bench_function("real_active_dot2_refs_dense_symbolic", |b| {
+        b.iter(|| {
+            black_box(Real::active_dot2_refs(
+                [black_box(&dense_dot_left[0]), black_box(&dense_dot_left[1])],
+                [
+                    black_box(&dense_dot_right[0]),
+                    black_box(&dense_dot_right[1]),
+                ],
+            ))
+        })
+    });
+    group.bench_function("real_dot2_refs_mixed_structural", |b| {
+        b.iter(|| {
+            black_box(Real::dot2_refs(
+                [black_box(&mixed_dot_left[0]), black_box(&mixed_dot_left[1])],
+                [
+                    black_box(&mixed_dot_right[0]),
+                    black_box(&mixed_dot_right[1]),
+                ],
+            ))
+        })
+    });
     group.bench_function("real_dot3_refs_dense_symbolic", |b| {
         b.iter(|| {
             black_box(Real::dot3_refs(
diff --git a/benchmarks.md b/benchmarks.md
index 914ff72..ac84894 100644
--- a/benchmarks.md
+++ b/benchmarks.md
@@ -319,6 +319,9 @@ Borrowed versus owned operation overhead for rational and real operands.
 | `borrowed_op_overhead/real_dot4_refs_dense_symbolic` | 5.636 us | 5.625 us - 5.653 us | Computes a borrowed four-lane symbolic dot product with no rational shortcut terms. |
 | `borrowed_op_overhead/real_active_dot4_refs_dense_symbolic` | 5.677 us | 5.670 us - 5.688 us | Computes a borrowed four-lane symbolic dot product after the caller has already classified every lane active. |
 | `borrowed_op_overhead/real_dot4_refs_mixed_structural` | 653.48 ns | 651.96 ns - 655.30 ns | Computes a borrowed four-lane symbolic dot product with exact zero and rational scale terms. |
+| `borrowed_op_overhead/real_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product with no rational shortcut terms. |
+| `borrowed_op_overhead/real_active_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product after the caller has already classified every lane active. |
+| `borrowed_op_overhead/real_dot2_refs_mixed_structural` | not run | not run | Computes a borrowed two-lane symbolic dot product with an exact zero lane and a rational scale lane. |
 
 ### `dense_algebra`
 
diff --git a/src/real/arithmetic.rs b/src/real/arithmetic.rs
index 24407a0..e7a3f65 100644
--- a/src/real/arithmetic.rs
+++ b/src/real/arithmetic.rs
@@ -1929,6 +1929,91 @@ impl Real {
         }
     }
 
+    /// Return the two-lane dot product of borrowed reals.
+    ///
+    /// Sibling of [`Self::dot3_refs`] / [`Self::dot4_refs`] for the
+    /// two-component case (2D coordinates, complex products, planar dot
+    /// products, etc.). Same exact-rational shared-denominator fast path;
+    /// same symbolic fallback policy.
+    pub fn dot2_refs(left: [&Real; 2], right: [&Real; 2]) -> Real {
+        if let (Some(l0), Some(l1), Some(r0), Some(r1)) = (
+            left[0].exact_rational_ref(),
+            left[1].exact_rational_ref(),
+            right[0].exact_rational_ref(),
+            right[1].exact_rational_ref(),
+        ) {
+            crate::trace_dispatch!("real", "dot_product", "dot2-exact-rational-shared-denom");
+            return Real::new(Rational::dot_products([l0, l1], [r0, r1]));
+        }
+
+        Self::dot2_refs_fallback(left, right)
+    }
+
+    /// Return a two-lane dot product whose lanes were already classified active.
+    ///
+    /// See [`Self::active_dot3_refs`].
+    pub fn active_dot2_refs(left: [&Real; 2], right: [&Real; 2]) -> Real {
+        if let (Some(l0), Some(l1), Some(r0), Some(r1)) = (
+            left[0].exact_rational_ref(),
+            left[1].exact_rational_ref(),
+            right[0].exact_rational_ref(),
+            right[1].exact_rational_ref(),
+        ) {
+            crate::trace_dispatch!("real", "dot_product", "active-dot2-exact-rational");
+            return Real::new(Rational::dot_products([l0, l1], [r0, r1]));
+        }
+
+        crate::trace_dispatch!("real", "dot_product", "active-dot2-real-tree");
+        Self::sum_dot2_terms(
+            Some(Self::dot_product_active_term(left[0], right[0])),
+            Some(Self::dot_product_active_term(left[1], right[1])),
+        )
+    }
+
+    #[inline(never)]
+    fn dot2_refs_fallback(left: [&Real; 2], right: [&Real; 2]) -> Real {
+        // See `dot3_refs_fallback` for the code-layout rationale.
+        if Self::dot_product_has_structural_term(left[0], right[0])
+            || Self::dot_product_has_structural_term(left[1], right[1])
+        {
+            crate::trace_dispatch!("real", "dot_product", "dot2-structural-real-tree");
+            return Self::sum_dot2_terms(
+                Self::dot_product_term(left[0], right[0]),
+                Self::dot_product_term(left[1], right[1]),
+            );
+        }
+
+        if left[0].rational.sign() == Sign::NoSign
+            || right[0].rational.sign() == Sign::NoSign
+            || left[1].rational.sign() == Sign::NoSign
+            || right[1].rational.sign() == Sign::NoSign
+        {
+            let p0 = Self::dot_product_term(left[0], right[0]);
+            let p1 = Self::dot_product_term(left[1], right[1]);
+            let active_terms = usize::from(p0.is_some()) + usize::from(p1.is_some());
+
+            match active_terms {
+                0 => {
+                    crate::trace_dispatch!("real", "dot_product", "dot2-all-zero-real-tree");
+                    return Real::zero();
+                }
+                1 => {
+                    crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree-sparse");
+                    return Self::sum_dot2_terms(p0, p1);
+                }
+                _ => {
+                    crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree");
+                    return Self::sum_dot2_terms(p0, p1);
+                }
+            }
+        }
+
+        let p0 = left[0] * right[0];
+        let p1 = left[1] * right[1];
+        crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree");
+        &p0 + &p1
+    }
+
     /// Return the three-lane dot product of borrowed reals.
     ///
     /// Exact-rational lanes are accumulated with one shared denominator and a
@@ -2338,6 +2423,15 @@ impl Real {
         product
     }
 
+    #[inline]
+    fn sum_dot2_terms(p0: Option<Real>, p1: Option<Real>) -> Real {
+        match (p0, p1) {
+            (None, None) => Real::zero(),
+            (Some(p), None) | (None, Some(p)) => p,
+            (Some(a), Some(b)) => &a + &b,
+        }
+    }
+
     #[inline]
     fn sum_dot3_terms(p0: Option<Real>, p1: Option<Real>, p2: Option<Real>) -> Real {
         match (p0, p1, p2) {
diff --git a/src/real/tests.rs b/src/real/tests.rs
index c8a0b16..7feea6c 100644
--- a/src/real/tests.rs
+++ b/src/real/tests.rs
@@ -1511,5 +1511,77 @@ mod tests {
         assert!(
             (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12
         );
+
+        let expected = &(&left[0] * &right[0]) + &(&left[1] * &right[1]);
+        let actual = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]);
+        assert!(
+            (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12
+        );
+    }
+
+    #[test]
+    fn dot2_refs_matches_pairwise_rational_arithmetic() {
+        let left = [
+            Real::new(Rational::fraction(6, 5).unwrap()),
+            Real::new(Rational::fraction(-7, 10).unwrap()),
+        ];
+        let right = [
+            Real::new(Rational::fraction(-4, 5).unwrap()),
+            Real::new(Rational::fraction(11, 10).unwrap()),
+        ];
+        let expected = &(&left[0] * &right[0]) + &(&left[1] * &right[1]);
+        assert_eq!(
+            Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]),
+            expected,
+        );
+    }
+
+    #[test]
+    fn dot2_refs_handles_symbolic_lanes() {
+        let left = [Real::pi(), Real::e()];
+        let right = [Real::e(), Real::pi()];
+        let expected = &(&left[0] * &right[0]) + &(&left[1] * &right[1]);
+        let actual = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]);
+        assert!(
+            (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12
+        );
+    }
+
+    #[test]
+    fn dot2_refs_zero_lane_shortcut() {
+        let left = [Real::zero(), Real::from(3_i32)];
+        let right = [Real::pi(), Real::e()];
+        let expected = &left[1] * &right[1];
+        let actual = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]);
+        assert!(
+            (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12
+        );
+    }
+
+    #[test]
+    fn dot2_refs_all_zero_returns_zero() {
+        let left = [Real::zero(), Real::zero()];
+        let right = [Real::pi(), Real::e()];
+        assert_eq!(
+            Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]),
+            Real::zero(),
+        );
+    }
+
+    #[test]
+    fn active_dot2_refs_matches_dot2_refs_when_all_lanes_active() {
+        let left = [
+            Real::pi(),
+            Real::e() * Real::new(Rational::fraction(3, 5).unwrap()),
+        ];
+        let right = [
+            Real::e() * Real::new(Rational::fraction(2, 7).unwrap()),
+            Real::pi(),
+        ];
+        let expected = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]);
+        let actual = Real::active_dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]);
+        assert!(
+            (actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12
+        );
     }
 }