Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions benches/scalar_micro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,18 @@ const SCALAR_MICRO_GROUPS: &[BenchGroupDoc] = &[
name: "real_add_owned",
description: "Adds owned scaled transcendental `Real` values.",
},
BenchDoc {
name: "real_dot2_refs_dense_symbolic",
description: "Computes a borrowed two-lane symbolic dot product with no rational shortcut terms.",
},
BenchDoc {
name: "real_active_dot2_refs_dense_symbolic",
description: "Computes a borrowed two-lane symbolic dot product after the caller has already classified every lane active.",
},
BenchDoc {
name: "real_dot2_refs_mixed_structural",
description: "Computes a borrowed two-lane symbolic dot product with an exact zero lane and a rational scale lane.",
},
BenchDoc {
name: "real_dot3_refs_dense_symbolic",
description: "Computes a borrowed three-lane symbolic dot product with no rational shortcut terms.",
Expand Down Expand Up @@ -800,6 +812,39 @@ fn bench_borrowed_op_overhead(c: &mut Criterion) {
BatchSize::SmallInput,
)
});
group.bench_function("real_dot2_refs_dense_symbolic", |b| {
b.iter(|| {
black_box(Real::dot2_refs(
[black_box(&dense_dot_left[0]), black_box(&dense_dot_left[1])],
[
black_box(&dense_dot_right[0]),
black_box(&dense_dot_right[1]),
],
))
})
});
group.bench_function("real_active_dot2_refs_dense_symbolic", |b| {
b.iter(|| {
black_box(Real::active_dot2_refs(
[black_box(&dense_dot_left[0]), black_box(&dense_dot_left[1])],
[
black_box(&dense_dot_right[0]),
black_box(&dense_dot_right[1]),
],
))
})
});
group.bench_function("real_dot2_refs_mixed_structural", |b| {
b.iter(|| {
black_box(Real::dot2_refs(
[black_box(&mixed_dot_left[0]), black_box(&mixed_dot_left[1])],
[
black_box(&mixed_dot_right[0]),
black_box(&mixed_dot_right[1]),
],
))
})
});
group.bench_function("real_dot3_refs_dense_symbolic", |b| {
b.iter(|| {
black_box(Real::dot3_refs(
Expand Down
3 changes: 3 additions & 0 deletions benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,9 @@ Borrowed versus owned operation overhead for rational and real operands.
| `borrowed_op_overhead/real_dot4_refs_dense_symbolic` | 5.636 us | 5.625 us - 5.653 us | Computes a borrowed four-lane symbolic dot product with no rational shortcut terms. |
| `borrowed_op_overhead/real_active_dot4_refs_dense_symbolic` | 5.677 us | 5.670 us - 5.688 us | Computes a borrowed four-lane symbolic dot product after the caller has already classified every lane active. |
| `borrowed_op_overhead/real_dot4_refs_mixed_structural` | 653.48 ns | 651.96 ns - 655.30 ns | Computes a borrowed four-lane symbolic dot product with exact zero and rational scale terms. |
| `borrowed_op_overhead/real_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product with no rational shortcut terms. |
| `borrowed_op_overhead/real_active_dot2_refs_dense_symbolic` | not run | not run | Computes a borrowed two-lane symbolic dot product after the caller has already classified every lane active. |
| `borrowed_op_overhead/real_dot2_refs_mixed_structural` | not run | not run | Computes a borrowed two-lane symbolic dot product with an exact zero lane and a rational scale lane. |

### `dense_algebra`

Expand Down
94 changes: 94 additions & 0 deletions src/real/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1929,6 +1929,91 @@ impl Real {
}
}

/// Compute `left[0] * right[0] + left[1] * right[1]` for borrowed reals.
///
/// Two-component counterpart of [`Self::dot3_refs`] and
/// [`Self::dot4_refs`] (2D coordinates, complex products, planar dot
/// products, and similar).
///
/// When all four operands expose an exact rational via
/// `exact_rational_ref`, the result is built from a single
/// `Rational::dot_products` call. Every other input is handed to the
/// out-of-line symbolic fallback.
pub fn dot2_refs(left: [&Real; 2], right: [&Real; 2]) -> Real {
    // Query all four lanes up front; the fast path needs every one of them.
    let exact_lanes = (
        left[0].exact_rational_ref(),
        left[1].exact_rational_ref(),
        right[0].exact_rational_ref(),
        right[1].exact_rational_ref(),
    );

    match exact_lanes {
        (Some(l0), Some(l1), Some(r0), Some(r1)) => {
            crate::trace_dispatch!("real", "dot_product", "dot2-exact-rational-shared-denom");
            Real::new(Rational::dot_products([l0, l1], [r0, r1]))
        }
        // At least one operand is not an exact rational.
        _ => Self::dot2_refs_fallback(left, right),
    }
}

/// Two-lane dot product for lanes the caller has already classified active.
///
/// Unlike [`Self::dot2_refs`], no zero-lane or structural-term
/// classification is performed here. The all-exact-rational fast path is
/// still taken when available; otherwise each lane is formed with
/// `dot_product_active_term` and the two products are summed.
///
/// See [`Self::active_dot3_refs`].
pub fn active_dot2_refs(left: [&Real; 2], right: [&Real; 2]) -> Real {
    let [l0, l1] = left;
    let [r0, r1] = right;

    let exact_lanes = (
        l0.exact_rational_ref(),
        l1.exact_rational_ref(),
        r0.exact_rational_ref(),
        r1.exact_rational_ref(),
    );
    if let (Some(a0), Some(a1), Some(b0), Some(b1)) = exact_lanes {
        crate::trace_dispatch!("real", "dot_product", "active-dot2-exact-rational");
        return Real::new(Rational::dot_products([a0, a1], [b0, b1]));
    }

    crate::trace_dispatch!("real", "dot_product", "active-dot2-real-tree");
    // Lane 0 is evaluated before lane 1, matching argument order.
    let first = Self::dot_product_active_term(l0, r0);
    let second = Self::dot_product_active_term(l1, r1);
    Self::sum_dot2_terms(Some(first), Some(second))
}

/// Out-of-line slow path for [`Self::dot2_refs`].
///
/// Dispatch order:
/// 1. If either lane reports a structural term, both lanes are built with
///    `dot_product_term` and summed.
/// 2. Otherwise, if no operand's rational component has `Sign::NoSign`,
///    both lanes are multiplied directly and added.
/// 3. Otherwise the lanes are built with `dot_product_term`, and the
///    result depends on how many of them produced a term.
#[inline(never)]
fn dot2_refs_fallback(left: [&Real; 2], right: [&Real; 2]) -> Real {
    // See `dot3_refs_fallback` for the code-layout rationale.
    let [l0, l1] = left;
    let [r0, r1] = right;

    let has_structural_lane = Self::dot_product_has_structural_term(l0, r0)
        || Self::dot_product_has_structural_term(l1, r1);
    if has_structural_lane {
        crate::trace_dispatch!("real", "dot_product", "dot2-structural-real-tree");
        let first = Self::dot_product_term(l0, r0);
        let second = Self::dot_product_term(l1, r1);
        return Self::sum_dot2_terms(first, second);
    }

    // Operands are probed lane by lane (left then right), stopping at the
    // first one whose rational component has no sign.
    let has_unsigned_operand = [l0, r0, l1, r1]
        .iter()
        .any(|operand| operand.rational.sign() == Sign::NoSign);
    if !has_unsigned_operand {
        let first = l0 * r0;
        let second = l1 * r1;
        crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree");
        return &first + &second;
    }

    let first = Self::dot_product_term(l0, r0);
    let second = Self::dot_product_term(l1, r1);
    match (first.is_some(), second.is_some()) {
        (false, false) => {
            crate::trace_dispatch!("real", "dot_product", "dot2-all-zero-real-tree");
            Real::zero()
        }
        (true, true) => {
            crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree");
            Self::sum_dot2_terms(first, second)
        }
        // Exactly one lane produced a term.
        _ => {
            crate::trace_dispatch!("real", "dot_product", "dot2-generic-real-tree-sparse");
            Self::sum_dot2_terms(first, second)
        }
    }
}

/// Return the three-lane dot product of borrowed reals.
///
/// Exact-rational lanes are accumulated with one shared denominator and a
Expand Down Expand Up @@ -2338,6 +2423,15 @@ impl Real {
product
}

/// Add up to two optional lane products.
///
/// A missing (`None`) lane contributes nothing: two missing lanes give
/// `Real::zero()`, a single present lane is returned unchanged, and two
/// present lanes are added.
#[inline]
fn sum_dot2_terms(p0: Option<Real>, p1: Option<Real>) -> Real {
    match p0 {
        Some(first) => match p1 {
            Some(second) => &first + &second,
            None => first,
        },
        None => match p1 {
            Some(only) => only,
            None => Real::zero(),
        },
    }
}

#[inline]
fn sum_dot3_terms(p0: Option<Real>, p1: Option<Real>, p2: Option<Real>) -> Real {
match (p0, p1, p2) {
Expand Down
72 changes: 72 additions & 0 deletions src/real/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1511,5 +1511,77 @@ mod tests {
assert!(
(actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12
);

let expected = &(&left[0] * &right[0]) + &(&left[1] * &right[1]);
let actual = Real::dot2_refs([&left[0], &left[1]], [&right[0], &right[1]]);
assert!(
(actual.to_f64_approx().unwrap() - expected.to_f64_approx().unwrap()).abs() < 1e-12
);
}

#[test]
fn dot2_refs_matches_pairwise_rational_arithmetic() {
    // Every operand is an exact rational, so the result must equal the
    // pairwise multiply-and-add exactly (no tolerance).
    let a0 = Real::new(Rational::fraction(6, 5).unwrap());
    let a1 = Real::new(Rational::fraction(-7, 10).unwrap());
    let b0 = Real::new(Rational::fraction(-4, 5).unwrap());
    let b1 = Real::new(Rational::fraction(11, 10).unwrap());

    let lane0 = &a0 * &b0;
    let lane1 = &a1 * &b1;
    let pairwise = &lane0 + &lane1;

    let dot = Real::dot2_refs([&a0, &a1], [&b0, &b1]);
    assert_eq!(dot, pairwise);
}

#[test]
fn dot2_refs_handles_symbolic_lanes() {
    // pi * e + e * pi: no operand is an exact rational.
    let a0 = Real::pi();
    let a1 = Real::e();
    let b0 = Real::e();
    let b1 = Real::pi();

    let pairwise = &(&a0 * &b0) + &(&a1 * &b1);
    let dot = Real::dot2_refs([&a0, &a1], [&b0, &b1]);

    // Compare through the f64 approximation; exact equality is not asserted here.
    let gap = (dot.to_f64_approx().unwrap() - pairwise.to_f64_approx().unwrap()).abs();
    assert!(gap < 1e-12);
}

#[test]
fn dot2_refs_zero_lane_shortcut() {
    // Lane 0 is 0 * pi, so only lane 1 (3 * e) should contribute.
    let a0 = Real::zero();
    let a1 = Real::from(3_i32);
    let b0 = Real::pi();
    let b1 = Real::e();

    let surviving_lane = &a1 * &b1;
    let dot = Real::dot2_refs([&a0, &a1], [&b0, &b1]);

    let gap = (dot.to_f64_approx().unwrap() - surviving_lane.to_f64_approx().unwrap()).abs();
    assert!(gap < 1e-12);
}

#[test]
fn dot2_refs_all_zero_returns_zero() {
    // Both left operands are zero while the right operands are symbolic.
    let a0 = Real::zero();
    let a1 = Real::zero();
    let b0 = Real::pi();
    let b1 = Real::e();

    let dot = Real::dot2_refs([&a0, &a1], [&b0, &b1]);
    assert_eq!(dot, Real::zero());
}

#[test]
fn active_dot2_refs_matches_dot2_refs_when_all_lanes_active() {
    // Operands: pi, (3/5) e, (2/7) e and pi; none of them is zero.
    let a0 = Real::pi();
    let a1 = Real::e() * Real::new(Rational::fraction(3, 5).unwrap());
    let b0 = Real::e() * Real::new(Rational::fraction(2, 7).unwrap());
    let b1 = Real::pi();

    let classified = Real::dot2_refs([&a0, &a1], [&b0, &b1]);
    let preclassified = Real::active_dot2_refs([&a0, &a1], [&b0, &b1]);

    // The two entry points are compared through their f64 approximations.
    let gap =
        (preclassified.to_f64_approx().unwrap() - classified.to_f64_approx().unwrap()).abs();
    assert!(gap < 1e-12);
}
}
Loading