Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
157 commits
Select commit Hold shift + click to select a range
999242b
feat(accumulation): scaffold _accumulation subpackage and test directory
spMohanty May 7, 2026
dc2e4b8
feat(accumulation): port outputOrbit.js to _output_orbit.py
spMohanty May 7, 2026
9dd9b99
test(accumulation): exercise dedup kernel with truly distinct global …
spMohanty May 7, 2026
d2ab4b4
feat(accumulation): port size-aware Burnside to _burnside.py
spMohanty May 7, 2026
1637b7d
feat(accumulation): port shape classifier to _shape.py
spMohanty May 7, 2026
f03a9cf
feat(accumulation): port typed-partition basic utilities to _partitio…
spMohanty May 7, 2026
24e7c38
feat(accumulation): add orbit dedup and induced-block-action utilities
spMohanty May 7, 2026
c284e35
feat(config): add partition_budget and dimino_budget settings
spMohanty May 7, 2026
151042c
feat(accumulation): define regime ladder data types
spMohanty May 7, 2026
5552fca
feat(accumulation): port functionalProjection regime
spMohanty May 7, 2026
3e3449d
feat(accumulation): port singleton regime (|V|=1)
spMohanty May 7, 2026
f229225
feat(accumulation): port young regime (G = Sym(L), uniform sizes)
spMohanty May 7, 2026
63ceb60
feat(accumulation): port partitionCount regime (general fallback)
spMohanty May 7, 2026
2eed316
feat(accumulation): wire compute_accumulation dispatcher
spMohanty May 7, 2026
17e0b0f
feat(accumulation): port bipartite graph + incidence matrix
spMohanty May 7, 2026
f4283ba
feat(accumulation): port wreath enumerator
spMohanty May 7, 2026
b1b90c8
feat(accumulation): port sigma-loop + pi-canonical derivation
spMohanty May 7, 2026
3db3e52
feat(accumulation): port build_full_group to _detection.py
spMohanty May 7, 2026
c297212
feat(accumulation): port connected-component decomposition
spMohanty May 7, 2026
14b3f23
feat(accumulation): ComponentCost + run_ladder_per_component
spMohanty May 7, 2026
56b1506
feat(accumulation): AccumulationCost + aggregate_einsum with fallback
spMohanty May 7, 2026
df1dc8f
feat(accumulation): compute_accumulation_cost end-to-end orchestrator
spMohanty May 7, 2026
1c13a9d
feat(accumulation): lock aggregate_reduction signature (stub)
spMohanty May 7, 2026
30bced6
feat(accumulation): describe() methods build LaTeX on demand
spMohanty May 7, 2026
50c8b0c
feat(accumulation): public einsum_accumulation_cost + re-exports
spMohanty May 7, 2026
9706c49
feat(accumulation): FlopscopePathInfo wraps opt_einsum PathInfo
spMohanty May 7, 2026
69c8254
refactor(einsum): drop symmetry oracle from path search
spMohanty May 7, 2026
b1d3354
feat(einsum): add _get_accumulation_cost helper with separate LRU cache
spMohanty May 7, 2026
98f7097
feat(einsum): charge accumulation_cost.total instead of path_info.opt…
spMohanty May 7, 2026
f10f87b
refactor: delete _opt_einsum/_subgraph_symmetry.py (oracle no longer …
spMohanty May 7, 2026
05422b3
refactor: delete _opt_einsum/_symmetry.py (entirely supplanted by _ac…
spMohanty May 7, 2026
3e3f343
refactor: drop symmetry_oracle parameter from opt_einsum vendor layer
spMohanty May 7, 2026
cc66cbb
refactor(einsum): final cleanup of dead code after symmetry-oracle re…
spMohanty May 7, 2026
a0743b0
test(einsum): update FLOP cost assertions to new direct-event model
spMohanty May 7, 2026
d96a470
test(einsum): update integration + path-cache tests for the new cost …
spMohanty May 7, 2026
87d1b4c
test(symmetric_einsum): update FLOP costs + add no-gaming property test
spMohanty May 7, 2026
a933f15
test(task-37): sweep all remaining test files to new accumulation model
spMohanty May 7, 2026
f2585f4
test(accumulation): set up Node-based JS oracle for parity tests
spMohanty May 7, 2026
11dba29
test(accumulation): brute-force alpha oracle for ground-truth compari…
spMohanty May 7, 2026
439fcc0
test(accumulation): hand-curated corpus mirroring JS EXAMPLES
spMohanty May 7, 2026
9f1c243
test(accumulation): cross-language parity tests vs JS engine
spMohanty May 7, 2026
db76a54
test(accumulation): Python ladder vs SymPy oracle on the corpus
spMohanty May 7, 2026
aa8860e
perf(accumulation): cold + warm latency benchmark with CI gate
spMohanty May 7, 2026
14474f5
docs: changelog + migration notes for symmetry-aware einsum cost rewrite
spMohanty May 7, 2026
73568bf
perf(accumulation): cache repeat einsum_accumulation_cost calls
spMohanty May 7, 2026
4138f38
build: add opt_einsum runtime dependency
spMohanty May 8, 2026
a5c247b
feat(config): add fma_cost setting (default 1, validator: 1 or 2)
spMohanty May 8, 2026
c09f2ba
feat(cost): promote FMA_COST constant to fma_cost() function
spMohanty May 8, 2026
028d4c8
refactor(opt_einsum): make _helpers.flop_count read fma_cost setting
spMohanty May 8, 2026
82b51fc
feat(opt_einsum): add build_path_info adapter from upstream PathInfo
spMohanty May 8, 2026
644d0cd
refactor(opt_einsum): boundary swap — contract_path now wraps upstream
spMohanty May 8, 2026
9c44177
refactor: slim _contract.py + delete dead vendored opt_einsum files
spMohanty May 8, 2026
941394a
refactor(opt_einsum): delete _parser.py — re-export parse_einsum_inpu…
spMohanty May 8, 2026
deef6af
docs: update NOTICE and CHANGELOG for opt_einsum de-vendor
spMohanty May 8, 2026
27bb3c4
fix(einsum-path-cache): include fma_cost() in path cache key
spMohanty May 13, 2026
8c80495
fix(ci): satisfy pyright, ruff, and stabilize the partition-budget ca…
spMohanty May 13, 2026
ae25357
fix(ci): bump gitlint title-length to 90, fix one stray pyright site
spMohanty May 13, 2026
94e3a91
feat(reduction): add _normalize_axis + _num_output_orbits helpers
spMohanty May 13, 2026
c36e0d0
refactor(reduction): set-based axes_summed membership + drop unused i…
spMohanty May 13, 2026
6283502
feat(reduction): add output_discounted_reduction_cost (Tier 2)
spMohanty May 13, 2026
d6572a5
feat(reduction): implement aggregate_reduction body
spMohanty May 13, 2026
209911e
refactor(reduction): code review fixes for aggregate_reduction
spMohanty May 13, 2026
c25db90
feat(reduction): orchestrator compute_reduction_accumulation_cost
spMohanty May 13, 2026
4bdb4c8
refactor(reduction): ruff fixes for orchestrator
spMohanty May 13, 2026
3caa671
feat(reduction): LRU cache _reduction_cache + get_reduction_cost_cached
spMohanty May 13, 2026
a67aa74
feat(reduction): public flopscope.reduction_accumulation_cost wrapper
spMohanty May 13, 2026
f7d2a9f
fix(reduction-cache): resolve partition_budget before cache lookup
spMohanty May 13, 2026
c9807ad
feat(reduction): _flops.analytical_reduction_cost uses the new model
spMohanty May 14, 2026
4070282
feat(reduction): _counted_ufunc_reduce_generic picks up the new model
spMohanty May 14, 2026
c9ba07a
feat(reduction): mean wrapper uses sum-cost + num_output_orbits divides
spMohanty May 14, 2026
edfbb75
feat(reduction): _tier2_reduction_cost + median wrapper
spMohanty May 14, 2026
ff0bdf4
feat(reduction): percentile/quantile wrappers use Tier-2 discount
spMohanty May 14, 2026
7cf2575
test(reduction): SymPy oracle parity for reduction α counter
spMohanty May 14, 2026
24c67c7
test(reduction): property + benchmark + accounting tests
spMohanty May 14, 2026
0b93fd6
docs(reduction): CHANGELOG entries for the new reduction-cost model
spMohanty May 14, 2026
3522348
fix(reduction): route median/percentile/quantile through _call_numpy
spMohanty May 14, 2026
adcea16
perf(test): trim reduction-cost warm-call benchmark from 1000 to 100 …
spMohanty May 14, 2026
7dcead2
feat(public-api): expose fma_cost(), einsum_clear_caches(), einsum_ca…
spMohanty May 14, 2026
6363ed6
fix(pyright): add type: ignore[return-value] to median/percentile/qua…
spMohanty May 14, 2026
2df36af
feat(public-api): add reduction cache APIs + tier2_reduction_cost + c…
spMohanty May 14, 2026
6535663
docs(symmetry-detection): rewrite as participant-facing FLOP-counting…
spMohanty May 14, 2026
843a2eb
docs(understanding): delete symmetry-explorer page
spMohanty May 14, 2026
59ce81e
docs(flop-counting-model): note FMA configurability + link to new page
spMohanty May 14, 2026
d53e13e
docs(flop-counting-model): fix anchor + sweep stale einsum/symmetry s…
spMohanty May 14, 2026
97302be
docs(operation-categories): split reductions into Tier 1 / Tier 2
spMohanty May 14, 2026
3965514
docs(guides/symmetry): post-#51 kwarg + symmetric-reduction row
spMohanty May 14, 2026
87af8cb
docs(guides/symmetry): fix cost-table rows to match α/M model
spMohanty May 14, 2026
36dc4ca
docs(guides/einsum): add inspection section + path-independent note
spMohanty May 14, 2026
a7af8a0
docs(guides/einsum): clean up stale worked-example + kwarg
spMohanty May 14, 2026
e1027f6
docs(readme,getting-started): sync feature list + verify cost claims
spMohanty May 14, 2026
1e097c5
docs(migrations): extend with reduction + FMA + public API sections
spMohanty May 14, 2026
fb53861
docs: repoint links from the going-internal JS explorer
spMohanty May 14, 2026
fbc8104
docs: final verification sweep — fix stale cost-API + symmetry kwargs
spMohanty May 14, 2026
2b52274
docs: restore symmetry-explorer pointer + JS-route link + flopscope-o…
spMohanty May 14, 2026
ef88fb2
fix(path-info): __str__ now renders the α/M optimized_cost, not the u…
spMohanty May 14, 2026
0e5b8b8
fix(path-info): also override inner.speedup to match α/M; simplify do…
spMohanty May 14, 2026
4635347
fix(lint): sort imports in test_str_shows_flopscope_optimized_cost
spMohanty May 14, 2026
d124510
fix(lint): ruff format tests/accumulation/test_path_info.py
spMohanty May 14, 2026
beed85b
fix(_dimino): consult dimino_budget; new accumulation paths bail to d…
spMohanty May 14, 2026
4b26d6c
fix(ci): pyright type: ignore + resync flopscope-client mirror
spMohanty May 14, 2026
5707e6d
fix(test): relax warm-call benchmark budget from 5 ms to 50 ms
spMohanty May 14, 2026
4374b26
fix(test): watchdog in conftest to break pytest-xdist worker-pipe dea…
spMohanty May 14, 2026
9a2e7a4
fix(test): watchdog dumps failing test nodeids before force-exit
spMohanty May 14, 2026
ee624f9
fix(test): bump cold-call benchmark budget from 15 ms to 100 ms for CI
spMohanty May 14, 2026
e489284
fix(einsum): subtract num_output_orbits from einsum cost (off-by-one)
spMohanty May 19, 2026
a3907e2
feat(accumulation): add per_step/path fields to AccumulationCost (no …
spMohanty May 19, 2026
fa6713d
test(accumulation): add Wilson regression tests for path-aware einsum…
spMohanty May 19, 2026
ffb7dab
feat(accumulation): path-aware orchestrator for k>=3 einsums
spMohanty May 19, 2026
1490739
test(accumulation): per-step sum-check + path-shape invariants
spMohanty May 19, 2026
3e9f93b
test(accumulation): per-step cache reuse across expressions
spMohanty May 19, 2026
2c03423
test: SymmetryGroup canonical-hash invariant for per-step cache reuse
spMohanty May 19, 2026
f480c0c
refactor(opt_einsum): symmetric_flop_count delegates to compute_accum…
spMohanty May 19, 2026
75a5bc7
fix(accumulation): restore fma_cost multiplier on the multiplication …
spMohanty May 19, 2026
4faedd1
test: update multi-operand cost assertions for path-aware totals (Tas…
spMohanty May 19, 2026
47be4b0
test: three-way cost agreement on multi-operand einsums
spMohanty May 19, 2026
90eefc0
refactor(_path_info): drop monkey-patch in __str__ (reconciliation ma…
spMohanty May 19, 2026
7826576
feat(_path_info): add check_consistency() utility for three-way cost …
spMohanty May 19, 2026
a3e5c2f
feat(_opt_einsum): restore deleted StepInfo diagnostic fields from main
spMohanty May 19, 2026
b422440
feat(_opt_einsum): populate restored StepInfo diagnostic fields per step
spMohanty May 19, 2026
25c2db8
feat(_opt_einsum): restore main's richer format_table
spMohanty May 19, 2026
b992d57
feat(_opt_einsum): restore main's _rich_step_table
spMohanty May 19, 2026
584b747
feat(_opt_einsum): restore _paths.py (symmetry-aware path search) fro…
spMohanty May 19, 2026
260632f
feat(_opt_einsum): restore _path_random.py (symmetry-aware random-gre…
spMohanty May 19, 2026
f3f27f8
test: restore tests/test_opt_einsum_paths.py from main + adapt to cur…
spMohanty May 19, 2026
b418048
feat(accumulation): wire SubgraphSymmetryOracle into _walk_path_and_a…
spMohanty May 19, 2026
be27e76
feat(_opt_einsum): add regime column to format_table
spMohanty May 19, 2026
d08699c
feat(_opt_einsum): add regime column to _rich_step_table
spMohanty May 19, 2026
e0ea0da
feat(_opt_einsum): verbose detail row shows per-step M/α/−O
spMohanty May 19, 2026
b5ea285
fix(_einsum): _path_cache key includes per_op_symmetries + identity_p…
spMohanty May 19, 2026
bb96669
feat(_einsum): auto-fall-back to greedy for k>=8 (spec §10)
spMohanty May 19, 2026
e2b1b5f
test: clear_cache flushes all three cost-cache layers
spMohanty May 19, 2026
88cbb76
test(js_parity): skip k>=3 cases (path-aware JS deferred to follow-up…
spMohanty May 19, 2026
cd05f05
fix(lint): ruff format + targeted type:ignore for path-aware code
spMohanty May 19, 2026
7af8c8a
fix(lint): ruff format pass to satisfy pre-push hook
spMohanty May 19, 2026
feecd54
fix(tests): add pyright ignore comments for oe._paths/_path_random at…
spMohanty May 19, 2026
aa3feb9
test: red regression tests for FMA=2 unification (Phase 1)
spMohanty May 20, 2026
ef05f6b
fix(_cost_model): remove fma_cost from public API; delete _cost_model.py
spMohanty May 20, 2026
f9a24bd
fix(_config): remove fma_cost setting; drop from _path_cache key
spMohanty May 20, 2026
320b90b
refactor(accumulation): remove fma_cost multiplier from aggregate_einsum
spMohanty May 20, 2026
bc48471
test: delete obsolete fma-specific tests + clean up cache key (Phase …
spMohanty May 20, 2026
7032f21
feat(window): restore 2* in hamming/hanning + halve weights (FMA=2)
spMohanty May 20, 2026
f0f16a9
feat(polynomial): restore 2* in polyval_cost (FMA=2)
spMohanty May 20, 2026
0c594d5
feat(linalg): restore 2* in multi_dot_cost (FMA=2)
spMohanty May 20, 2026
5cae04a
feat(linalg): restore 2* in norm/vector_norm/matrix_norm_cost (FMA=2)
spMohanty May 20, 2026
da9914c
test: red tests for dense_flop_cost == flop_cost invariant
spMohanty May 20, 2026
43a24c5
refactor(_helpers): flop_count routes through α/M-no-symmetry (FMA=2)
spMohanty May 20, 2026
61be8aa
test(paths): clarify legacy-fallback comments in test_flop_cost
spMohanty May 20, 2026
4b50467
feat(js): FMA=2 alignment in algorithm.js + costModel.js + denseCost.js
spMohanty May 20, 2026
43f4074
docs(website): FMA=1 → FMA=2 sweep across user-facing docs
spMohanty May 20, 2026
0b6ee4d
docs: FMA=2 unification — registry notes + migration doc + _flops doc…
spMohanty May 20, 2026
6f63d01
fix(scripts): FMA=1 → FMA=2 in doc generators + sheet uploader
spMohanty May 20, 2026
a41335f
docs(benchmarks): FMA=1 → FMA=2 in comments + update hardcoded expect…
spMohanty May 20, 2026
8369ebc
fix(lint): ruff format + targeted type:ignore for FMA=2 changes
spMohanty May 20, 2026
36ff0b6
fix(client): resync flopscope-client mirror with FMA=2 changes
spMohanty May 20, 2026
c7e13d5
fix(lint): sort imports in fma test files (ruff I001)
spMohanty May 20, 2026
275b7ad
fix(lint): ruff format + delete obsolete test_fma_cost_function.py
spMohanty May 20, 2026
af30930
fix(renderer): naive_cost via α/M, propagate operand symmetries to pa…
spMohanty May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .gitlint
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ ignore-merge-commits=true
types=feat,fix,docs,style,refactor,perf,test,build,ci,chore,revert

[title-max-length]
line-length=80
line-length=90
127 changes: 127 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,133 @@

## Unreleased

### Changed (BREAKING)

- **Vendored opt_einsum replaced with runtime dependency.** flopscope now
depends on `opt_einsum>=3.3.0,<4.0.0` instead of vendoring its source.
The remaining `flopscope._opt_einsum` is a slim ~830-line shim that
adapts upstream's `PathInfo` to flopscope's expected shape and recomputes
per-step FLOP costs using flopscope's FMA convention.
- `flopscope._opt_einsum.contract_path` still returns a flopscope
`PathInfo` with the same essential fields (`path`, `steps`,
`optimized_cost`, etc.).
- `StepInfo` no longer carries the dead symmetry-related fields
(`input_groups`, `output_group`, `inner_group`, `inner_applied`,
`dense_flop_cost`, `symmetry_savings`). It now has 4-5 fields:
`subscript`, `flop_count`, `input_shapes`, `output_shape` (plus
`merged_subset` if still used by display).

- **Einsum cost model rewritten** to mirror the JS Symmetry-Aware Einsum
Contractions explorer's α/M direct-event model. The charged FLOP cost is
now path-independent: `(k - 1) · ∏ M_a + ∏ α_a` summed across components.
- `path_info.optimized_cost` returns the new whole-expression cost. For
expressions with declared symmetry, this number differs from the old
per-step `cost · unique/total` formula. See migration notes below.
- Path optimization no longer uses symmetry; `opt_einsum.contract_path`
behaves like upstream stock opt_einsum.
- Per-step `path_info.steps[i].flop_count` reverts to dense (no symmetry
adjustment per step).

- **Reduction cost model rewritten** to the orbit-mapping model. Tier 1
(`np.ufunc.reduce` ops — sum, prod, max, min, all, any, bitwise_or/and/xor,
logical_or/and):
```
cost = op_factor × (α - num_output_orbits) + extra_ops
```
where α is the input-orbit count per output orbit, summed across all output
orbits. Subtracting `num_output_orbits` fixes the off-by-one reported in #56:
a dense `sum` over `n` elements now charges `n - 1` (it previously charged
`n`). For symmetric inputs the new model charges more than the legacy
`unique_elements_for_shape` formula — see #56 for the architectural shift.
- **`np.median`, `np.percentile`, `np.quantile`** now use a Tier-2
output-discounted formula:
```
cost = num_output_orbits × dense_per_output_cost
```
For median/percentile/quantile, `dense_per_output_cost = axis_dim` (one
partition pass per output cell).
- **`np.mean`** charges `sum_cost + num_output_orbits` (one divide per
output orbit; orbit-shared output values share the divisor).
- `flopscope._flops.analytical_reduction_cost` body replaced with a
delegating call to `compute_reduction_accumulation_cost`. Signature
unchanged; numbers change.
- `flopscope.accounting.reduction_cost` returns different numbers for both
dense and symmetric inputs (via the body change above).

### Fixed

- `flopscope.numpy.einsum_path` cache now keys on `fma_cost()` in addition
to `(subscripts, shapes, optimize)`. Previously, toggling
`flopscope.configure(fma_cost=2)` after a path was cached would return a
stale `PathInfo` whose per-step `flop_count` values were computed under
the old FMA convention.

### Added

- `flopscope.fma_cost()` — top-level re-export of the FMA-convention reader
(was `flopscope._cost_model.fma_cost`). Returns the current value of the
`fma_cost` setting (1 or 2).
- `flopscope.einsum_clear_caches()` — clears the einsum path cache and the
einsum accumulation-cost cache together. Useful for cold-call benchmarks.
(The narrower `fnp.clear_einsum_cache()` continues to clear only the path
cache.)
- `flopscope.einsum_cache_info()` — returns
`{"path": CacheInfo, "accumulation": CacheInfo}` so callers can inspect
both einsum caches in one call.
- `flopscope.reduction_clear_cache()` and `flopscope.reduction_cache_info()`
— same pattern for the reduction accumulation-cost cache used by
`fnp.sum` / `fnp.mean` / `fnp.median` / etc.
- `flopscope.clear_cache()` — convenience aggregate that clears both
einsum and reduction caches in one call.
- `flopscope.tier2_reduction_cost(a, axis=None, *, dense_per_output_cost=None)`
— public inspection function for selection-style reductions
(`np.median` / `np.percentile` / `np.quantile`). Hides the
`op_factor=0, extra_ops=…` invocation pattern; `dense_per_output_cost`
defaults to the product of the reduced axes' lengths.
- `flopscope.einsum_accumulation_cost(subscripts, *operands, partition_budget=None)`
— public inspection function returning the new `AccumulationCost` decomposition
(path-independent, per-component breakdown, regime trace).
- `flopscope.AccumulationCost`, `flopscope.ComponentCost`, `flopscope.RegimeStep`
— public dataclasses.
- New settings:
- `partition_budget` (default 100 000): per-component typed-partition cap.
- `dimino_budget` (default 500 000): whole-expression `G_pt` closure cap.
- `CostFallbackWarning` now also fires when a partition counter exceeds its
budget; the total then falls back to `k · dense_baseline` (the no-symmetry
direct-event count).
- New configurable setting `fma_cost` (default 1). Counts a fused
multiply-add as 1 op (hardware convention). Set to 2 to get the
textbook / opt_einsum convention.
- `flopscope._cost_model.fma_cost()` function replaces the
`FMA_COST` constant. The constant is removed.
- `flopscope.reduction_accumulation_cost(a, axis=None, *, op_factor=1, extra_ops=0)`
— public inspection function returning an `AccumulationCost` for a
reduction. Parallel to `einsum_accumulation_cost`.
- Internal `_accumulation/_reduction.py`: `compute_reduction_accumulation_cost`
orchestrator, `output_discounted_reduction_cost` (Tier 2), and
`_normalize_axis` / `_num_output_orbits` helpers.
- `_accumulation/_cache.py`: `_reduction_cache` + `get_reduction_cost_cached`
(LRU cache, 4,096 entries).
- `_accumulation/_cost.py:aggregate_reduction` body implemented (was a
signature-locked `NotImplementedError`).

### Removed

- `flopscope._opt_einsum._paths` — now upstream
- `flopscope._opt_einsum._path_random` — now upstream
- `flopscope._opt_einsum._parser` — now upstream (re-exported via shim)
- `flopscope._opt_einsum._blas` — was unused dead code
- `flopscope._opt_einsum._testing` — was unused dead code
- `flopscope._opt_einsum._typing` — now upstream
- `flopscope._cost_model.FMA_COST` constant — replaced by `fma_cost()`
- `flopscope._opt_einsum._subgraph_symmetry` — internal module deleted.
- `flopscope._opt_einsum._symmetry` — internal module deleted (was mostly
`symmetric_flop_count`, `unique_elements`, `SubsetSymmetry` — all only used
by the deleted oracle).
- `use_inner_symmetry` setting — was a knob on the deleted oracle.

---

### BREAKING

- `BudgetContext.untracked_time` and `summary_dict()["untracked_time_s"]` now
Expand Down
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,13 @@ install: ## Install all deps (dev + docs) and set up git hooks
uv sync --all-extras
git config core.hooksPath .githooks

# ---------------------------------------------------------------------------
# Benchmarks
# ---------------------------------------------------------------------------
.PHONY: bench-accumulation
bench-accumulation: ## Cold + warm latency benchmark for einsum_accumulation_cost
$(UV) python benchmarks/accumulation/bench_cost_compute.py

# ---------------------------------------------------------------------------
# Help
# ---------------------------------------------------------------------------
Expand Down
42 changes: 29 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ for i, W in enumerate(weights):
h = fnp.einsum('ij,j->i', W, h)
if i < depth - 1:
h = fnp.maximum(h, 0)
flops.budget_summary() # 984,321 FLOPs
flops.budget_summary() # 6,231,041 FLOPs
```

</td>
Expand All @@ -87,6 +87,7 @@ flops.budget_summary() # 984,321 FLOPs
- **Budget enforcement** -- operations are checked before execution; exceeding the budget raises a clear error
- **Symmetry-aware einsum** -- automatic FLOP savings for repeated operands and declared symmetry groups
- **Transparent diagnostics** -- inspect per-operation costs, cumulative budget usage, and detailed summaries at any time
- **Inspect costs analytically** -- `flops.einsum_accumulation_cost(...)` and `flops.reduction_accumulation_cost(...)` return an `AccumulationCost` whose `.total` is the exact FLOP count for an einsum or reduction, without running the op
- **Truncated SVD** -- top-k singular value decomposition with `O(m * n * k)` cost

## What's Supported
Expand Down Expand Up @@ -143,16 +144,16 @@ flops.budget_summary() # accumulated session/global summary
```
flopscope FLOP Budget Summary
=============================
Total budget: 100,000,000
Used: 984,321 (1.0%)
Remaining: 99,015,679 (99.0%)
Total budget: 100,000,000
Used: 6,231,041 (6.2%)
Remaining: 93,768,959 (93.8%)

By operation:
random.randn 327,936 ( 33.3%) [6 calls]
multiply 327,680 ( 33.3%) [5 calls]
einsum 327,680 ( 33.3%) [5 calls]
maximum 1,024 ( 0.1%) [4 calls]
sqrt 1 ( 0.0%) [1 call]
random.randn 5,246,976 ( 84.2%) [6 calls]
einsum 655,360 ( 10.5%) [5 calls]
multiply 327,680 ( 5.3%) [5 calls]
maximum 1,024 ( 0.0%) [4 calls]
sqrt 1 ( 0.0%) [1 call]

Total Wall Time: ...s
Flopscope Backend: ...s (...%)
Expand All @@ -164,11 +165,26 @@ flopscope FLOP Budget Summary

```python
# Query FLOP costs without running anything (no BudgetContext needed)
cost = flops.einsum_cost('ij,jk->ik', shapes=[(256, 256), (256, 256)])
print(f"Matmul cost: {cost:,}") # 16,777,216
cost = flops.accounting.einsum_cost('ij,jk->ik', shapes=[(256, 256), (256, 256)])
print(f"Matmul cost: {cost:,}") # 33,554,432

cost = flops.svd_cost(m=256, n=256, k=10)
print(f"SVD cost: {cost:,}") # 655,360
cost = flops.accounting.svd_cost(m=256, n=256, k=10)
print(f"SVD cost: {cost:,}") # 2,621,440
```

For symmetry-aware inspection that takes actual array inputs (and reflects
declared symmetry), use the accumulation cost APIs:

```python
import numpy as np
A = np.zeros((256, 256))
B = np.zeros((256, 256))
# Returns an AccumulationCost decomposition without running the op
cost = flops.einsum_accumulation_cost('ij,jk->ik', A, B)
print(cost.total) # 33,554,432

cost = flops.reduction_accumulation_cost(A, op_factor=1)
print(cost.total) # 65,535
```

### Symmetry Savings
Expand Down
13 changes: 7 additions & 6 deletions benchmarks/_contractions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"outer": "M*N",
"tensordot": "product of free * contracted dims",
"kron": "numel(output)",
"einsum": "product of index dims (FMA=1)",
"einsum": "α/M model (FMA=2 textbook)",
}

_BENCHMARK_SIZE_STRINGS: dict[str, str] = {
Expand Down Expand Up @@ -59,27 +59,28 @@ def _analytical_cost(op: str, **kwargs: int) -> int:
Analytical FLOP count.
"""
costs: dict[str, int] = {
# dot: 2D matrix multiply A(512,512) @ B(512,512), FMA=1 op
# dot: 2D matrix multiply A(512,512) @ B(512,512), FMA=2 textbook
"dot": 512 * 512 * 512,
# matmul: identical to dot for 2D
"matmul": 512 * 512 * 512,
# inner: dot product of two 1M-element vectors.
# Runtime charges a.size — matches flopscope's convention (FMA=1).
# Runtime charges a.size — matches flopscope's convention (FMA=2, but
# a.size is pointwise-shaped so the FMA off-by-one doesn't apply here).
"inner": 1_000_000,
# vdot: same as inner for 1D real inputs.
# Runtime charges a.size (FMA=1).
# Runtime charges a.size (FMA=2, pointwise-shaped — no off-by-one).
"vdot": 1_000_000,
# vecdot: batched dot product A(1000,512) . B(1000,512)
# Output (1000,) with contracted axis 512.
# Runtime charges result.size * contracted = 1000 * 512 (FMA=1).
# Runtime charges result.size * contracted = 1000 * 512 (FMA=2 textbook).
"vecdot": 1000 * 512,
# outer: outer product of two 5000-element vectors
"outer": 5000 * 5000,
# tensordot: A(64,64,64) . B(64,64,64) axes=1 -> contract last of A with first of B
"tensordot": 64**5,
# kron: Kronecker product A(64,64) x B(64,64)
"kron": 64**4,
# einsum: 'ij,jk->ik' is matrix multiply (512,512)x(512,512), FMA=1
# einsum: 'ij,jk->ik' is matrix multiply (512,512)x(512,512), FMA=2 textbook
"einsum": 512 * 512 * 512,
}
return costs[op]
Expand Down
15 changes: 10 additions & 5 deletions benchmarks/_linalg_delegates.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,19 +79,24 @@ def _analytical_cost(op_name: str) -> int:
costs: dict[str, int] = {
"cond": 512 * 512 * 512, # m*n*min(m,n) via SVD
"cross": 6 * 1_000_000, # 6*n
"matmul": 512 * 512 * 512, # M*N*K (FMA=1)
"matmul": 2 * 512 * 512 * 512
- 512 * 512, # 2*M*N*K - M*N (FMA=2); = 268,173,312
"matrix_norm": 2 * 512 * 512, # 2*numel (Frobenius)
"matrix_power": 3 * 64**3, # 3 matmuls for n=5
"matrix_rank": 512 * 512 * 512, # m*n*min(m,n) via SVD
"multi_dot": 128 * 64 * 128 + 128 * 128 * 64, # FMA=1
"norm": 10_000_000, # numel (L2)
"multi_dot": 128 * 64 * 128
+ 128
* 128
* 64, # optimal chain (FMA=2); coincidentally same as FMA=1 value = 2,097,152
"norm": 2 * 10_000_000, # 2*numel (FMA=2, vector L2)
"outer": 5000 * 5000, # M*N
"tensordot": 64**5, # d^5 (FMA=1)
"tensordot": 64
** 5, # d^5 (FMA=2 textbook; matches flopscope charge = 1,073,741,824)
"tensorinv": 64**3, # n^3 after reshape
"tensorsolve": 64**3, # n^3 after reshape
"trace": 10_000, # min(m,n)
"vecdot": 1000 * 512, # batch*K
"vector_norm": 10_000_000, # numel (L2)
"vector_norm": 2 * 10_000_000, # 2*numel (FMA=2)
}
return costs[short]

Expand Down
6 changes: 4 additions & 2 deletions benchmarks/_polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
]

_FORMULA_STRINGS: dict[str, str] = {
"polyval": "n * degree (FMA=1)",
"polyval": "2 * n * degree (FMA=2)",
"polyfit": "2 * n * (degree+1)^2",
"roots": "degree^3",
"polymul": "(degree+1)^2",
Expand All @@ -40,7 +40,9 @@ def _analytical_cost(op: str, n: int, degree: int) -> int:
benchmark denominator and the budget deduction use the same formula.
"""
if op == "polyval":
return n * degree
return (
2 * n * degree
) # Updated for FMA=2 unification (spec 2026-05-20): polyval formula doubled m*deg → 2*m*deg.
elif op == "polyfit":
return 2 * n * (degree + 1) ** 2
elif op == "roots":
Expand Down
Empty file.
Loading
Loading