Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions benchmarks/microbenchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,64 @@ python compare_results.py baseline.csv candidate.csv --bench-name GEMM

The script auto-detects metric columns, computes speedups for overlapping rows,
and reports rows that exist only in the baseline or only in the candidate.

## Visualizing results

`visualize.py` turns the produced CSVs into interactive
[Plotly](https://plotly.com/python/) charts and writes a single, self-contained
HTML file (no server needed — convenient over SSH or as a PR attachment).
`dashboard.py` is a live [Panel](https://panel.holoviz.org/) companion for
ad-hoc exploration. Install the extra dependencies first:

```bash
pip install -r requirements-viz.txt
```

Both consume the aggregate (`--csv`) and per-sample (`--csv-samples`) CSVs and
auto-detect which format they were given. Parameter columns are detected
generically, so every benchmark in this directory is supported (dense GEMM,
FP8 GEMM, grouped GEMM, normalization, casting). When a benchmark has a
dimension the default axes don't show — e.g. grouped GEMM's expert count `B` —
the tool prints a `[note]` that medians are pooled across it; add `--color`,
`--facet`, or `--pass` (or use a dashboard filter) to separate the series.

### Static HTML report

```bash
# All applicable views for one CSV -> benchmark_gemm_samples.html
python visualize.py benchmark_gemm_samples.csv

# A single view, choosing the metric
python visualize.py benchmark_gemm_samples.csv --kind scaling --value throughput

# Baseline vs candidate speedup (visual complement to compare_results.py)
python visualize.py bench_gemm_candidate.csv --baseline benchmark_gemm_samples.csv
```

Plot kinds (`--kind`, default `report` = all applicable):

- `distribution`: box plus every raw sample point per group, with percentile
trimming (`--trim-upper` / `--trim-lower`). The honest distribution view for
the suite's small (~12) sample counts, where a violin/KDE would over-smooth.
Requires a samples CSV.
- `scaling`: median throughput (or time) vs token count `M` per case, with a
shaded min–max band.
- `bars`: grouped median-throughput bars per case with IQR error bars.
- `comparison`: baseline-vs-candidate speedup bars, one per benchmark group
(needs `--baseline`).

Axes default sensibly per kind and can be overridden with `--x`, `--color`,
`--facet`, `--pass`, and `--value`. Pass `--cdn` to load plotly.js from a CDN
instead of inlining it (much smaller file, needs internet to open).

### Interactive dashboard

```bash
panel serve dashboard.py --show --args benchmark_gemm_samples.csv
```

The CSV path may also be set via the `BENCH_CSV` environment variable. The
sidebar exposes the plot kind, metric, independent variable, hue, facet, a
percentile-trim slider, and a per-attribute filter for every parameter column.
Figure builders are shared with `visualize.py`, so the static and interactive
views stay in sync.
144 changes: 144 additions & 0 deletions benchmarks/microbenchmarks/dashboard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/usr/bin/env python
###############################################################################
# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved.
#
# See LICENSE for license information.
###############################################################################
"""Interactive Panel dashboard for microbenchmark CSVs.

A live-exploration companion to ``visualize.py`` (which emits static,
self-contained HTML). It mirrors the JAX fused-attention dashboard
(``benchmarks/attention/panel_app.py``): pick the independent variable, the
hue/series, the metric, per-attribute filters, and a percentile trim, and the
Plotly figure re-renders. Figure builders are shared with ``visualize.py`` so
the static and interactive views never diverge.

Usage
-----
panel serve dashboard.py --show --args benchmark_gemm_samples.csv

The CSV path may also be given via the ``BENCH_CSV`` environment variable. If
neither is provided, the first ``*_samples.csv`` in the working directory is
used, falling back to the first ``*.csv`` when no samples file exists.
"""

import glob
import os
import sys

import panel as pn

import viz_data as vd
import visualize as viz

# Enable Panel's Plotly support and set the design / sizing defaults used by
# the components created below.
pn.extension("plotly", design="material", sizing_mode="stretch_width")


def _resolve_csv_path():
    """Pick the CSV file the dashboard should load.

    Sources are tried in order: the first command-line argument ending in
    ``.csv``, then a non-empty ``BENCH_CSV`` environment variable, then the
    first ``*_samples.csv`` (in sorted order) in the working directory, and
    finally the first ``*.csv`` there.

    Returns:
        The selected path as a string, exactly as it was given or globbed.

    Raises:
        SystemExit: If none of the sources yields a CSV.
    """
    cli_csv = next((a for a in sys.argv[1:] if a.endswith(".csv")), None)
    if cli_csv is not None:
        return cli_csv

    env_csv = os.environ.get("BENCH_CSV")
    if env_csv:
        return env_csv

    # Prefer per-sample CSVs; only look at other CSVs when none exist.
    for pattern in ("*_samples.csv", "*.csv"):
        matches = sorted(glob.glob(pattern))
        if matches:
            return matches[0]

    raise SystemExit(
        "No CSV given. Pass one via '--args FILE' or set BENCH_CSV."
    )


# ---------------------------------------------------------------------------
# Data (loaded when the script is executed)
# ---------------------------------------------------------------------------
CSV_PATH = _resolve_csv_path()
DF = vd.load_any(CSV_PATH)

# The loaded frame carries its metadata in ``DF.attrs``: the CSV flavour under
# "source" and the detected parameter columns under "params".
PARAMS = list(DF.attrs["params"])
IS_SAMPLES = DF.attrs["source"] == "samples"

# Columns offered as x / hue / facet choices and as sidebar filters.
AXIS_COLS = [*PARAMS, "pass"]

# The distribution view is only offered for per-sample data.
KINDS = ["scaling", "bars"]
if IS_SAMPLES:
    KINDS.insert(0, "distribution")

# ---------------------------------------------------------------------------
# Controls
# ---------------------------------------------------------------------------
def _option_sort_key(label):
    """Sort key that orders numeric-looking filter labels by numeric value.

    Filter options are strings, so a plain sort would list ``"1024"`` before
    ``"128"``. Labels that parse as a float sort first, by value; everything
    else (including ``"nan"``) follows in string order.
    """
    try:
        number = float(label)
    except ValueError:
        return (1, 0.0, label)
    if number != number:  # NaN never compares consistently; treat it as text.
        return (1, 0.0, label)
    return (0, number, label)


# Which figure to draw; the available kinds depend on the CSV flavour.
kind_w = pn.widgets.Select(name="Plot", options=KINDS, value=KINDS[0])

# "auto" lets the dashboard pick a metric appropriate for the plot kind.
value_w = pn.widgets.Select(
    name="Metric", options=["auto", "time_ms", "throughput"], value="auto",
)

# Default the x axis to "M" when that column exists.
x_w = pn.widgets.Select(
    name="Independent variable (x)", options=AXIS_COLS,
    value="M" if "M" in AXIS_COLS else AXIS_COLS[0],
)

# Default the series split to "Case" when present, otherwise the last axis column.
color_w = pn.widgets.Select(
    name="Hue / series", options=AXIS_COLS,
    value="Case" if "Case" in AXIS_COLS else AXIS_COLS[-1],
)

# "none" disables faceting.
facet_w = pn.widgets.Select(
    name="Facet", options=["none"] + AXIS_COLS,
    value="pass" if "pass" in AXIS_COLS else "none",
)

# Upper percentile cut-off; disabled unless the CSV holds per-sample rows.
trim_w = pn.widgets.FloatSlider(
    name="Percentile trim (upper)", start=0.5, end=1.0, step=0.01, value=0.95,
    disabled=not IS_SAMPLES,
)

# One filter per parameter column (+ pass). Empty selection means "all".
# Options are the column's values as strings (filtering compares strings),
# ordered numerically where possible so sizes read 128, 1024, 2048, ...
filter_ws = {
    col: pn.widgets.MultiChoice(
        name=col,
        options=sorted(DF[col].astype(str).unique(), key=_option_sort_key),
        value=[],
    )
    for col in AXIS_COLS
}


def _resolve_value(kind, value):
    """Translate the metric widget's selection into a concrete column name.

    An explicit choice is returned untouched. For ``"auto"``, the distribution
    plot uses ``"time_ms"`` and every other kind defers to
    ``vd.default_value_column(DF, "Forward")``.
    """
    if value == "auto":
        if kind == "distribution":
            return "time_ms"
        return vd.default_value_column(DF, "Forward")
    return value


def _apply_filters(df):
    """Return the rows of ``df`` that satisfy every active sidebar filter.

    A filter whose selection is empty is skipped, i.e. it matches all rows.
    Column values are compared as strings because the filter options are the
    stringified column values.
    """
    active = {
        column: widget.value
        for column, widget in filter_ws.items()
        if widget.value
    }
    for column, wanted in active.items():
        keep = df[column].astype(str).isin(wanted)
        df = df[keep]
    return df


@pn.depends(
    kind=kind_w, value=value_w, x=x_w, color=color_w, facet=facet_w, trim=trim_w,
    **{f"f_{c}": w for c, w in filter_ws.items()},
)
def make_plot(kind, value, x, color, facet, trim, **_filters):
    """Build the pane shown in the main area for the current widget state.

    Args:
        kind: Plot kind selected in the "Plot" widget.
        value: Metric selection; ``"auto"`` is resolved per plot kind.
        x: Column used as the independent variable.
        color: Column used for the hue / series split.
        facet: Column used for faceting, or ``"none"`` to disable it.
        trim: Upper percentile from the trim slider.
        **_filters: Filter widget values. They are accepted only so the plot
            re-renders when a filter changes; the selections themselves are
            read from the widgets by ``_apply_filters``.

    Returns:
        A Plotly pane with the figure, or an Alert pane when no rows match
        the filters or a figure builder raises.
    """
    df = _apply_filters(DF)
    if df.empty:
        return pn.pane.Alert("No rows match the current filters.", alert_type="warning")

    value = _resolve_value(kind, value)
    facet_arg = None if facet == "none" else facet
    try:
        if kind == "distribution":
            fig = viz.fig_distribution(df, x=x, value=value, color=color,
                                       facet=facet_arg, trim_upper=trim)
        else:
            # The trim slider is disabled for aggregate CSVs, so its frozen
            # default must not silently discard aggregate rows; only trim
            # when the data really is per-sample.
            if IS_SAMPLES:
                data = vd.trim_percentile(df, value=value, upper=trim)
            else:
                data = df
            builder = viz.fig_scaling if kind == "scaling" else viz.fig_throughput_bars
            fig = builder(data, x=x, value=value, color=color, facet=facet_arg)
    except Exception as exc:  # surface builder errors in the UI instead of 500s
        return pn.pane.Alert(f"Could not build plot: {exc}", alert_type="danger")
    return pn.pane.Plotly(fig, sizing_mode="stretch_width", height=640)


# ---------------------------------------------------------------------------
# Layout
# ---------------------------------------------------------------------------
# Sidebar header: where the data came from and which CSV flavour was detected.
_source_note = pn.pane.Markdown(
    f"**Source:** `{CSV_PATH}` \n**Format:** {DF.attrs['source']}"
)
_filters_heading = pn.pane.Markdown("### Filters (empty = all)")

template = pn.template.BootstrapTemplate(
    title=f"Microbenchmark Explorer — {os.path.basename(CSV_PATH)}",
    sidebar=[
        _source_note,
        kind_w, value_w, x_w, color_w, facet_w, trim_w,
        _filters_heading,
        *filter_ws.values(),
    ],
)

# The main area holds the reactive plot; mark the template as servable so
# ``panel serve`` picks it up.
template.main.append(pn.Column(make_plot))
template.servable()
5 changes: 5 additions & 0 deletions benchmarks/microbenchmarks/requirements-viz.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Dependencies for the visualization tooling (visualize.py, dashboard.py).
# Not required to run the benchmarks themselves.
pandas
plotly
panel
Loading