Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Dingo Benchmarking Suite

This directory contains performance tests to measure the overhead of the Python interface relative to the C++ volesti core.

## How to Run
1. Install dependencies: `pip install pytest-benchmark numpy arviz` (or `pip install -r benchmarks/requirements-bench.txt`)
2. Execute benchmarks: `pytest benchmarks/`

## Metrics Tracked
- **Wall-clock time:** Total execution time per sample.
- **Interface Overhead:** Time delta between Python entry and C++ execution.
- **ESS (Planned):** Statistical quality of samples.
Empty file added benchmarks/__init__.py
Empty file.
5 changes: 5 additions & 0 deletions benchmarks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import pytest

# This allows you to add global configuration for your benchmarks later
def pytest_configure(config):
    """Register custom markers up front so pytest does not warn on them."""
    marker_definition = "slow: mark benchmark as slow to run"
    config.addinivalue_line("markers", marker_definition)
28 changes: 28 additions & 0 deletions benchmarks/metrics_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import numpy as np
import arviz as az

def calculate_ess(samples):
    """Estimate the Effective Sample Size (ESS) of a set of samples.

    ArviZ's ESS diagnostic quantifies how well the dingo/volesti sampler
    is exploring the space, discounting autocorrelated draws.

    Parameters
    ----------
    samples : np.ndarray
        Array of shape (num_samples, dimensions).

    Returns
    -------
    float
        Mean ESS across all dimensions, as a single scalar metric.
    """
    # ArviZ expects a leading chain axis: (chains, draws, dimensions).
    chains = np.expand_dims(samples, axis=0)
    dataset = az.convert_to_dataset(chains)

    # Per-dimension ESS; ArviZ auto-names the converted variable "x".
    per_dim_ess = az.ess(dataset)

    # Collapse the per-dimension values into one summary number.
    return float(per_dim_ess.x.values.mean())

def report_convergence_quality(ess_value, total_samples):
    """Qualitatively rate sampling quality from the ESS-to-samples ratio.

    Parameters
    ----------
    ess_value : float
        Effective sample size (e.g. from ``calculate_ess``).
    total_samples : int
        Number of raw samples drawn; must be positive.

    Returns
    -------
    str
        "Excellent" for > 10% sampling efficiency, "Acceptable" for > 1%,
        otherwise "Poor (High Autocorrelation)".

    Raises
    ------
    ValueError
        If ``total_samples`` is not positive — replaces the confusing
        ZeroDivisionError the bare division would raise.
    """
    if total_samples <= 0:
        raise ValueError("total_samples must be a positive integer")
    ratio = ess_value / total_samples
    if ratio > 0.1:
        return "Excellent"
    elif ratio > 0.01:
        return "Acceptable"
    else:
        return "Poor (High Autocorrelation)"
3 changes: 3 additions & 0 deletions benchmarks/requirements-bench.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pytest-benchmark
numpy
arviz
Empty file added benchmarks/runner.py
Empty file.
41 changes: 41 additions & 0 deletions benchmarks/test_samplers_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import pytest
import numpy as np
from dingo import PolytopeSampler
from .metrics_utils import calculate_ess

@pytest.fixture
def unit_cube():
    """Build the H-representation (A, b) of the 10-D unit cube [0, 1]^10.

    Rows of A are +e_i (enforcing x_i <= 1) followed by -e_i
    (enforcing x_i >= 0).
    """
    dimension = 10
    identity = np.eye(dimension)
    A = np.vstack((identity, -identity))
    b = np.concatenate((np.ones(dimension), np.zeros(dimension)))
    return A, b

def test_sampling_performance_and_quality(benchmark, unit_cube):
    """
    Benchmarks the dingo/volesti interface, measuring both wall-clock
    time and the statistical quality (ESS) of the produced samples.
    """
    A, b = unit_cube
    num_samples = 2000
    sampler = PolytopeSampler()

    def _draw_and_score():
        # Draw from the polytope through the C++ core.
        raw = sampler.sample_from_polytope(A, b, n=num_samples, method='cdhr')

        # Convert to numpy and compute the quality metric on the result.
        arr = np.array(raw)
        return arr, calculate_ess(arr)

    # pytest-benchmark repeats the callable to collect stable timings.
    samples, ess_score = benchmark(_draw_and_score)

    # Sanity checks: the sampler returned the requested number of
    # samples and a meaningful (positive) effective sample size.
    assert samples.shape[0] == num_samples
    assert ess_score > 0

    # Shows up under the 'Captured stdout' section of the pytest report.
    print(f"\n[Bench Results] ESS: {ess_score:.2f} for {num_samples} samples")