From ccf44071b6ba610d1a272b8c3692fbf68b084a95 Mon Sep 17 00:00:00 2001 From: Ilya Yakelzon Date: Tue, 30 Jun 2026 12:26:46 +0200 Subject: [PATCH] metrics: emit goodput track as a point attribute so the bandit evaluator can slice it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit proxy.session.goodput tagged track only via the OTEL resource (semconv.ProxyTrackKey = "proxy.track"). SigNoz's metrics pipeline doesn't expose resource attributes as queryable labels, so the bandit experiment evaluator — which groups/filters goodput by the point attribute "track" (matching lantern-box) — saw zero samples for every http-proxy track: a busy http-proxy incumbent returned 0 for track='' while a sing-box track returned data. Every http-proxy challenger/control arm therefore read 0 goodput, so no http-proxy experiment could ever reach a verdict. Emit track as a point attribute keyed "track" on the goodput histogram (threaded through NewDefault from the proxy's -track flag). The resource keeps proxy.track for everything else. cloud.region is intentionally left a resource attr: the evaluator's strata are (track, country) and a challenger is pinned to one DC, so region would add cardinality without decision value. --- http_proxy.go | 1 + instrument/goodput_test.go | 6 +++++- instrument/instrument.go | 15 ++++++++++++--- instrument/otelinstrument/otelinstrument.go | 8 ++++---- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/http_proxy.go b/http_proxy.go index c6e9403c..1a9efeb5 100644 --- a/http_proxy.go +++ b/http_proxy.go @@ -242,6 +242,7 @@ func (p *Proxy) ListenAndServe(ctx context.Context) error { p.CountryLookup, p.ISPLookup, p.ProxyName, + p.Track, ) if err != nil { return errors.New("Unable to configure instrumentation: %v", err) diff --git a/instrument/goodput_test.go b/instrument/goodput_test.go index 87f8bce5..f8741844 100644 --- a/instrument/goodput_test.go +++ b/instrument/goodput_test.go @@ -28,7 +28,7 @@ func newGoodputInstrument(t *testing.T) (*defaultInstrument, *sdkmetric.ManualRe provider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)) sdkotel.SetMeterProvider(provider) - ins, err := NewDefault(geo.NoLookup{}, &mockISPLookup{}, "test-proxy") + ins, err := NewDefault(geo.NoLookup{}, &mockISPLookup{}, "test-proxy", "test-track") require.NoError(t, err) return ins, reader } @@ -53,6 +53,10 @@ func TestSessionGoodput(t *testing.T) { attrs := extractHistogramAttrs(rm, "proxy.session.goodput") assert.Equal(t, "receive", attrs["network.io.direction"]) + // track must be a point attribute keyed "track" so the bandit evaluator can + // slice goodput per (track, country); it queries this label, not the + // "proxy.track" resource attribute. + assert.Equal(t, "test-track", attrs["track"], "goodput sample should carry the track point attribute") // The country point attribute must always be present (empty here, since the // test uses geo.NoLookup) so the metric stays sliceable by country. _, hasCountry := attrs["geo.country.iso_code"] diff --git a/instrument/instrument.go b/instrument/instrument.go index 679a3b73..20ca73e1 100644 --- a/instrument/instrument.go +++ b/instrument/instrument.go @@ -112,9 +112,10 @@ type defaultInstrument struct { originStats map[originDetails]*usage statsMx sync.Mutex proxyName string + track string } -func NewDefault(countryLookup geo.CountryLookup, ispLookup geo.ISPLookup, proxyName string) (*defaultInstrument, error) { +func NewDefault(countryLookup geo.CountryLookup, ispLookup geo.ISPLookup, proxyName, track string) (*defaultInstrument, error) { if err := otelinstrument.Initialize(); err != nil { return nil, err } @@ -127,6 +128,7 @@ func NewDefault(countryLookup geo.CountryLookup, ispLookup geo.ISPLookup, proxyN clientStats: make(map[clientDetails]*usage), originStats: make(map[originDetails]*usage), proxyName: proxyName, + track: track, } return p, nil @@ -320,8 +322,14 @@ func (ins *defaultInstrument) ProxiedBytes(ctx context.Context, sent, recv int, // periods, so this is a floor on true transfer speed — but both arms of a // bandit experiment are measured identically, so it's a fair relative signal, // and the byte floor filters the worst idle-dominated noise. No device_id tag -// (cardinality); track and cloud.region come from resource attributes, leaving -// geo.country.iso_code as the only point attribute the evaluator strata need. +// (cardinality). track is emitted as a point attribute keyed "track": the bandit +// evaluator slices goodput per (track, country) and queries that low-cardinality +// point attribute (matching lantern-box). The resource also carries the track as +// semconv.ProxyTrackKey ("proxy.track"), but the metrics pipeline doesn't expose +// resource attributes as queryable labels, so the evaluator's "track" filter +// can't see it — hence the explicit point attribute. cloud.region is left to the +// resource: the strata are (track, country) only and a challenger is pinned to +// one DC, so region would add cardinality without decision value. func (ins *defaultInstrument) SessionGoodput(ctx context.Context, recvBytes int, duration time.Duration, clientIP net.IP) { if recvBytes < goodputMinBytes || duration <= 0 { return @@ -330,6 +338,7 @@ func (ins *defaultInstrument) SessionGoodput(ctx context.Context, recvBytes int, country := ins.countryLookup.CountryCode(clientIP) otelinstrument.SessionGoodput.Record(ctx, goodput, metric.WithAttributes( + attribute.String("track", ins.track), semconv.GeoCountryISOCodeKey.String(country), semconv.NetworkIODirectionKey.String("receive"), )) diff --git a/instrument/otelinstrument/otelinstrument.go b/instrument/otelinstrument/otelinstrument.go index a3bde0a8..35144867 100644 --- a/instrument/otelinstrument/otelinstrument.go +++ b/instrument/otelinstrument/otelinstrument.go @@ -82,10 +82,10 @@ func initialize() error { } // Per-session download goodput (received bytes per second of connection // lifetime), recorded once at connection close for sessions that moved at - // least goodputMinBytes. Sliceable by track (resource attr) × cloud.region - // (resource attr) × geo.country.iso_code (point attr) so the bandit - // experiment evaluator can compare a challenger track's median goodput - // against the incumbent's. Unit "bytes/s" follows proxy.io's "bytes" + // least goodputMinBytes. Sliceable by track × geo.country.iso_code (both + // point attrs) so the bandit experiment evaluator can compare a challenger + // track's median goodput against the incumbent's per market; cloud.region + // stays a resource attr. Unit "bytes/s" follows proxy.io's "bytes" // spelling for consistency within this package's metrics. if SessionGoodput, err = meter.Float64Histogram("proxy.session.goodput", metric.WithUnit("bytes/s"),