From 798577f940e9ce7be0a0831e72d9b8078a90bf48 Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 08:29:36 +0100 Subject: [PATCH 1/9] feat: add support for automatic DSM context extraction --- bottlecap/Cargo.lock | 1 + bottlecap/Cargo.toml | 7 +- bottlecap/src/bin/bottlecap/main.rs | 33 +- bottlecap/src/config/env.rs | 13 + bottlecap/src/config/mod.rs | 10 + bottlecap/src/config/yaml.rs | 7 + bottlecap/src/flushing/service.rs | 17 + .../src/lifecycle/invocation/processor.rs | 112 ++ .../lifecycle/invocation/processor_service.rs | 6 +- .../invocation/triggers/event_bridge_event.rs | 108 ++ .../invocation/triggers/kinesis_event.rs | 13 + .../src/lifecycle/invocation/triggers/mod.rs | 7 + .../invocation/triggers/sns_event.rs | 9 + .../invocation/triggers/sqs_event.rs | 10 + bottlecap/src/proxy/interceptor.rs | 19 +- bottlecap/src/tags/lambda/tags.rs | 2 +- .../src/traces/data_streams/aggregator.rs | 237 +++ .../src/traces/data_streams/checkpoint.rs | 124 ++ bottlecap/src/traces/data_streams/context.rs | 172 ++ .../data_streams/fixtures/sketch_golden.json | 1491 +++++++++++++++++ bottlecap/src/traces/data_streams/mod.rs | 25 + bottlecap/src/traces/data_streams/pathway.rs | 165 ++ .../src/traces/data_streams/processor.rs | 214 +++ .../traces/data_streams/propagation_hash.rs | 52 + bottlecap/src/traces/data_streams/sketch.rs | 341 ++++ bottlecap/src/traces/mod.rs | 1 + tools/dsm/gen_sketch_golden.js | 67 + 27 files changed, 3256 insertions(+), 7 deletions(-) create mode 100644 bottlecap/src/traces/data_streams/aggregator.rs create mode 100644 bottlecap/src/traces/data_streams/checkpoint.rs create mode 100644 bottlecap/src/traces/data_streams/context.rs create mode 100644 bottlecap/src/traces/data_streams/fixtures/sketch_golden.json create mode 100644 bottlecap/src/traces/data_streams/mod.rs create mode 100644 bottlecap/src/traces/data_streams/pathway.rs create mode 100644 bottlecap/src/traces/data_streams/processor.rs create mode 100644 
bottlecap/src/traces/data_streams/propagation_hash.rs create mode 100644 bottlecap/src/traces/data_streams/sketch.rs create mode 100644 tools/dsm/gen_sketch_golden.js diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index c360692fe..e365602bd 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -535,6 +535,7 @@ dependencies = [ "rustls-webpki", "serde", "serde-aux", + "serde_bytes", "serde_html_form", "serde_json", "serial_test", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 446bab161..2456dea98 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -28,6 +28,10 @@ regex = { version = "1.10", default-features = false } reqwest = { version = "0.12.11", features = ["json", "http2"], default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["alloc"] } +serde_bytes = { version = "0.11", default-features = false, features = ["std"] } +# DSM pipeline-stats serialization (msgpack + gzip) for extension-side checkpoints. +rmp-serde = { version = "1.3.1", default-features = false } +flate2 = { version = "1.1", default-features = false, features = ["rust_backend"] } thiserror = { version = "1.0", default-features = false } # Transitive dependency (pulled in via cookie). Pinned to >=0.3.47 so cargo audit / CI passes (RUSTSEC-2026-0009). 
time = { version = "0.3.47", default-features = false } @@ -94,9 +98,6 @@ tower = { version = "0.5", features = ["util"] } mock_instant = "0.6" serial_test = "3.1" tempfile = "3.20" -# fake-intake test harness: decode msgpack+gzip stats payloads on arrival -rmp-serde = { version = "1.3.1", default-features = false } -flate2 = { version = "1.1", default-features = false, features = ["rust_backend"] } [build-dependencies] # No external dependencies needed for the build script diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index acfbf444c..0e7168a04 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -330,6 +330,32 @@ async fn extension_loop_active( .await; let propagator = Arc::new(DatadogCompositePropagator::new(Arc::clone(config))); + + // Shared proxy aggregator (used by the trace agent's proxy endpoints and, + // when enabled, the extension-side DSM processor). + let proxy_aggregator = Arc::new(TokioMutex::new(proxy_aggregator::Aggregator::default())); + + // Extension-side Data Streams Monitoring (consume checkpoints), gated by + // DD_DSM_CONSUME_ENABLED. 
+ let dsm_processor = if config.dsm_consume_enabled { + let service = config + .service + .clone() + .or_else(|| tags_provider.get_canonical_resource_name()) + .unwrap_or_else(|| "aws.lambda".to_string()) + .to_lowercase(); + let env = config.env.clone().unwrap_or_default(); + Some(Arc::new(bottlecap::traces::data_streams::DsmProcessor::new( + service, + env, + env!("CARGO_PKG_VERSION").to_string(), + &config.site, + Arc::clone(&proxy_aggregator), + ))) + } else { + None + }; + // Lifecycle Invocation Processor let (invocation_processor_handle, invocation_processor_service) = InvocationProcessorService::new( @@ -339,6 +365,7 @@ async fn extension_loop_active( metrics_aggregator_handle.clone(), Arc::clone(&propagator), durable_context_tx, + dsm_processor.clone(), ); tokio::spawn(async move { invocation_processor_service.run().await; @@ -372,6 +399,7 @@ async fn extension_loop_active( invocation_processor_handle.clone(), appsec_processor.clone(), &shared_client, + Arc::clone(&proxy_aggregator), ); let api_runtime_proxy_shutdown_signal = start_api_runtime_proxy( @@ -429,6 +457,7 @@ async fn extension_loop_active( let stats_flusher_clone = Arc::clone(&stats_flusher); let proxy_flusher_clone = proxy_flusher.clone(); let metrics_aggr_handle_clone = metrics_aggregator_handle.clone(); + let dsm_processor_clone = dsm_processor.clone(); // In Managed Instance mode, create a separate interval for the background flusher task. 
// We don't reuse race_flush_interval because we need to configure the missed tick @@ -459,6 +488,7 @@ async fn extension_loop_active( proxy_flusher_clone, metrics_flushers_clone, metrics_aggr_handle_clone, + dsm_processor_clone, ); loop { @@ -633,6 +663,7 @@ async fn extension_loop_active( proxy_flusher.clone(), Arc::clone(&metrics_flushers), metrics_aggregator_handle.clone(), + dsm_processor.clone(), ); handle_next_invocation(next_lambda_response, &invocation_processor_handle).await; loop { @@ -1103,6 +1134,7 @@ fn start_trace_agent( invocation_processor_handle: InvocationProcessorHandle, appsec_processor: Option>>, client: &Client, + proxy_aggregator: Arc>, ) -> ( Sender, Arc, @@ -1167,7 +1199,6 @@ fn start_trace_agent( tokio::spawn(span_dedup_service.run()); // Proxy - let proxy_aggregator = Arc::new(TokioMutex::new(proxy_aggregator::Aggregator::default())); let proxy_flusher = Arc::new(ProxyFlusher::new( api_key_factory.clone(), Arc::clone(&proxy_aggregator), diff --git a/bottlecap/src/config/env.rs b/bottlecap/src/config/env.rs index 96d4afee3..5ff647227 100644 --- a/bottlecap/src/config/env.rs +++ b/bottlecap/src/config/env.rs @@ -256,6 +256,15 @@ pub struct EnvConfig { /// Enable the new AWS-resource naming logic in the tracer. #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] pub trace_aws_service_representation_enabled: Option, + /// @env `DD_DSM_CONSUME_ENABLED` + /// + /// Enable extension-side Data Streams Monitoring consume checkpoints. When + /// enabled, the extension extracts inbound DSM pathway context from event + /// payloads and emits `direction:in` checkpoints itself. 
+ #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub dsm_consume_enabled: Option, + /// @env `DD_DSM_EXCHANGE_NAME` + pub dsm_exchange_name: Option, // // Trace Propagation /// @env `DD_TRACE_PROPAGATION_STYLE` @@ -582,6 +591,8 @@ fn merge_config(config: &mut Config, env_config: &EnvConfig) { merge_option!(config, env_config, apm_filter_tags_regex_require); merge_option!(config, env_config, apm_filter_tags_regex_reject); merge_option_to_value!(config, env_config, trace_aws_service_representation_enabled); + merge_option_to_value!(config, env_config, dsm_consume_enabled); + merge_option!(config, env_config, dsm_exchange_name); // Trace Propagation merge_vec!(config, env_config, trace_propagation_style); @@ -1027,6 +1038,8 @@ mod tests { trace_propagation_extract_first: true, trace_propagation_http_baggage_enabled: true, trace_aws_service_representation_enabled: true, + dsm_consume_enabled: false, + dsm_exchange_name: None, metrics_config_compression_level: 3, otlp_config_traces_enabled: false, otlp_config_traces_span_name_as_resource_name: true, diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index ebcb05aaf..90cded702 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -301,6 +301,14 @@ pub struct Config { pub trace_propagation_http_baggage_enabled: bool, pub trace_aws_service_representation_enabled: bool, + // Data Streams Monitoring + /// Enable extension-side DSM consume checkpoints (`DD_DSM_CONSUME_ENABLED`). + pub dsm_consume_enabled: bool, + /// Fallback DSM `exchange` (event bus name) used for `EventBridge` consume + /// checkpoints when it cannot be derived from the event payload + /// (`DD_DSM_EXCHANGE_NAME`). 
+ pub dsm_exchange_name: Option, + // Metrics pub metrics_config_compression_level: i32, pub statsd_metric_namespace: Option, @@ -431,6 +439,8 @@ impl Default for Config { apm_filter_tags_regex_require: None, apm_filter_tags_regex_reject: None, trace_aws_service_representation_enabled: true, + dsm_consume_enabled: false, + dsm_exchange_name: None, trace_propagation_style: vec![ TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext, diff --git a/bottlecap/src/config/yaml.rs b/bottlecap/src/config/yaml.rs index e825e25b1..ef57e15ba 100644 --- a/bottlecap/src/config/yaml.rs +++ b/bottlecap/src/config/yaml.rs @@ -81,6 +81,9 @@ pub struct YamlConfig { pub service_mapping: HashMap, #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] pub trace_aws_service_representation_enabled: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub dsm_consume_enabled: Option, + pub dsm_exchange_name: Option, // Trace Propagation #[serde(deserialize_with = "deserialize_trace_propagation_style")] pub trace_propagation_style: Vec, @@ -553,6 +556,8 @@ fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { yaml_config, trace_aws_service_representation_enabled ); + merge_option_to_value!(config, yaml_config, dsm_consume_enabled); + merge_option!(config, yaml_config, dsm_exchange_name); // OTLP if let Some(otlp_config) = &yaml_config.otlp_config { @@ -975,6 +980,8 @@ api_security_sample_delay: 60 # Seconds trace_propagation_extract_first: true, trace_propagation_http_baggage_enabled: true, trace_aws_service_representation_enabled: true, + dsm_consume_enabled: false, + dsm_exchange_name: None, metrics_config_compression_level: 3, otlp_config_traces_enabled: false, otlp_config_traces_span_name_as_resource_name: true, diff --git a/bottlecap/src/flushing/service.rs b/bottlecap/src/flushing/service.rs index bd9c66882..c656b74b1 100644 --- a/bottlecap/src/flushing/service.rs +++ b/bottlecap/src/flushing/service.rs @@ -29,6 
+29,11 @@ pub struct FlushingService { proxy_flusher: Arc, metrics_flushers: Arc>, + /// Optional extension-side DSM processor. When present, its aggregated + /// pipeline-stats payload is drained into the proxy aggregator immediately + /// before each proxy flush. `None` unless `DD_DSM_CONSUME_ENABLED` is set. + dsm_processor: Option>, + // Metrics aggregator handle for getting data to flush metrics_aggr_handle: MetricsAggregatorHandle, @@ -46,6 +51,7 @@ impl FlushingService { proxy_flusher: Arc, metrics_flushers: Arc>, metrics_aggr_handle: MetricsAggregatorHandle, + dsm_processor: Option>, ) -> Self { Self { logs_flusher, @@ -53,6 +59,7 @@ impl FlushingService { stats_flusher, proxy_flusher, metrics_flushers, + dsm_processor, metrics_aggr_handle, handles: FlushHandles::new(), } @@ -123,6 +130,11 @@ impl FlushingService { sf.flush(false, None).await.unwrap_or_default() })); + // Drain DSM pipeline stats into the proxy aggregator before flushing. + if let Some(dsm) = &self.dsm_processor { + dsm.drain_into_proxy().await; + } + // Spawn proxy flush let pf = self.proxy_flusher.clone(); self.handles @@ -324,6 +336,11 @@ impl FlushingService { }) .collect(); + // Drain DSM pipeline stats into the proxy aggregator before flushing. + if let Some(dsm) = &self.dsm_processor { + dsm.drain_into_proxy().await; + } + tokio::join!( self.logs_flusher.flush(None), futures::future::join_all(metrics_futures), diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 1f42fa87d..42a490657 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -111,6 +111,11 @@ pub struct Processor { /// on `platform.report`. This flag ensures whichever event arrives first wins and the other is skipped, /// preventing double counting. init_duration_metric_emitted: bool, + /// Optional extension-side DSM consume processor. 
`Some` only when + /// `DD_DSM_CONSUME_ENABLED` is set; records `direction:in` checkpoints from + /// inbound event payloads. + #[allow(clippy::struct_field_names)] + dsm_processor: Option>, } impl Processor { @@ -154,9 +159,19 @@ impl Processor { durable_context_tx, restore_time: None, init_duration_metric_emitted: false, + dsm_processor: None, } } + /// Attach an extension-side DSM consume processor. Called during startup only + /// when `DD_DSM_CONSUME_ENABLED` is set. + pub fn set_dsm_processor( + &mut self, + dsm_processor: Arc, + ) { + self.dsm_processor = Some(dsm_processor); + } + /// Given a `request_id`, creates the context and adds the enhanced metric offsets to the context buffer. /// pub fn on_invoke_event(&mut self, request_id: String) { @@ -1088,6 +1103,33 @@ impl Processor { context.extracted_span_context = Self::extract_span_context(&headers, &payload_value, Arc::clone(&self.propagator)); + // Extension-side DSM: record a consume (`direction:in`) checkpoint for + // DSM-eligible event sources, continuing any inbound pathway context. + if let Some(dsm) = self.dsm_processor.clone() { + debug!("DSM: extraction hook fired for request {request_id}"); + let identified = + crate::lifecycle::invocation::triggers::IdentifiedTrigger::from_value(&payload_value); + if let Some(trigger) = SpanInferrer::get_trigger_type(identified) { + if let Some(mut edge_tags) = trigger.get_dsm_edge_tags() { + apply_dsm_exchange_fallback( + &mut edge_tags, + self.config.dsm_exchange_name.as_deref(), + ); + debug!("DSM: trigger is DSM-eligible, edge_tags={edge_tags:?}"); + // Payload size is not currently measured; latency stats are unaffected. 
+ dsm.record_consume(&edge_tags, &trigger.get_carrier(), 0.0); + } else { + debug!( + "DSM: identified trigger is not DSM-eligible, skipping consume checkpoint" + ); + } + } else { + debug!("DSM: no trigger identified for payload, skipping consume checkpoint"); + } + } else { + debug!("DSM: no DSM processor available, skipping consume checkpoint"); + } + // Set the extracted trace context to the spans if let Some(sc) = &context.extracted_span_context { #[allow(clippy::cast_possible_truncation)] // Datadog protocol uses lower 64 bits @@ -1534,6 +1576,19 @@ impl Processor { } } +/// Apply the configured `DD_DSM_EXCHANGE_NAME` fallback to DSM consume edge +/// tags. The fallback only applies to `EventBridge` (`type:eventbridge`) tags +/// that do not already carry a payload-derived `exchange:` tag, so a +/// payload-derived bus always wins and other sources are never affected. +fn apply_dsm_exchange_fallback(edge_tags: &mut Vec, exchange: Option<&str>) { + if let Some(exchange) = exchange + && edge_tags.iter().any(|t| t == "type:eventbridge") + && !edge_tags.iter().any(|t| t.starts_with("exchange:")) + { + edge_tags.push(format!("exchange:{exchange}")); + } +} + #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { @@ -1548,6 +1603,63 @@ mod tests { use dogstatsd::metric::EMPTY_TAGS; use serde_json::json; + #[test] + fn dsm_exchange_fallback_injects_for_eventbridge_without_exchange() { + let mut tags = vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + "topic:OrderPlaced".to_string(), + ]; + apply_dsm_exchange_fallback(&mut tags, Some("my-bus")); + assert_eq!( + tags, + vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + "topic:OrderPlaced".to_string(), + "exchange:my-bus".to_string(), + ] + ); + } + + #[test] + fn dsm_exchange_fallback_does_not_override_payload_derived_exchange() { + let mut tags = vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + "exchange:payload-bus".to_string(), + 
"topic:OrderPlaced".to_string(), + ]; + let before = tags.clone(); + apply_dsm_exchange_fallback(&mut tags, Some("my-bus")); + assert_eq!(tags, before); + } + + #[test] + fn dsm_exchange_fallback_ignored_for_non_eventbridge_sources() { + // SQS consume tags must never receive an injected exchange. + let mut tags = vec![ + "direction:in".to_string(), + "topic:my-queue".to_string(), + "type:sqs".to_string(), + ]; + let before = tags.clone(); + apply_dsm_exchange_fallback(&mut tags, Some("my-bus")); + assert_eq!(tags, before); + } + + #[test] + fn dsm_exchange_fallback_noop_when_unconfigured() { + let mut tags = vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + "topic:OrderPlaced".to_string(), + ]; + let before = tags.clone(); + apply_dsm_exchange_fallback(&mut tags, None); + assert_eq!(tags, before); + } + fn setup() -> Processor { let aws_config = Arc::new(AwsConfig { region: "us-east-1".into(), diff --git a/bottlecap/src/lifecycle/invocation/processor_service.rs b/bottlecap/src/lifecycle/invocation/processor_service.rs index c5703c042..d98dc974a 100644 --- a/bottlecap/src/lifecycle/invocation/processor_service.rs +++ b/bottlecap/src/lifecycle/invocation/processor_service.rs @@ -464,10 +464,11 @@ impl InvocationProcessorService { metrics_aggregator_handle: AggregatorHandle, propagator: Arc, durable_context_tx: mpsc::Sender, + dsm_processor: Option>, ) -> (InvocationProcessorHandle, Self) { let (sender, receiver) = mpsc::channel(1000); - let processor = Processor::new( + let mut processor = Processor::new( tags_provider, config, aws_config, @@ -475,6 +476,9 @@ impl InvocationProcessorService { propagator, durable_context_tx, ); + if let Some(dsm) = dsm_processor { + processor.set_dsm_processor(dsm); + } let handle = InvocationProcessorHandle { sender }; let service = Self { diff --git a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs index 322c82738..ecdcbc91d 
100644 --- a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs @@ -111,6 +111,45 @@ impl Trigger for EventBridgeEvent { fn is_async(&self) -> bool { true } + + fn get_dsm_edge_tags(&self) -> Option> { + // EventBridge consume edge tags. `topic` is the detail-type. `exchange` + // is the event bus name, which is NOT carried in the event delivered to + // Lambda; when a triggering rule ARN is present in `resources` it is + // encoded as `:rule/<bus>/<rule>`, so we recover it best-effort and omit + // the tag entirely when it cannot be determined (rather than emit a + // wrong/empty value that would corrupt the pathway hash). + let mut tags = vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + ]; + if let Some(bus) = self.event_bus_name() { + tags.push(format!("exchange:{bus}")); + } + tags.push(format!("topic:{}", self.detail_type)); + Some(tags) + } +} + +impl EventBridgeEvent { + /// Best-effort event bus name recovered from a triggering rule ARN in + /// `resources`. Non-default buses encode the name as `:rule/<bus>/<rule>`; + /// the default bus appears as `:rule/<rule>` (no bus segment). Returns + /// `None` when no bus name can be determined from the payload. + fn event_bus_name(&self) -> Option { + for arn in &self.resources { + if let Some(rest) = arn.split(":rule/").nth(1) { + let mut segments = rest.split('/'); + let first = segments.next().unwrap_or_default(); + // `rule/<bus>/<rule>` => bus is the first segment. + // `rule/<rule>` (default bus) => no second segment, skip. 
+ if segments.next().is_some() && !first.is_empty() { + return Some(first.to_string()); + } + } + } + None + } } impl ServiceNameResolver for EventBridgeEvent { @@ -236,6 +275,75 @@ mod tests { assert_eq!(event.get_arn("us-east-1"), "my.event"); } + fn make_event(detail_type: &str, resources: Vec) -> EventBridgeEvent { + EventBridgeEvent { + id: "id".to_string(), + version: "0".to_string(), + account: "123456789012".to_string(), + time: Utc::now(), + region: "us-east-1".to_string(), + resources, + source: "my.event".to_string(), + detail_type: detail_type.to_string(), + detail: serde_json::json!({}), + replay_name: None, + } + } + + #[test] + fn test_get_dsm_edge_tags_no_resources_omits_exchange() { + // The standard fixture has no `resources`, so the bus name is unknown + // and the exchange tag must be omitted. + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize EventBridge Event"); + assert_eq!( + event.get_dsm_edge_tags(), + Some(vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + "topic:UserSignUp".to_string(), + ]) + ); + } + + #[test] + fn test_get_dsm_edge_tags_recovers_bus_from_rule_arn() { + let event = make_event( + "OrderPlaced", + vec![ + "arn:aws:events:us-east-1:123456789012:rule/my-bus/my-rule".to_string(), + ], + ); + assert_eq!( + event.get_dsm_edge_tags(), + Some(vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + "exchange:my-bus".to_string(), + "topic:OrderPlaced".to_string(), + ]) + ); + } + + #[test] + fn test_get_dsm_edge_tags_default_bus_rule_arn_omits_exchange() { + // Default-bus rule ARNs have no bus segment (`:rule/<rule>`). 
+ let event = make_event( + "OrderPlaced", + vec!["arn:aws:events:us-east-1:123456789012:rule/my-rule".to_string()], + ); + assert_eq!( + event.get_dsm_edge_tags(), + Some(vec![ + "direction:in".to_string(), + "type:eventbridge".to_string(), + "topic:OrderPlaced".to_string(), + ]) + ); + } + #[test] fn test_get_carrier() { let json = read_json_file("eventbridge_event.json"); diff --git a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs index 7883dfda4..29cf6e9f5 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs @@ -132,6 +132,19 @@ impl Trigger for KinesisRecord { fn is_async(&self) -> bool { true } + + fn get_dsm_edge_tags(&self) -> Option> { + // stream name = last `/` segment of the event source ARN. + let stream = self.event_source_arn.split('/').next_back().unwrap_or_default(); + if stream.is_empty() { + return Some(vec!["direction:in".to_string(), "type:kinesis".to_string()]); + } + Some(vec![ + "direction:in".to_string(), + format!("topic:{stream}"), + "type:kinesis".to_string(), + ]) + } } impl ServiceNameResolver for KinesisRecord { diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 8e89b2a4c..1fdef633b 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -130,6 +130,13 @@ pub trait Trigger: ServiceNameResolver { fn get_carrier(&self) -> HashMap; fn is_async(&self) -> bool; + /// Data Streams Monitoring consume-side edge tags for this trigger, with the + /// `direction:in` tag first. Returns `None` for sources that are not + /// DSM-eligible. Default: `None`. 
+ fn get_dsm_edge_tags(&self) -> Option> { + None + } + fn get_dd_resource_key(&self, _region: &str) -> Option { None } diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs index 96cbc152d..40c9cd87a 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -165,6 +165,15 @@ impl Trigger for SnsRecord { fn is_async(&self) -> bool { true } + + fn get_dsm_edge_tags(&self) -> Option> { + // SNS uses the full topic ARN as the topic tag (matches dd-trace-js). + Some(vec![ + "direction:in".to_string(), + format!("topic:{}", self.sns.topic_arn), + "type:sns".to_string(), + ]) + } } impl ServiceNameResolver for SnsRecord { diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs index eb81944eb..1dcc9bcfa 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -202,6 +202,16 @@ impl Trigger for SqsRecord { fn is_async(&self) -> bool { true } + + fn get_dsm_edge_tags(&self) -> Option> { + // queue name = last `:` segment of the event source ARN. + let queue = self.event_source_arn.split(':').next_back().unwrap_or_default(); + Some(vec![ + "direction:in".to_string(), + format!("topic:{queue}"), + "type:sqs".to_string(), + ]) + } } impl ServiceNameResolver for SqsRecord { diff --git a/bottlecap/src/proxy/interceptor.rs b/bottlecap/src/proxy/interceptor.rs index 21a018377..9da6b144a 100644 --- a/bottlecap/src/proxy/interceptor.rs +++ b/bottlecap/src/proxy/interceptor.rs @@ -196,7 +196,22 @@ async fn invocation_next_proxy( } } - if aws_config.aws_lwa_proxy_lambda_runtime_api.is_some() { + // Drive universal instrumentation from the intercepted `/next` payload + // whenever the runtime API proxy is in the request path. For LWA this + // has always happened. 
We additionally enable it for the experimental + // wrapper proxy (`DD_EXPERIMENTAL_ENABLE_PROXY=true`, which the + // datadog_wrapper uses to reroute AWS_LAMBDA_RUNTIME_API to the + // extension), so the extension sees the event payload without a tracer + // calling `/lambda/start-invocation`. + // + // TEMPORARY: this is what feeds the extension-side DSM consume hook for + // functions whose tracer does not drive the invocation lifecycle. + let experimental_proxy_enabled = std::env::var("DD_EXPERIMENTAL_ENABLE_PROXY") + .is_ok_and(|v| v.eq_ignore_ascii_case("true")); + if aws_config.aws_lwa_proxy_lambda_runtime_api.is_some() || experimental_proxy_enabled { + debug!( + "PROXY | invocation_next_proxy | driving universal instrumentation from intercepted payload" + ); lwa::process_invocation_next( &invocation_processor, &intercepted_parts_clone, @@ -449,6 +464,7 @@ mod tests { }; #[tokio::test] + #[allow(clippy::too_many_lines)] async fn test_noop_proxy() { let aws_lwa_lambda_runtime_api = "127.0.0.1:12345"; let aws_lambda_runtime_api = "127.0.0.1:12344"; @@ -508,6 +524,7 @@ mod tests { metrics_aggregator, Arc::clone(&propagator), durable_context_tx, + None, ); tokio::spawn(async move { invocation_processor_service.run().await; diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index cd6c4d69b..ae04b2928 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -47,7 +47,7 @@ const FUNCTION_TAGS_KEY: &str = "_dd.tags.function"; // TODO(astuyve) decide what to do with the version const EXTENSION_VERSION_KEY: &str = "dd_extension_version"; // TODO(duncanista) figure out a better way to not hardcode this -pub const EXTENSION_VERSION: &str = "97-next"; +pub const EXTENSION_VERSION: &str = "james-test-next"; const REGION_KEY: &str = "region"; const ACCOUNT_ID_KEY: &str = "account_id"; diff --git a/bottlecap/src/traces/data_streams/aggregator.rs b/bottlecap/src/traces/data_streams/aggregator.rs new file mode 
100644 index 000000000..d65b6e93e --- /dev/null +++ b/bottlecap/src/traces/data_streams/aggregator.rs @@ -0,0 +1,237 @@ +//! In-memory aggregation of DSM consume checkpoints into pipeline-stats buckets, +//! and serialization to the msgpack payload the DSM intake expects. +//! +//! Mirrors the `dd-trace-js` processor: 10-second time buckets keyed by checkpoint +//! hash, each holding `EdgeLatency` / `PathwayLatency` / `PayloadSize` sketches. +//! The serialized payload is msgpack (struct-as-map) and is gzipped by the +//! flusher before being sent to `/api/v0.1/pipeline_stats`. + +use std::collections::HashMap; + +use serde::Serialize; + +use crate::traces::data_streams::checkpoint::Checkpoint; +use crate::traces::data_streams::sketch::DdSketch; + +/// Bucket width in nanoseconds (10s), matching the tracer. +const BUCKET_SIZE_NS: u64 = 10_000_000_000; + +/// A single checkpoint's accumulated stats within a bucket. +struct StatsPoint { + hash: u64, + parent_hash: u64, + edge_tags: Vec, + edge_latency: DdSketch, + pathway_latency: DdSketch, + payload_size: DdSketch, +} + +impl StatsPoint { + fn new(hash: u64, parent_hash: u64, edge_tags: Vec) -> Self { + Self { + hash, + parent_hash, + edge_tags, + edge_latency: DdSketch::new(), + pathway_latency: DdSketch::new(), + payload_size: DdSketch::new(), + } + } + + fn add(&mut self, edge_latency_ns: u64, pathway_latency_ns: u64, payload_size: f64) { + #[allow(clippy::cast_precision_loss)] + let edge_s = edge_latency_ns as f64 / 1e9; + #[allow(clippy::cast_precision_loss)] + let pathway_s = pathway_latency_ns as f64 / 1e9; + self.edge_latency.accept(edge_s); + self.pathway_latency.accept(pathway_s); + self.payload_size.accept(payload_size); + } +} + +/// One time bucket: a set of checkpoints keyed by hash. +#[derive(Default)] +struct StatsBucket { + points: HashMap, +} + +/// Aggregates DSM checkpoints across invocations until flushed. 
+pub struct Aggregator { + service: String, + env: String, + tracer_version: String, + buckets: HashMap, +} + +impl Aggregator { + #[must_use] + pub fn new(service: String, env: String, tracer_version: String) -> Self { + Self { + service, + env, + tracer_version, + buckets: HashMap::new(), + } + } + + /// Fold a computed consume checkpoint into the appropriate time bucket. + pub fn add(&mut self, checkpoint: &Checkpoint, payload_size: f64) { + let bucket_start = checkpoint.current_ts_ns - (checkpoint.current_ts_ns % BUCKET_SIZE_NS); + let hash = u64::from_le_bytes(checkpoint.hash); + let parent_hash = u64::from_le_bytes(checkpoint.parent_hash); + + let bucket = self.buckets.entry(bucket_start).or_default(); + let point = bucket + .points + .entry(hash) + .or_insert_with(|| StatsPoint::new(hash, parent_hash, checkpoint.edge_tags.clone())); + point.add( + checkpoint.edge_latency_ns, + checkpoint.pathway_latency_ns, + payload_size, + ); + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.buckets.is_empty() + } + + /// Drain all buckets and build the msgpack `StatsPayload` (struct-as-map). + /// Returns `None` when there is nothing to flush. 
+ #[must_use] + pub fn take_payload(&mut self) -> Option> { + if self.buckets.is_empty() { + return None; + } + + let stats: Vec = self + .buckets + .drain() + .map(|(start, bucket)| StatsBucketSer { + start, + duration: BUCKET_SIZE_NS, + stats: bucket + .points + .into_values() + .map(|p| StatsPointSer { + hash: p.hash, + parent_hash: p.parent_hash, + edge_tags: p.edge_tags, + edge_latency: p.edge_latency.to_proto_bytes(), + pathway_latency: p.pathway_latency.to_proto_bytes(), + payload_size: p.payload_size.to_proto_bytes(), + }) + .collect(), + backlogs: Vec::new(), + }) + .collect(); + + let payload = StatsPayloadSer { + env: self.env.clone(), + service: self.service.clone(), + stats, + tracer_version: self.tracer_version.clone(), + lang: "rust-extension".to_string(), + }; + + // struct-as-map (named) so keys are emitted as strings, not positional. + rmp_serde::to_vec_named(&payload).ok() + } +} + +#[derive(Serialize)] +struct StatsPayloadSer { + #[serde(rename = "Env")] + env: String, + #[serde(rename = "Service")] + service: String, + #[serde(rename = "Stats")] + stats: Vec, + #[serde(rename = "TracerVersion")] + tracer_version: String, + #[serde(rename = "Lang")] + lang: String, +} + +#[derive(Serialize)] +struct StatsBucketSer { + #[serde(rename = "Start")] + start: u64, + #[serde(rename = "Duration")] + duration: u64, + #[serde(rename = "Stats")] + stats: Vec, + #[serde(rename = "Backlogs")] + backlogs: Vec<()>, +} + +#[derive(Serialize)] +struct StatsPointSer { + #[serde(rename = "Hash")] + hash: u64, + #[serde(rename = "ParentHash")] + parent_hash: u64, + #[serde(rename = "EdgeTags")] + edge_tags: Vec, + // serde_bytes => msgpack `bin` (the agent decodes these as []byte). 
+ #[serde(rename = "EdgeLatency", with = "serde_bytes")] + edge_latency: Vec, + #[serde(rename = "PathwayLatency", with = "serde_bytes")] + pathway_latency: Vec, + #[serde(rename = "PayloadSize", with = "serde_bytes")] + payload_size: Vec, +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + use crate::traces::data_streams::checkpoint::compute_consume_checkpoint; + + fn tags() -> Vec { + vec![ + "direction:in".to_string(), + "topic:q".to_string(), + "type:sqs".to_string(), + ] + } + + #[test] + fn empty_aggregator_has_no_payload() { + let mut agg = Aggregator::new("svc".into(), "env".into(), "1.0".into()); + assert!(agg.is_empty()); + assert!(agg.take_payload().is_none()); + } + + #[test] + fn aggregates_and_serializes() { + let mut agg = Aggregator::new("svc".into(), "env".into(), "1.0".into()); + let cp = compute_consume_checkpoint("svc", "env", &tags(), None, 2_000_000_000, None); + agg.add(&cp, 128.0); + assert!(!agg.is_empty()); + + let payload = agg.take_payload().expect("payload"); + assert!(!payload.is_empty()); + // Draining leaves the aggregator empty. + assert!(agg.is_empty()); + + // Top-level is a 5-key msgpack fixmap (struct-as-map), so the first + // byte is 0x85. This guards against accidental struct-as-array output. + assert_eq!(payload[0], 0x85, "top-level payload must be a 5-entry msgpack map"); + } + + #[test] + fn same_hash_merges_into_one_point() { + let mut agg = Aggregator::new("svc".into(), "env".into(), "1.0".into()); + // Two checkpoints with identical inputs land in the same bucket+point. 
+ let cp1 = compute_consume_checkpoint("svc", "env", &tags(), None, 2_000_000_000, None); + let cp2 = compute_consume_checkpoint("svc", "env", &tags(), None, 2_000_000_001, None); + agg.add(&cp1, 1.0); + agg.add(&cp2, 1.0); + + assert_eq!(agg.buckets.len(), 1); + let bucket = agg.buckets.values().next().unwrap(); + assert_eq!(bucket.points.len(), 1); + } +} diff --git a/bottlecap/src/traces/data_streams/checkpoint.rs b/bottlecap/src/traces/data_streams/checkpoint.rs new file mode 100644 index 000000000..fb4ba8347 --- /dev/null +++ b/bottlecap/src/traces/data_streams/checkpoint.rs @@ -0,0 +1,124 @@ +//! Consume-side DSM checkpoint computation. +//! +//! This is the extension-only subset of `dd-trace-js`'s `setCheckpoint`: we only +//! ever produce a single inbound (`direction:in`) checkpoint continuing from an +//! extracted parent context. The tracer's in-process `closestOppositeDirection` +//! loop handling does not apply, because the extension never observes the +//! produce side of a pathway. + +use crate::traces::data_streams::context::DsmContext; +use crate::traces::data_streams::pathway::compute_pathway_hash; + +/// Parent hash used when there is no inbound context (pathway entry point). +pub const ENTRY_PARENT_HASH: [u8; 8] = [0; 8]; + +/// A computed consume checkpoint, ready to be folded into a stats bucket. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Checkpoint { + /// This checkpoint's pathway hash. + pub hash: [u8; 8], + /// The parent pathway hash this checkpoint continues from. + pub parent_hash: [u8; 8], + /// Sorted edge tags (direction tag first, as supplied by the caller). + pub edge_tags: Vec, + /// Time the producer-to-consumer edge took, in nanoseconds. + pub edge_latency_ns: u64, + /// Total pathway latency from origin to here, in nanoseconds. + pub pathway_latency_ns: u64, + /// Wall-clock time of this checkpoint, in nanoseconds (used for bucketing). 
+ pub current_ts_ns: u64, +} + +/// Compute an inbound (`direction:in`) consume checkpoint. +/// +/// * `edge_tags` must contain `direction:in` and the source-specific tags. +/// * `ctx` is the extracted inbound DSM context, if any. +/// * `now_ns` is the current wall-clock time in nanoseconds. +/// * `propagation_hash` is the optional process/container-tag hash. +#[must_use] +pub fn compute_consume_checkpoint( + service: &str, + env: &str, + edge_tags: &[String], + ctx: Option<&DsmContext>, + now_ns: u64, + propagation_hash: Option, +) -> Checkpoint { + let (parent_hash, pathway_start_ns, edge_start_ns) = match ctx { + Some(ctx) => (ctx.hash, ctx.pathway_start_ns, ctx.edge_start_ns), + None => (ENTRY_PARENT_HASH, now_ns, now_ns), + }; + + let hash = compute_pathway_hash(service, env, edge_tags, parent_hash, propagation_hash); + + // Saturating: a clock skew where the stored start is in the future yields 0 + // latency rather than a wildly large wrapped value. + let edge_latency_ns = now_ns.saturating_sub(edge_start_ns); + let pathway_latency_ns = now_ns.saturating_sub(pathway_start_ns); + + Checkpoint { + hash, + parent_hash, + edge_tags: edge_tags.to_vec(), + edge_latency_ns, + pathway_latency_ns, + current_ts_ns: now_ns, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn tags() -> Vec { + vec![ + "direction:in".to_string(), + "topic:my-topic".to_string(), + "type:sqs".to_string(), + ] + } + + #[test] + fn continues_from_extracted_context() { + let ctx = DsmContext { + hash: [1, 2, 3, 4, 5, 6, 7, 8], + pathway_start_ns: 1_000_000_000, + edge_start_ns: 1_500_000_000, + }; + let now = 2_000_000_000; + + let cp = compute_consume_checkpoint("svc", "env", &tags(), Some(&ctx), now, None); + + assert_eq!(cp.parent_hash, [1, 2, 3, 4, 5, 6, 7, 8]); + assert_eq!(cp.edge_latency_ns, 500_000_000); // now - edge_start + assert_eq!(cp.pathway_latency_ns, 1_000_000_000); // now - pathway_start + assert_eq!(cp.current_ts_ns, now); + // Hash must match a direct 
pathway-hash computation with the parent. + assert_eq!( + cp.hash, + compute_pathway_hash("svc", "env", &tags(), ctx.hash, None) + ); + } + + #[test] + fn entry_point_when_no_context() { + let now = 2_000_000_000; + let cp = compute_consume_checkpoint("svc", "env", &tags(), None, now, None); + + assert_eq!(cp.parent_hash, ENTRY_PARENT_HASH); + assert_eq!(cp.edge_latency_ns, 0); + assert_eq!(cp.pathway_latency_ns, 0); + } + + #[test] + fn clock_skew_saturates_to_zero() { + let ctx = DsmContext { + hash: [0; 8], + pathway_start_ns: 5_000_000_000, // in the future relative to now + edge_start_ns: 5_000_000_000, + }; + let cp = compute_consume_checkpoint("svc", "env", &tags(), Some(&ctx), 1_000_000_000, None); + assert_eq!(cp.edge_latency_ns, 0); + assert_eq!(cp.pathway_latency_ns, 0); + } +} diff --git a/bottlecap/src/traces/data_streams/context.rs b/bottlecap/src/traces/data_streams/context.rs new file mode 100644 index 000000000..b1f787e16 --- /dev/null +++ b/bottlecap/src/traces/data_streams/context.rs @@ -0,0 +1,172 @@ +//! Decoding of inbound Data Streams Monitoring (DSM) pathway context. +//! +//! The wire format (after base64 decoding) is: +//! 1. first 8 bytes: raw pathway hash +//! 2. zigzag-encoded signed varint (protobuf `sint64`): `pathwayStartMs` +//! 3. zigzag-encoded signed varint (protobuf `sint64`): `edgeStartMs` +//! +//! NOTE: an earlier design note described these as plain unsigned varints. The +//! `dd-trace-js` tracer actually zigzag-encodes them (a positive `n` is stored +//! as `2n`), so they must be zigzag-decoded to recover the millisecond value. +//! +//! Both timestamps are stored in milliseconds and converted to nanoseconds by +//! multiplying by `1_000_000`, matching `dd-trace-js`. +//! +//! All decoding fails closed: malformed payloads return `None` and are treated +//! as "no parent DSM context". 
+ +use base64::Engine; +use base64::engine::general_purpose::STANDARD; + +/// Carrier key (preferred) holding the base64-encoded DSM pathway context. +pub const DD_PATHWAY_CTX_BASE64_KEY: &str = "dd-pathway-ctx-base64"; +/// Legacy carrier key holding the raw (binary) DSM pathway context. +pub const DD_PATHWAY_CTX_KEY: &str = "dd-pathway-ctx"; + +const MS_TO_NS: u64 = 1_000_000; + +/// An inbound DSM pathway context extracted from a carrier. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DsmContext { + /// Raw 8-byte parent pathway hash (opaque; do not reinterpret for hashing). + pub hash: [u8; 8], + /// Pathway start time in nanoseconds. + pub pathway_start_ns: u64, + /// Edge start time in nanoseconds. + pub edge_start_ns: u64, +} + +impl DsmContext { + /// Decode a DSM context from a base64-encoded `dd-pathway-ctx-base64` value. + #[must_use] + pub fn from_base64(input: &str) -> Option { + let bytes = STANDARD.decode(input).ok()?; + Self::from_bytes(&bytes) + } + + /// Decode a DSM context from its raw binary representation. + #[must_use] + pub fn from_bytes(bytes: &[u8]) -> Option { + if bytes.len() < 8 { + return None; + } + + let mut hash = [0u8; 8]; + hash.copy_from_slice(&bytes[..8]); + + let (pathway_start_ms, rest) = decode_zigzag_varint(&bytes[8..])?; + let (edge_start_ms, _) = decode_zigzag_varint(rest)?; + + Some(Self { + hash, + pathway_start_ns: ms_to_ns(pathway_start_ms)?, + edge_start_ns: ms_to_ns(edge_start_ms)?, + }) + } +} + +/// Convert a (signed) millisecond timestamp to nanoseconds. Negative values are +/// rejected — DSM timestamps are always positive wall-clock times. +fn ms_to_ns(ms: i64) -> Option { + u64::try_from(ms).ok()?.checked_mul(MS_TO_NS) +} + +/// Decode a zigzag-encoded signed varint (protobuf `sint64`). +fn decode_zigzag_varint(bytes: &[u8]) -> Option<(i64, &[u8])> { + let (raw, rest) = decode_uvarint(bytes)?; + // Zigzag decode: (raw >> 1) ^ -(raw & 1). 
+ #[allow(clippy::cast_possible_wrap)] + let decoded = ((raw >> 1) as i64) ^ -((raw & 1) as i64); + Some((decoded, rest)) +} + +/// Decode an unsigned LEB128 varint, returning the value and the remaining bytes. +/// +/// Returns `None` if the input is truncated or the varint overflows `u64`. +fn decode_uvarint(bytes: &[u8]) -> Option<(u64, &[u8])> { + let mut result: u64 = 0; + let mut shift: u32 = 0; + + for (idx, &byte) in bytes.iter().enumerate() { + let payload = u64::from(byte & 0x7f); + // At most 10 varint groups fit in a u64; the 10th (shift 63) may only carry 1 bit. + if shift >= 64 || (shift == 63 && payload > 1) { + return None; + } + result |= payload << shift; + + if byte & 0x80 == 0 { + return Some((result, &bytes[idx + 1..])); + } + shift += 7; + } + + // Ran out of bytes before the terminating group. + None +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Pinned `dd-trace-js` fixture. + const FIXTURE_B64: &str = "Z7CzXmXArPrE58Cfj2LI2cOfj2I="; + + #[test] + fn decodes_pinned_base64_fixture() { + let ctx = DsmContext::from_base64(FIXTURE_B64).expect("should decode"); + + assert_eq!(hex::encode(ctx.hash), "67b0b35e65c0acfa"); + assert_eq!(ctx.pathway_start_ns, 1_685_673_482_722_000_000); + assert_eq!(ctx.edge_start_ns, 1_685_673_506_404_000_000); + } + + #[test] + fn rejects_short_context() { + assert!(DsmContext::from_bytes(&[0u8; 7]).is_none()); + } + + #[test] + fn rejects_missing_varints() { + // 8 hash bytes but no varints follow. + assert!(DsmContext::from_bytes(&[0u8; 8]).is_none()); + } + + #[test] + fn rejects_truncated_varint() { + // Hash + a varint with continuation bit set but no following byte.
+ let mut bytes = vec![0u8; 8]; + bytes.push(0x80); + assert!(DsmContext::from_bytes(&bytes).is_none()); + } + + #[test] + fn rejects_invalid_base64() { + assert!(DsmContext::from_base64("not valid base64!!!").is_none()); + } + + #[test] + fn uvarint_single_byte() { + let (value, rest) = decode_uvarint(&[0x01]).expect("decode"); + assert_eq!(value, 1); + assert!(rest.is_empty()); + } + + #[test] + fn zigzag_decodes_positive() { + // 1685673482722 zigzag-encoded is 2 * 1685673482722 = 3371346965444. + // 3371346965444 in LEB128: encode and decode round-trip via the public API + // is covered by the pinned fixture; here we check the helper directly. + let (value, _) = decode_zigzag_varint(&[0xac, 0x02]).expect("decode"); + // raw uvarint 300 -> zigzag -> 150 + assert_eq!(value, 150); + } + + #[test] + fn uvarint_multi_byte() { + // 300 = 0xAC 0x02 in LEB128. + let (value, rest) = decode_uvarint(&[0xac, 0x02, 0xff]).expect("decode"); + assert_eq!(value, 300); + assert_eq!(rest, &[0xff]); + } +} diff --git a/bottlecap/src/traces/data_streams/fixtures/sketch_golden.json b/bottlecap/src/traces/data_streams/fixtures/sketch_golden.json new file mode 100644 index 000000000..1cf560223 --- /dev/null +++ b/bottlecap/src/traces/data_streams/fixtures/sketch_golden.json @@ -0,0 +1,1491 @@ +{ + "generator": "dd-trace-js vendored @datadog/sketches-js", + "sketch": "LogCollapsingLowestDenseDDSketch (relativeAccuracy=0.01, binLimit=2048)", + "cases": [ + { + "name": "single_value_1s", + "values": [ + 1 + ], + "valueHex": 
"0a1409fd4a815abf52f03f11000000000000000018001285081280080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000187f1a021800210000000000000000", + "valueBase64": "ChQJ/UqBWr9S8D8RAAAAAAAAAAAYABKFCBKACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYfxoCGAAhAAAAAAAAAAA=", + "byteLen": 1067, + "decoded": { + "mapping": { + "gamma": 1.02020202020202, + "indexOffset": 0, + "interpolation": "NONE" + }, + "positiveValues": { + "contiguousBinCounts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "binCounts": {}, + "contiguousBinIndexOffset": -64 + }, + "negativeValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "zeroCount": 0 + } + }, + { + "name": "single_value_tenth", + "values": [ + 0.1 + ], + "valueHex": "0a1409fd4a815abf52f03f11000000000000000018001286081280080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000018e5021a021800210000000000000000", + "valueBase64": "ChQJ/UqBWr9S8D8RAAAAAAAAAAAYABKGCBKACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAY5QIaAhgAIQAAAAAAAAAA", + "byteLen": 1068, + "decoded": { + "mapping": { + "gamma": 1.02020202020202, + "indexOffset": 0, + "interpolation": "NONE" + }, + "positiveValues": { + "contiguousBinCounts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "binCounts": {}, + "contiguousBinIndexOffset": -179 + }, + "negativeValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "zeroCount": 0 + } + }, + { + "name": "single_value_1ms", + "values": [ + 0.001 + ], + "valueHex": 
"0a1409fd4a815abf52f03f11000000000000000018001286081280080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000018b1061a021800210000000000000000", + "valueBase64": "ChQJ/UqBWr9S8D8RAAAAAAAAAAAYABKGCBKACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYsQYaAhgAIQAAAAAAAAAA", + "byteLen": 1068, + "decoded": { + "mapping": { + "gamma": 1.02020202020202, + "indexOffset": 0, + "interpolation": "NONE" + }, + "positiveValues": { + "contiguousBinCounts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "binCounts": {}, + "contiguousBinIndexOffset": -409 + }, + "negativeValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "zeroCount": 0 + } + }, + { + "name": "multi_spread", + "values": [ + 0.001, + 0.01, + 0.1, + 1, + 10 + ], + "valueHex": "0a1409fd4a815abf52f03f11000000000000000018001286201280200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000018e3051a021800210000000000000000", + "valueBase64": "ChQJ/UqBWr9S8D8RAAAAAAAAAAAYABKGIBKAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADwPwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAY4wUaAhgAIQAAAAAAAAAA", + "byteLen": 4140, + "decoded": { + "mapping": { + "gamma": 1.02020202020202, + "indexOffset": 0, + "interpolation": "NONE" + }, + "positiveValues": { + "contiguousBinCounts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, 
+ 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "binCounts": {}, + "contiguousBinIndexOffset": -370 + }, + "negativeValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "zeroCount": 0 + } + }, + { + "name": "repeated_same", + "values": [ + 0.5, + 0.5, + 0.5, + 0.5 + ], + "valueHex": 
"0a1409fd4a815abf52f03f110000000000000000180012860812800800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000018c3011a021800210000000000000000", + "valueBase64": "ChQJ/UqBWr9S8D8RAAAAAAAAAAAYABKGCBKACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYwwEaAhgAIQAAAAAAAAAA", + "byteLen": 1068, + "decoded": { + "mapping": { + "gamma": 1.02020202020202, + "indexOffset": 0, + "interpolation": "NONE" + }, + "positiveValues": { + "contiguousBinCounts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "binCounts": {}, + "contiguousBinIndexOffset": -98 + }, + "negativeValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "zeroCount": 0 + } + }, + { + "name": "payload_sizes", + "values": [ + 100, + 256, + 1024, + 4096 + ], + "valueHex": "0a1409fd4a815abf52f03f110000000000000000180012861012801000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000f03f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f03f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001888031a021800210000000000000000", + "valueBase64": "ChQJ/UqBWr9S8D8RAAAAAAAAAAAYABKGEBKAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADwPwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA8D8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGIgDGgIYACEAAAAAAAAAAA==", + "byteLen": 2092, + "decoded": { + "mapping": { + "gamma": 1.02020202020202, + "indexOffset": 0, + "interpolation": "NONE" + }, + "positiveValues": { + "contiguousBinCounts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "binCounts": {}, + "contiguousBinIndexOffset": 196 + }, + "negativeValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "zeroCount": 0 + } + }, + { + "name": "zero_value", + "values": [ + 0 + ], + "valueHex": "0a1409fd4a815abf52f03f1100000000000000001800120218001a02180021000000000000f03f", + "valueBase64": "ChQJ/UqBWr9S8D8RAAAAAAAAAAAYABICGAAaAhgAIQAAAAAAAPA/", + "byteLen": 39, + "decoded": { + "mapping": { + "gamma": 1.02020202020202, + "indexOffset": 0, + "interpolation": "NONE" + }, + "positiveValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "negativeValues": { + "contiguousBinCounts": [], + "binCounts": {}, + "contiguousBinIndexOffset": 0 + }, + "zeroCount": 1 + } + } + ] +} diff --git a/bottlecap/src/traces/data_streams/mod.rs 
b/bottlecap/src/traces/data_streams/mod.rs new file mode 100644 index 000000000..266c51d9f --- /dev/null +++ b/bottlecap/src/traces/data_streams/mod.rs @@ -0,0 +1,25 @@ +//! Data Streams Monitoring (DSM) support. +//! +//! This module provides `dd-trace-js`-compatible primitives for continuing an +//! inbound DSM pathway from request payloads and computing consume-side +//! checkpoint hashes inside the extension. +//! +//! The pieces are split so the compatibility-sensitive steps can be tested in +//! isolation: +//! * [`context`] — decode inbound pathway context (base64 + binary + varint). +//! * [`pathway`] — compute the pathway/checkpoint hash. +//! * [`checkpoint`] — compute a consume-side checkpoint from an extracted context. +//! * [`propagation_hash`] — optional process/container-tag propagation hash. + +pub mod aggregator; +pub mod checkpoint; +pub mod context; +pub mod pathway; +pub mod processor; +pub mod propagation_hash; +pub mod sketch; + +pub use checkpoint::{Checkpoint, compute_consume_checkpoint}; +pub use context::DsmContext; +pub use pathway::compute_pathway_hash; +pub use processor::DsmProcessor; diff --git a/bottlecap/src/traces/data_streams/pathway.rs b/bottlecap/src/traces/data_streams/pathway.rs new file mode 100644 index 000000000..66b5cd6a5 --- /dev/null +++ b/bottlecap/src/traces/data_streams/pathway.rs @@ -0,0 +1,165 @@ +//! DSM pathway hash computation, byte-for-byte compatible with `dd-trace-js`. +//! +//! See `docs`/design notes: the algorithm intentionally preserves a quirk where +//! the 16-byte `current_hash || parent_hash` buffer is converted to a (lossy) +//! UTF-8 string *before* the final SHA-256, rather than hashing the raw bytes. +//! Do not "simplify" this to `sha256(&combined)` — it would break compatibility +//! with pathways produced by the tracers. + +use sha2::{Digest, Sha256}; +use std::fmt::Write as _; + +const MANUAL_CHECKPOINT_TAG: &str = "manual_checkpoint:true"; + +/// First 8 bytes of `SHA-256(bytes)`. 
+fn sha256_first8(bytes: &[u8]) -> [u8; 8] { + let digest = Sha256::digest(bytes); + let mut out = [0u8; 8]; + out.copy_from_slice(&digest[..8]); + out +} + +/// Compute a DSM pathway hash for a checkpoint. +/// +/// * `service` / `env` — local service identity. +/// * `edge_tags` — checkpoint edge tags (e.g. `direction:in`, `type:sqs`). +/// Sorted and de-`manual_checkpoint`-ed before hashing. +/// * `parent_hash` — raw 8-byte parent pathway hash (zero bytes if no parent). +/// * `propagation_hash` — optional process/container-tag propagation hash. +#[must_use] +pub fn compute_pathway_hash( + service: &str, + env: &str, + edge_tags: &[String], + parent_hash: [u8; 8], + propagation_hash: Option, +) -> [u8; 8] { + let mut tags = edge_tags.to_vec(); + tags.sort_unstable(); + + let joined_tags = tags + .iter() + .filter(|tag| tag.as_str() != MANUAL_CHECKPOINT_TAG) + .map(String::as_str) + .collect::(); + + let mut base = format!("{service}{env}{joined_tags}"); + if let Some(hash) = propagation_hash { + // Appended as ":" + lowercase hex with no "0x" prefix and no leading zeros, + // matching JS `Number.prototype.toString(16)`. + write!(&mut base, ":{hash:x}").expect("writing to String cannot fail"); + } + + let current_hash = sha256_first8(base.as_bytes()); + + let mut combined = [0u8; 16]; + combined[..8].copy_from_slice(¤t_hash); + combined[8..].copy_from_slice(&parent_hash); + + // Compatibility-critical: lossy UTF-8 round-trip before the final hash. + let combined_string = String::from_utf8_lossy(&combined); + sha256_first8(combined_string.as_bytes()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn tags(values: &[&str]) -> Vec { + values.iter().map(|s| (*s).to_string()).collect() + } + + /// Pinned `dd-trace-js` fixture. 
+ #[test] + fn matches_pinned_pathway_hash() { + let hash = compute_pathway_hash( + "test-service", + "test-env", + &tags(&["direction:in", "group:group1", "topic:topic1", "type:kafka"]), + [0u8; 8], + None, + ); + assert_eq!(hex::encode(hash), "67b0b35e65c0acfa"); + } + + #[test] + fn tag_order_does_not_change_hash() { + let sorted = compute_pathway_hash( + "test-service", + "test-env", + &tags(&["direction:in", "group:group1", "topic:topic1", "type:kafka"]), + [0u8; 8], + None, + ); + let shuffled = compute_pathway_hash( + "test-service", + "test-env", + &tags(&["type:kafka", "topic:topic1", "direction:in", "group:group1"]), + [0u8; 8], + None, + ); + assert_eq!(sorted, shuffled); + } + + #[test] + fn manual_checkpoint_tag_is_excluded() { + let without = compute_pathway_hash( + "svc", + "env", + &tags(&["direction:in"]), + [0u8; 8], + None, + ); + let with = compute_pathway_hash( + "svc", + "env", + &tags(&["direction:in", "manual_checkpoint:true"]), + [0u8; 8], + None, + ); + assert_eq!(without, with); + } + + #[test] + fn parent_hash_changes_result() { + let a = compute_pathway_hash("svc", "env", &tags(&["direction:in"]), [0u8; 8], None); + let b = compute_pathway_hash( + "svc", + "env", + &tags(&["direction:in"]), + [1, 2, 3, 4, 5, 6, 7, 8], + None, + ); + assert_ne!(a, b); + } + + #[test] + fn propagation_hash_changes_result() { + let absent = compute_pathway_hash("svc", "env", &tags(&["direction:in"]), [0u8; 8], None); + let present = compute_pathway_hash( + "svc", + "env", + &tags(&["direction:in"]), + [0u8; 8], + Some(0x1234_5678_9abc_def0), + ); + let present_repeat = compute_pathway_hash( + "svc", + "env", + &tags(&["direction:in"]), + [0u8; 8], + Some(0x1234_5678_9abc_def0), + ); + let different = compute_pathway_hash( + "svc", + "env", + &tags(&["direction:in"]), + [0u8; 8], + Some(0x0fed_cba9_8765_4321), + ); + + assert_ne!(absent, present); + assert_eq!(present, present_repeat); + assert_ne!(present, different); + } +} diff --git 
a/bottlecap/src/traces/data_streams/processor.rs b/bottlecap/src/traces/data_streams/processor.rs new file mode 100644 index 000000000..c6ff0a4f5 --- /dev/null +++ b/bottlecap/src/traces/data_streams/processor.rs @@ -0,0 +1,214 @@ +//! Extension-side DSM consume processor. +//! +//! Owns the checkpoint [`Aggregator`] and bridges it to the existing proxy +//! flush path: consume checkpoints are folded in during invocation start, and +//! on flush the aggregated pipeline-stats payload is gzipped and enqueued as a +//! [`ProxyRequest`] so the shared [`crate::traces::proxy_flusher`] ships it to +//! `/api/v0.1/pipeline_stats` (adding the API key + tags). +//! +//! Gated entirely by `DD_DSM_CONSUME_ENABLED`; when disabled this is never +//! constructed. + +use std::io::Write; +use std::sync::Mutex; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + +use bytes::Bytes; +use flate2::Compression; +use flate2::write::GzEncoder; +use reqwest::header::{CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderValue}; +use tokio::sync::Mutex as TokioMutex; +use tracing::{debug, warn}; + +use crate::traces::data_streams::aggregator::Aggregator; +use crate::traces::data_streams::checkpoint::compute_consume_checkpoint; +use crate::traces::data_streams::context::{ + DD_PATHWAY_CTX_BASE64_KEY, DD_PATHWAY_CTX_KEY, DsmContext, +}; +use crate::traces::proxy_aggregator::{Aggregator as ProxyAggregator, ProxyRequest}; + +/// gzip level used by the tracer for pipeline stats. 
+const GZIP_LEVEL: u32 = 1; + +pub struct DsmProcessor { + service: String, + env: String, + aggregator: Mutex, + proxy_aggregator: Arc>, + target_url: String, +} + +impl DsmProcessor { + #[must_use] + pub fn new( + service: String, + env: String, + tracer_version: String, + site: &str, + proxy_aggregator: Arc>, + ) -> Self { + let aggregator = Aggregator::new(service.clone(), env.clone(), tracer_version); + Self { + service, + env, + aggregator: Mutex::new(aggregator), + proxy_aggregator, + target_url: format!("https://trace.agent.{site}/api/v0.1/pipeline_stats"), + } + } + + /// Record a consume (`direction:in`) checkpoint for an inbound event. + /// + /// `edge_tags` come from the trigger (`Trigger::get_dsm_edge_tags`); `carrier` + /// is the trigger carrier (which may contain the inbound pathway context). + pub fn record_consume( + &self, + edge_tags: &[String], + carrier: &std::collections::HashMap, + payload_size: f64, + ) { + let ctx = extract_pathway_context(carrier); + let now_ns = now_unix_nanos(); + + let checkpoint = compute_consume_checkpoint( + &self.service, + &self.env, + edge_tags, + ctx.as_ref(), + now_ns, + None, + ); + + debug!( + "DSM: recorded consume checkpoint hash={:x} parent={:x} has_inbound_ctx={} edge_tags={:?}", + u64::from_le_bytes(checkpoint.hash), + u64::from_le_bytes(checkpoint.parent_hash), + ctx.is_some(), + edge_tags + ); + + if let Ok(mut agg) = self.aggregator.lock() { + agg.add(&checkpoint, payload_size); + } + } + + /// Drain the aggregator into the proxy aggregator for flushing. No-op when + /// there is nothing buffered. 
+ pub async fn drain_into_proxy(&self) { + let payload = match self.aggregator.lock() { + Ok(mut agg) => agg.take_payload(), + Err(_) => None, + }; + let Some(payload) = payload else { + return; + }; + + let body = match gzip(&payload) { + Ok(b) => b, + Err(e) => { + warn!("DSM: failed to gzip pipeline stats payload: {e}"); + return; + } + }; + + let mut headers = HeaderMap::new(); + headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/msgpack")); + headers.insert(CONTENT_ENCODING, HeaderValue::from_static("gzip")); + + let request = ProxyRequest { + headers, + body: Bytes::from(body), + target_url: self.target_url.clone(), + }; + + debug!("DSM: enqueued pipeline stats payload ({} bytes gzipped)", request.body.len()); + self.proxy_aggregator.lock().await.add(request); + } +} + +/// Extract the inbound DSM pathway context from a carrier, preferring the +/// base64 key. Fails closed (returns `None`) on malformed input. +fn extract_pathway_context( + carrier: &std::collections::HashMap, +) -> Option { + carrier + .get(DD_PATHWAY_CTX_BASE64_KEY) + .or_else(|| carrier.get(DD_PATHWAY_CTX_KEY)) + .and_then(|v| DsmContext::from_base64(v)) +} + +fn now_unix_nanos() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| u64::try_from(d.as_nanos()).unwrap_or(u64::MAX)) + .unwrap_or(0) +} + +fn gzip(data: &[u8]) -> std::io::Result> { + let mut encoder = GzEncoder::new(Vec::new(), Compression::new(GZIP_LEVEL)); + encoder.write_all(data)?; + encoder.finish() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + #[test] + fn extracts_base64_context_from_carrier() { + let mut carrier = HashMap::new(); + carrier.insert( + DD_PATHWAY_CTX_BASE64_KEY.to_string(), + "Z7CzXmXArPrE58Cfj2LI2cOfj2I=".to_string(), + ); + let ctx = extract_pathway_context(&carrier).expect("context"); + assert_eq!(hex::encode(ctx.hash), "67b0b35e65c0acfa"); + } + + #[test] + fn missing_context_returns_none() { + let carrier = HashMap::new(); + 
assert!(extract_pathway_context(&carrier).is_none()); + } + + #[test] + fn malformed_context_returns_none() { + let mut carrier = HashMap::new(); + carrier.insert(DD_PATHWAY_CTX_BASE64_KEY.to_string(), "@@bad@@".to_string()); + assert!(extract_pathway_context(&carrier).is_none()); + } + + #[tokio::test] + async fn drain_enqueues_proxy_request_when_data_present() { + let proxy = Arc::new(TokioMutex::new(ProxyAggregator::default())); + let dsm = DsmProcessor::new( + "svc".into(), + "env".into(), + "1.0".into(), + "datadoghq.com", + proxy.clone(), + ); + + let edge_tags = vec![ + "direction:in".to_string(), + "topic:q".to_string(), + "type:sqs".to_string(), + ]; + dsm.record_consume(&edge_tags, &HashMap::new(), 128.0); + dsm.drain_into_proxy().await; + + let batch = proxy.lock().await.get_batch(); + assert_eq!(batch.len(), 1); + assert!(batch[0].target_url.ends_with("/api/v0.1/pipeline_stats")); + } + + #[tokio::test] + async fn drain_is_noop_when_empty() { + let proxy = Arc::new(TokioMutex::new(ProxyAggregator::default())); + let dsm = DsmProcessor::new("svc".into(), "env".into(), "1.0".into(), "datadoghq.com", proxy.clone()); + dsm.drain_into_proxy().await; + assert_eq!(proxy.lock().await.get_batch().len(), 0); + } +} diff --git a/bottlecap/src/traces/data_streams/propagation_hash.rs b/bottlecap/src/traces/data_streams/propagation_hash.rs new file mode 100644 index 000000000..49e18784f --- /dev/null +++ b/bottlecap/src/traces/data_streams/propagation_hash.rs @@ -0,0 +1,52 @@ +//! Optional DSM propagation hash. +//! +//! Used when process-tag propagation is enabled. The input is the serialized +//! process tags plus the container-tags hash returned by the agent. +//! +//! NOTE: `dd-trace-js` comments describe this as "FNV-1a", but the code performs +//! multiply-then-XOR, i.e. FNV-1 (not FNV-1a). We match the implementation, not +//! the comment, for compatibility. 
+ +const FNV1_64_OFFSET_BASIS: u64 = 0xCBF2_9CE4_8422_2325; +const FNV1_64_PRIME: u64 = 0x0000_0100_0000_01B3; + +/// Compute the FNV-1 (64-bit) hash over `bytes`, matching `dd-trace-js`. +#[must_use] +pub fn fnv1_64(bytes: &[u8]) -> u64 { + let mut hash = FNV1_64_OFFSET_BASIS; + for &byte in bytes { + hash = hash.wrapping_mul(FNV1_64_PRIME); + hash ^= u64::from(byte); + } + hash +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_input_is_offset_basis() { + assert_eq!(fnv1_64(b""), FNV1_64_OFFSET_BASIS); + } + + #[test] + fn is_deterministic() { + assert_eq!(fnv1_64(b"process-tags:foo"), fnv1_64(b"process-tags:foo")); + } + + #[test] + fn differs_by_input() { + assert_ne!(fnv1_64(b"foo"), fnv1_64(b"bar")); + } + + #[test] + fn applies_multiply_before_xor() { + // FNV-1 (multiply-then-XOR) differs from FNV-1a (XOR-then-multiply). + // Verify the first step explicitly for a single byte. + let expected = FNV1_64_OFFSET_BASIS + .wrapping_mul(FNV1_64_PRIME) + ^ u64::from(b'a'); + assert_eq!(fnv1_64(b"a"), expected); + } +} diff --git a/bottlecap/src/traces/data_streams/sketch.rs b/bottlecap/src/traces/data_streams/sketch.rs new file mode 100644 index 000000000..dd7cff359 --- /dev/null +++ b/bottlecap/src/traces/data_streams/sketch.rs @@ -0,0 +1,341 @@ +//! A byte-for-byte port of the tracer's `LogCollapsingLowestDenseDDSketch` +//! (relative accuracy 0.01, bin limit 2048) and its protobuf serialization. +//! +//! DSM's `EdgeLatency` / `PathwayLatency` / `PayloadSize` fields are the raw +//! `DDSketch` protobuf bytes produced by `@datadog/sketches-js`. To stay +//! compatible we must reproduce both the binning *and* the dense-store layout +//! exactly (including the bin-centering that pads each chunk with zeros). +//! +//! Verified against fixtures generated by the real tracer in +//! `fixtures/sketch_golden.json` (see `tools/dsm/gen_sketch_golden.js`). + +/// Default relative accuracy used by DSM sketches. 
+const RELATIVE_ACCURACY: f64 = 0.01; +const BIN_LIMIT: i64 = 2048; +const CHUNK_SIZE: i64 = 128; + +/// Logarithmic key mapping, mirroring `sketches-js` `LogarithmicMapping`. +#[derive(Debug, Clone, Copy)] +struct LogarithmicMapping { + gamma: f64, + multiplier: f64, + min_possible: f64, +} + +impl LogarithmicMapping { + fn new(relative_accuracy: f64) -> Self { + let i = 2.0 * relative_accuracy / (1.0 - relative_accuracy); + let gamma = 1.0 + i; + // KeyMapping multiplier is 1/ln1p(i); LogarithmicMapping then * ln(2). + let multiplier = std::f64::consts::LN_2 / i.ln_1p(); + // KeyMapping.minPossible = MIN_NORMAL * gamma (MIN_NORMAL = f64::MIN_POSITIVE). + let min_possible = f64::MIN_POSITIVE * gamma; + Self { + gamma, + multiplier, + min_possible, + } + } + + /// `key(v) = ceil(log2(v) * multiplier)` (offset is always 0 for DSM). + fn key(&self, v: f64) -> i32 { + // Match the JS `Math.log2(v) * multiplier` op order exactly. + #[allow(clippy::cast_possible_truncation)] + let k = (v.log2() * self.multiplier).ceil() as i32; + k + } +} + +/// Collapse-lowest dense store, mirroring `sketches-js` `CollapsingLowestDenseStore`. +#[derive(Debug, Clone)] +struct CollapsingLowestDenseStore { + bins: Vec, + count: f64, + min_key: i32, + max_key: i32, + offset: i32, + is_collapsed: bool, +} + +impl CollapsingLowestDenseStore { + fn new() -> Self { + Self { + bins: Vec::new(), + count: 0.0, + // Emulate JS +Inf / -Inf sentinels for an empty store. + min_key: i32::MAX, + max_key: i32::MIN, + offset: 0, + is_collapsed: false, + } + } + + fn length(&self) -> i32 { + i32::try_from(self.bins.len()).unwrap_or(i32::MAX) + } + + #[allow(clippy::unused_self)] + fn get_new_length(&self, new_min: i32, new_max: i32) -> usize { + let desired = i64::from(new_max) - i64::from(new_min) + 1; + // ceil(desired / CHUNK_SIZE) without float casts. 
+ let chunks = (desired + CHUNK_SIZE - 1) / CHUNK_SIZE; + let len = (CHUNK_SIZE * chunks).min(BIN_LIMIT); + usize::try_from(len).unwrap_or(0) + } + + fn add(&mut self, key: i32, weight: f64) { + let idx = self.get_index(key); + self.bins[idx] += weight; + self.count += weight; + } + + fn get_index(&mut self, key: i32) -> usize { + if key < self.min_key { + if self.is_collapsed { + return 0; + } + self.extend_range(key, key); + if self.is_collapsed { + return 0; + } + } else if key > self.max_key { + self.extend_range(key, key); + } + #[allow(clippy::cast_sign_loss)] + let idx = (key - self.offset) as usize; + idx + } + + fn extend_range(&mut self, key: i32, key2: i32) { + let new_min = key.min(key2).min(self.min_key); + let new_max = key.max(key2).max(self.max_key); + + if self.bins.is_empty() { + self.bins = vec![0.0; self.get_new_length(new_min, new_max)]; + self.offset = new_min; + self.adjust(new_min, new_max); + } else if new_min >= self.min_key && new_max < self.offset + self.length() { + self.min_key = new_min; + self.max_key = new_max; + } else { + let new_length = self.get_new_length(new_min, new_max); + if new_length > self.bins.len() { + self.bins.resize(new_length, 0.0); + } + self.adjust(new_min, new_max); + } + } + + /// `CollapsingLowestDenseStore._adjust`. + fn adjust(&mut self, new_min: i32, new_max: i32) { + if new_max - new_min + 1 > self.length() { + // Collapse the lowest bins to fit within bin_limit. 
+ let collapse_min = new_max - self.length() + 1; + if collapse_min >= self.max_key { + self.offset = collapse_min; + self.min_key = collapse_min; + self.bins.iter_mut().for_each(|b| *b = 0.0); + self.bins[0] = self.count; + } else { + let shift = self.offset - collapse_min; + if shift < 0 { + #[allow(clippy::cast_sign_loss)] + let n = (self.min_key - self.offset) as usize; + #[allow(clippy::cast_sign_loss)] + let r = (collapse_min - self.offset) as usize; + let s: f64 = self.bins[n..=r].iter().sum(); + self.bins[n..r].iter_mut().for_each(|b| *b = 0.0); + self.bins[r] += s; + self.min_key = collapse_min; + self.shift_bins(shift); + } else { + self.min_key = collapse_min; + self.shift_bins(shift); + } + } + self.max_key = new_max; + self.is_collapsed = true; + } else { + self.center_bins(new_min, new_max); + self.min_key = new_min; + self.max_key = new_max; + } + } + + fn center_bins(&mut self, new_min: i32, new_max: i32) { + let middle_key = new_min + (new_max - new_min + 1) / 2; + let shift = (self.offset + self.length() / 2) - middle_key; + self.shift_bins(shift); + } + + fn shift_bins(&mut self, shift: i32) { + if shift > 0 { + #[allow(clippy::cast_sign_loss)] + let s = shift as usize; + let keep = self.bins.len() - s; + let mut new_bins = vec![0.0; s]; + new_bins.extend_from_slice(&self.bins[..keep]); + self.bins = new_bins; + } else if shift < 0 { + #[allow(clippy::cast_sign_loss)] + let a = (-shift) as usize; + let mut new_bins = self.bins[a..].to_vec(); + new_bins.resize(self.bins.len(), 0.0); + self.bins = new_bins; + } + self.offset -= shift; + } +} + +/// A `DDSketch` matching the tracer's `LogCollapsingLowestDenseDDSketch`. 
+#[derive(Debug, Clone)] +pub struct DdSketch { + mapping: LogarithmicMapping, + store: CollapsingLowestDenseStore, + negative_store: CollapsingLowestDenseStore, + zero_count: f64, +} + +impl Default for DdSketch { + fn default() -> Self { + Self::new() + } +} + +impl DdSketch { + #[must_use] + pub fn new() -> Self { + Self { + mapping: LogarithmicMapping::new(RELATIVE_ACCURACY), + store: CollapsingLowestDenseStore::new(), + negative_store: CollapsingLowestDenseStore::new(), + zero_count: 0.0, + } + } + + /// Accept a single value (weight 1), mirroring `DDSketch.accept`. + pub fn accept(&mut self, value: f64) { + if value > self.mapping.min_possible { + let key = self.mapping.key(value); + self.store.add(key, 1.0); + } else if value < -self.mapping.min_possible { + let key = self.mapping.key(-value); + self.negative_store.add(key, 1.0); + } else { + self.zero_count += 1.0; + } + } + + /// Serialize to the `DDSketch` protobuf wire format. + #[must_use] + pub fn to_proto_bytes(&self) -> Vec { + let mut out = Vec::new(); + + // Field 1: mapping (IndexMapping), length-delimited. + let mut mapping = Vec::new(); + write_tag(&mut mapping, 1, WIRE_FIXED64); + mapping.extend_from_slice(&self.mapping.gamma.to_le_bytes()); + write_tag(&mut mapping, 2, WIRE_FIXED64); + mapping.extend_from_slice(&0.0f64.to_le_bytes()); // indexOffset + write_tag(&mut mapping, 3, WIRE_VARINT); + mapping.push(0); // interpolation = NONE + write_tag(&mut out, 1, WIRE_LEN); + write_uvarint(&mut out, mapping.len() as u64); + out.extend_from_slice(&mapping); + + // Field 2: positiveValues (Store). + write_tag(&mut out, 2, WIRE_LEN); + let pos = encode_store(&self.store); + write_uvarint(&mut out, pos.len() as u64); + out.extend_from_slice(&pos); + + // Field 3: negativeValues (Store). 
+ write_tag(&mut out, 3, WIRE_LEN); + let neg = encode_store(&self.negative_store); + write_uvarint(&mut out, neg.len() as u64); + out.extend_from_slice(&neg); + + // Field 4: zeroCount (double) — always emitted (matches sketches-js). + write_tag(&mut out, 4, WIRE_FIXED64); + out.extend_from_slice(&self.zero_count.to_le_bytes()); + + out + } +} + +const WIRE_VARINT: u8 = 0; +const WIRE_FIXED64: u8 = 1; +const WIRE_LEN: u8 = 2; + +fn write_tag(buf: &mut Vec, field: u32, wire: u8) { + write_uvarint(buf, u64::from((field << 3) | u32::from(wire))); +} + +fn write_uvarint(buf: &mut Vec, mut value: u64) { + loop { + let mut byte = (value & 0x7f) as u8; + value >>= 7; + if value != 0 { + byte |= 0x80; + } + buf.push(byte); + if value == 0 { + break; + } + } +} + +fn write_zigzag32(buf: &mut Vec, value: i32) { + #[allow(clippy::cast_sign_loss)] + let zz = ((value << 1) ^ (value >> 31)) as u32; + write_uvarint(buf, u64::from(zz)); +} + +/// Encode a dense store: field 2 = packed `contiguousBinCounts`, +/// field 3 = `contiguousBinIndexOffset` (sint32). Field 2 is omitted when empty, +/// matching `sketches-js`. 
+fn encode_store(store: &CollapsingLowestDenseStore) -> Vec { + let mut buf = Vec::new(); + if !store.bins.is_empty() { + write_tag(&mut buf, 2, WIRE_LEN); + write_uvarint(&mut buf, (store.bins.len() * 8) as u64); + for &count in &store.bins { + buf.extend_from_slice(&count.to_le_bytes()); + } + } + write_tag(&mut buf, 3, WIRE_VARINT); + write_zigzag32(&mut buf, store.offset); + buf +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + use serde_json::Value; + + const GOLDEN: &str = include_str!("fixtures/sketch_golden.json"); + + #[test] + fn gamma_matches_tracer() { + let m = LogarithmicMapping::new(RELATIVE_ACCURACY); + assert_eq!(hex::encode(m.gamma.to_le_bytes()), "fd4a815abf52f03f"); + } + + #[test] + fn matches_all_golden_vectors() { + let golden: Value = serde_json::from_str(GOLDEN).expect("parse fixture"); + let cases = golden["cases"].as_array().expect("cases array"); + + for case in cases { + let name = case["name"].as_str().unwrap(); + let mut sketch = DdSketch::new(); + for v in case["values"].as_array().unwrap() { + sketch.accept(v.as_f64().unwrap()); + } + let got = hex::encode(sketch.to_proto_bytes()); + let want = case["valueHex"].as_str().unwrap(); + assert_eq!(got, want, "sketch mismatch for case `{name}`"); + } + } +} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index 41ee7f064..0d9981805 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -1,6 +1,7 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +pub mod data_streams; pub mod http_client; pub mod propagation; pub mod proxy_aggregator; diff --git a/tools/dsm/gen_sketch_golden.js b/tools/dsm/gen_sketch_golden.js new file mode 100644 index 000000000..5b76695de --- /dev/null +++ b/tools/dsm/gen_sketch_golden.js @@ -0,0 +1,67 @@ +#!/usr/bin/env node +/* + * Golden-vector generator for the DSM DDSketch encoding. 
+ * + * Reuses the *exact* sketch implementation the dd-trace-js tracer ships + * (`LogCollapsingLowestDenseDDSketch`, relativeAccuracy 0.01, binLimit 2048) + * and dumps, for a set of fixed inputs: + * - the serialized DDSketch protobuf bytes (hex) -- this is the blob the + * tracer embeds as EdgeLatency / PathwayLatency / PayloadSize. + * - the decoded structure (gamma, indexOffset, interpolation, + * contiguousBinIndexOffset, contiguousBinCounts, zeroCount) for sanity. + * + * Output is JSON on stdout. These vectors are pinned in the Rust unit tests so + * our hand-rolled sketch must reproduce them byte-for-byte. + * + * Read-only: requires the vendored sketches-js bundle by absolute path and + * writes nothing. + */ + +'use strict' + +// Absolute path to the dd-trace-js vendored sketches-js bundle. +const SKETCHES_JS = '/Users/james.eastham/source/datadog/dd-trace-js/vendor/dist/@datadog/sketches-js' +// The compiled proto, used only to DECODE our own output for the readable dump. +const PROTO = '/Users/james.eastham/source/datadog/dd-trace-js/vendor/node_modules/@datadog/sketches-js/dist/ddsketch/proto/compiled.js' + +const { LogCollapsingLowestDenseDDSketch } = require(SKETCHES_JS) +const { DDSketch: DDSketchProto } = require(PROTO) + +// Fixed input cases. Latency values are in SECONDS (the tracer feeds ns / 1e9); +// the payload-size case uses raw byte counts. Names are stable test IDs. 
+const CASES = [ + { name: 'single_value_1s', values: [1.0] }, + { name: 'single_value_tenth', values: [0.1] }, + { name: 'single_value_1ms', values: [0.001] }, + { name: 'multi_spread', values: [0.001, 0.01, 0.1, 1.0, 10.0] }, + { name: 'repeated_same', values: [0.5, 0.5, 0.5, 0.5] }, + { name: 'payload_sizes', values: [100, 256, 1024, 4096] }, + { name: 'zero_value', values: [0.0] }, +] + +function dump(values) { + const sketch = new LogCollapsingLowestDenseDDSketch() + for (const v of values) sketch.accept(v) + + const bytes = Buffer.from(sketch.toProto()) + const decoded = DDSketchProto.toObject(DDSketchProto.decode(bytes), { + longs: String, + enums: String, + defaults: true, + }) + + return { + valueHex: bytes.toString('hex'), + valueBase64: bytes.toString('base64'), + byteLen: bytes.length, + decoded, + } +} + +const out = { + generator: 'dd-trace-js vendored @datadog/sketches-js', + sketch: 'LogCollapsingLowestDenseDDSketch (relativeAccuracy=0.01, binLimit=2048)', + cases: CASES.map((c) => ({ name: c.name, values: c.values, ...dump(c.values) })), +} + +process.stdout.write(JSON.stringify(out, null, 2) + '\n') From f320471ed8ecff8dc739d5106c0489ec7f9aac9d Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 08:30:20 +0100 Subject: [PATCH 2/9] chore: cleanup files --- tools/dsm/gen_sketch_golden.js | 67 ---------------------------------- 1 file changed, 67 deletions(-) delete mode 100644 tools/dsm/gen_sketch_golden.js diff --git a/tools/dsm/gen_sketch_golden.js b/tools/dsm/gen_sketch_golden.js deleted file mode 100644 index 5b76695de..000000000 --- a/tools/dsm/gen_sketch_golden.js +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env node -/* - * Golden-vector generator for the DSM DDSketch encoding. 
- * - * Reuses the *exact* sketch implementation the dd-trace-js tracer ships - * (`LogCollapsingLowestDenseDDSketch`, relativeAccuracy 0.01, binLimit 2048) - * and dumps, for a set of fixed inputs: - * - the serialized DDSketch protobuf bytes (hex) -- this is the blob the - * tracer embeds as EdgeLatency / PathwayLatency / PayloadSize. - * - the decoded structure (gamma, indexOffset, interpolation, - * contiguousBinIndexOffset, contiguousBinCounts, zeroCount) for sanity. - * - * Output is JSON on stdout. These vectors are pinned in the Rust unit tests so - * our hand-rolled sketch must reproduce them byte-for-byte. - * - * Read-only: requires the vendored sketches-js bundle by absolute path and - * writes nothing. - */ - -'use strict' - -// Absolute path to the dd-trace-js vendored sketches-js bundle. -const SKETCHES_JS = '/Users/james.eastham/source/datadog/dd-trace-js/vendor/dist/@datadog/sketches-js' -// The compiled proto, used only to DECODE our own output for the readable dump. -const PROTO = '/Users/james.eastham/source/datadog/dd-trace-js/vendor/node_modules/@datadog/sketches-js/dist/ddsketch/proto/compiled.js' - -const { LogCollapsingLowestDenseDDSketch } = require(SKETCHES_JS) -const { DDSketch: DDSketchProto } = require(PROTO) - -// Fixed input cases. Latency values are in SECONDS (the tracer feeds ns / 1e9); -// the payload-size case uses raw byte counts. Names are stable test IDs. 
-const CASES = [ - { name: 'single_value_1s', values: [1.0] }, - { name: 'single_value_tenth', values: [0.1] }, - { name: 'single_value_1ms', values: [0.001] }, - { name: 'multi_spread', values: [0.001, 0.01, 0.1, 1.0, 10.0] }, - { name: 'repeated_same', values: [0.5, 0.5, 0.5, 0.5] }, - { name: 'payload_sizes', values: [100, 256, 1024, 4096] }, - { name: 'zero_value', values: [0.0] }, -] - -function dump(values) { - const sketch = new LogCollapsingLowestDenseDDSketch() - for (const v of values) sketch.accept(v) - - const bytes = Buffer.from(sketch.toProto()) - const decoded = DDSketchProto.toObject(DDSketchProto.decode(bytes), { - longs: String, - enums: String, - defaults: true, - }) - - return { - valueHex: bytes.toString('hex'), - valueBase64: bytes.toString('base64'), - byteLen: bytes.length, - decoded, - } -} - -const out = { - generator: 'dd-trace-js vendored @datadog/sketches-js', - sketch: 'LogCollapsingLowestDenseDDSketch (relativeAccuracy=0.01, binLimit=2048)', - cases: CASES.map((c) => ({ name: c.name, values: c.values, ...dump(c.values) })), -} - -process.stdout.write(JSON.stringify(out, null, 2) + '\n') From 03c926d2b0733b97cf547c6b1cc41c162e7a950f Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 08:39:47 +0100 Subject: [PATCH 3/9] fix: revert extension version --- bottlecap/src/tags/lambda/tags.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index ae04b2928..cd6c4d69b 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -47,7 +47,7 @@ const FUNCTION_TAGS_KEY: &str = "_dd.tags.function"; // TODO(astuyve) decide what to do with the version const EXTENSION_VERSION_KEY: &str = "dd_extension_version"; // TODO(duncanista) figure out a better way to not hardcode this -pub const EXTENSION_VERSION: &str = "james-test-next"; +pub const EXTENSION_VERSION: &str = "97-next"; const REGION_KEY: &str = "region"; const 
ACCOUNT_ID_KEY: &str = "account_id"; From e6135edad67929425e7089a48f08011f822eddc6 Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 08:40:43 +0100 Subject: [PATCH 4/9] chore: log DSM serialization failures --- bottlecap/src/traces/data_streams/aggregator.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/bottlecap/src/traces/data_streams/aggregator.rs b/bottlecap/src/traces/data_streams/aggregator.rs index d65b6e93e..5bff2d839 100644 --- a/bottlecap/src/traces/data_streams/aggregator.rs +++ b/bottlecap/src/traces/data_streams/aggregator.rs @@ -136,7 +136,13 @@ impl Aggregator { }; // struct-as-map (named) so keys are emitted as strings, not positional. - rmp_serde::to_vec_named(&payload).ok() + match rmp_serde::to_vec_named(&payload) { + Ok(buf) => Some(buf), + Err(e) => { + tracing::warn!("DSM: failed to serialize pipeline stats payload: {e}"); + None + } + } } } @@ -218,7 +224,10 @@ mod tests { // Top-level is a 5-key msgpack fixmap (struct-as-map), so the first // byte is 0x85. This guards against accidental struct-as-array output. 
- assert_eq!(payload[0], 0x85, "top-level payload must be a 5-entry msgpack map"); + assert_eq!( + payload[0], 0x85, + "top-level payload must be a 5-entry msgpack map" + ); } #[test] From 13043c4f7a4008b48dd298f4ac111514b5ee939c Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 08:42:58 +0100 Subject: [PATCH 5/9] chore: PR feedback --- .../src/lifecycle/invocation/processor.rs | 2 +- bottlecap/src/traces/data_streams/context.rs | 2 +- .../src/traces/data_streams/processor.rs | 30 ++++++++++++++----- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 42a490657..551850dc8 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -1105,7 +1105,7 @@ impl Processor { // Extension-side DSM: record a consume (`direction:in`) checkpoint for // DSM-eligible event sources, continuing any inbound pathway context. - if let Some(dsm) = self.dsm_processor.clone() { + if let Some(dsm) = self.dsm_processor.as_ref() { debug!("DSM: extraction hook fired for request {request_id}"); let identified = crate::lifecycle::invocation::triggers::IdentifiedTrigger::from_value(&payload_value); diff --git a/bottlecap/src/traces/data_streams/context.rs b/bottlecap/src/traces/data_streams/context.rs index b1f787e16..1c086139c 100644 --- a/bottlecap/src/traces/data_streams/context.rs +++ b/bottlecap/src/traces/data_streams/context.rs @@ -20,7 +20,7 @@ use base64::engine::general_purpose::STANDARD; /// Carrier key (preferred) holding the base64-encoded DSM pathway context. pub const DD_PATHWAY_CTX_BASE64_KEY: &str = "dd-pathway-ctx-base64"; -/// Legacy carrier key holding the raw (binary) DSM pathway context. +/// Legacy carrier key holding the base64-encoded DSM pathway context. 
pub const DD_PATHWAY_CTX_KEY: &str = "dd-pathway-ctx"; const MS_TO_NS: u64 = 1_000_000; diff --git a/bottlecap/src/traces/data_streams/processor.rs b/bottlecap/src/traces/data_streams/processor.rs index c6ff0a4f5..6fc9d9fa8 100644 --- a/bottlecap/src/traces/data_streams/processor.rs +++ b/bottlecap/src/traces/data_streams/processor.rs @@ -10,8 +10,8 @@ //! constructed. use std::io::Write; -use std::sync::Mutex; use std::sync::Arc; +use std::sync::Mutex; use std::time::{SystemTime, UNIX_EPOCH}; use bytes::Bytes; @@ -88,8 +88,9 @@ impl DsmProcessor { edge_tags ); - if let Ok(mut agg) = self.aggregator.lock() { - agg.add(&checkpoint, payload_size); + match self.aggregator.lock() { + Ok(mut agg) => agg.add(&checkpoint, payload_size), + Err(e) => warn!("DSM: aggregator lock poisoned; dropping consume checkpoint: {e}"), } } @@ -98,7 +99,10 @@ impl DsmProcessor { pub async fn drain_into_proxy(&self) { let payload = match self.aggregator.lock() { Ok(mut agg) => agg.take_payload(), - Err(_) => None, + Err(e) => { + warn!("DSM: aggregator lock poisoned; skipping pipeline stats flush: {e}"); + return; + } }; let Some(payload) = payload else { return; @@ -113,7 +117,10 @@ impl DsmProcessor { }; let mut headers = HeaderMap::new(); - headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/msgpack")); + headers.insert( + CONTENT_TYPE, + HeaderValue::from_static("application/msgpack"), + ); headers.insert(CONTENT_ENCODING, HeaderValue::from_static("gzip")); let request = ProxyRequest { @@ -122,7 +129,10 @@ impl DsmProcessor { target_url: self.target_url.clone(), }; - debug!("DSM: enqueued pipeline stats payload ({} bytes gzipped)", request.body.len()); + debug!( + "DSM: enqueued pipeline stats payload ({} bytes gzipped)", + request.body.len() + ); self.proxy_aggregator.lock().await.add(request); } } @@ -207,7 +217,13 @@ mod tests { #[tokio::test] async fn drain_is_noop_when_empty() { let proxy = Arc::new(TokioMutex::new(ProxyAggregator::default())); - let dsm = 
DsmProcessor::new("svc".into(), "env".into(), "1.0".into(), "datadoghq.com", proxy.clone()); + let dsm = DsmProcessor::new( + "svc".into(), + "env".into(), + "1.0".into(), + "datadoghq.com", + proxy.clone(), + ); dsm.drain_into_proxy().await; assert_eq!(proxy.lock().await.get_batch().len(), 0); } From 947380af7be1e8e2cb1e662486b4877d081e5f3e Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 08:49:38 +0100 Subject: [PATCH 6/9] feat: add batch iteration for DSM checkpointing --- .../src/lifecycle/invocation/processor.rs | 29 ++++++--- .../invocation/triggers/kinesis_event.rs | 7 ++- .../src/lifecycle/invocation/triggers/mod.rs | 52 ++++++++++++++++ .../invocation/triggers/sns_event.rs | 7 ++- .../invocation/triggers/sqs_event.rs | 60 ++++++++++++++++++- 5 files changed, 144 insertions(+), 11 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 551850dc8..bc670e495 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -1110,18 +1110,31 @@ impl Processor { let identified = crate::lifecycle::invocation::triggers::IdentifiedTrigger::from_value(&payload_value); if let Some(trigger) = SpanInferrer::get_trigger_type(identified) { - if let Some(mut edge_tags) = trigger.get_dsm_edge_tags() { - apply_dsm_exchange_fallback( - &mut edge_tags, - self.config.dsm_exchange_name.as_deref(), + // Batched sources (SQS/SNS/Kinesis) yield one checkpoint per + // record so every message's pathway context is captured. + let checkpoints = trigger.get_dsm_checkpoints(&payload_value); + if checkpoints.is_empty() { + debug!( + "DSM: identified trigger is not DSM-eligible, skipping consume checkpoint" ); - debug!("DSM: trigger is DSM-eligible, edge_tags={edge_tags:?}"); - // Payload size is not currently measured; latency stats are unaffected. 
- dsm.record_consume(&edge_tags, &trigger.get_carrier(), 0.0); } else { debug!( - "DSM: identified trigger is not DSM-eligible, skipping consume checkpoint" + "DSM: trigger is DSM-eligible, {} record(s)", + checkpoints.len() ); + for mut checkpoint in checkpoints { + apply_dsm_exchange_fallback( + &mut checkpoint.edge_tags, + self.config.dsm_exchange_name.as_deref(), + ); + debug!( + "DSM: recording consume checkpoint edge_tags={:?}", + checkpoint.edge_tags + ); + // Payload size is not currently measured; latency stats + // are unaffected. + dsm.record_consume(&checkpoint.edge_tags, &checkpoint.carrier, 0.0); + } } } else { debug!("DSM: no trigger identified for payload, skipping consume checkpoint"); diff --git a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs index 29cf6e9f5..b448abdf5 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs @@ -10,7 +10,8 @@ use tracing::debug; use crate::lifecycle::invocation::{ processor::S_TO_NS, triggers::{ - DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, ServiceNameResolver, Trigger, + DATADOG_CARRIER_KEY, DsmCheckpointInput, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + ServiceNameResolver, Trigger, dsm_checkpoints_from_records, }, }; @@ -145,6 +146,10 @@ impl Trigger for KinesisRecord { "type:kinesis".to_string(), ]) } + + fn get_dsm_checkpoints(&self, payload: &Value) -> Vec { + dsm_checkpoints_from_records::(payload) + } } impl ServiceNameResolver for KinesisRecord { diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs index 1fdef633b..d28bc4923 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/mod.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -112,6 +112,38 @@ pub fn get_default_service_name( instance_name.to_string() } +/// DSM consume inputs for a single record: the 
source-specific edge tags plus +/// the record's carrier (which may contain the inbound pathway context). +#[derive(Debug, Clone, PartialEq)] +pub struct DsmCheckpointInput { + pub edge_tags: Vec, + pub carrier: HashMap, +} + +/// Build per-record DSM consume inputs for a batched event by deserializing +/// every entry in the `Records` array into `T` and reading its edge tags and +/// carrier. Records that fail to deserialize or are not DSM-eligible (no edge +/// tags) are skipped. Returns empty when there is no `Records` array. +pub(crate) fn dsm_checkpoints_from_records(payload: &Value) -> Vec +where + T: Trigger + serde::de::DeserializeOwned, +{ + let Some(records) = payload.get("Records").and_then(Value::as_array) else { + return Vec::new(); + }; + records + .iter() + .filter_map(|record| { + let record: T = serde_json::from_value(record.clone()).ok()?; + let edge_tags = record.get_dsm_edge_tags()?; + Some(DsmCheckpointInput { + edge_tags, + carrier: record.get_carrier(), + }) + }) + .collect() +} + pub trait Trigger: ServiceNameResolver { fn new(payload: Value) -> Option where @@ -137,6 +169,26 @@ pub trait Trigger: ServiceNameResolver { None } + /// Per-record DSM consume inputs for this (possibly batched) event. + /// + /// Each Lambda invocation can deliver multiple records (e.g. an SQS/SNS/ + /// Kinesis batch), and every record can carry its own inbound pathway + /// context. The default implementation yields a single entry derived from + /// the representative record this trigger was parsed from; batched sources + /// override it to yield one entry per record so no message is dropped. + /// + /// `payload` is the full, unparsed event so overrides can re-read every + /// record. Records that are not DSM-eligible are omitted. 
+ fn get_dsm_checkpoints(&self, _payload: &Value) -> Vec { + match self.get_dsm_edge_tags() { + Some(edge_tags) => vec![DsmCheckpointInput { + edge_tags, + carrier: self.get_carrier(), + }], + None => Vec::new(), + } + } + fn get_dd_resource_key(&self, _region: &str) -> Option { None } diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs index 40c9cd87a..a2fddc38f 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -10,7 +10,8 @@ use crate::lifecycle::invocation::{ base64_to_string, processor::MS_TO_NS, triggers::{ - DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, ServiceNameResolver, Trigger, + DATADOG_CARRIER_KEY, DsmCheckpointInput, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + ServiceNameResolver, Trigger, dsm_checkpoints_from_records, event_bridge_event::EventBridgeEvent, }, }; @@ -174,6 +175,10 @@ impl Trigger for SnsRecord { "type:sns".to_string(), ]) } + + fn get_dsm_checkpoints(&self, payload: &Value) -> Vec { + dsm_checkpoints_from_records::(payload) + } } impl ServiceNameResolver for SnsRecord { diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs index 1dcc9bcfa..3152e79be 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -2,7 +2,8 @@ use crate::config::aws::get_aws_partition_by_region; use crate::lifecycle::invocation::{ processor::MS_TO_NS, triggers::{ - DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, ServiceNameResolver, Trigger, + DATADOG_CARRIER_KEY, DsmCheckpointInput, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + ServiceNameResolver, Trigger, dsm_checkpoints_from_records, event_bridge_event::EventBridgeEvent, sns_event::{SnsEntity, SnsRecord}, }, @@ -212,6 +213,10 @@ impl Trigger for SqsRecord { "type:sqs".to_string(), ]) } + + fn 
get_dsm_checkpoints(&self, payload: &Value) -> Vec { + dsm_checkpoints_from_records::(payload) + } } impl ServiceNameResolver for SqsRecord { @@ -407,6 +412,59 @@ mod tests { ); } + #[test] + fn test_get_dsm_checkpoints_one_per_record() { + // Build a two-record batch from the single-record fixture, giving each + // record a distinct queue and a distinct pathway carrier. + let json = read_json_file("sqs_event.json"); + let mut payload: Value = + serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let records = payload["Records"].as_array().expect("Records array"); + let mut first = records[0].clone(); + let mut second = records[0].clone(); + + first["eventSourceARN"] = Value::from("arn:aws:sqs:us-east-1:123456789012:QueueA"); + first["messageAttributes"]["_datadog"]["stringValue"] = + Value::from("{\"x-datadog-trace-id\":\"111\",\"dd-pathway-ctx-base64\":\"ctxA\"}"); + + second["eventSourceARN"] = Value::from("arn:aws:sqs:us-east-1:123456789012:QueueB"); + second["messageAttributes"]["_datadog"]["stringValue"] = + Value::from("{\"x-datadog-trace-id\":\"222\",\"dd-pathway-ctx-base64\":\"ctxB\"}"); + + payload["Records"] = Value::from(vec![first, second]); + + let trigger = SqsRecord::new(payload.clone()).expect("Failed to deserialize SqsRecord"); + let checkpoints = trigger.get_dsm_checkpoints(&payload); + + assert_eq!(checkpoints.len(), 2, "expected one checkpoint per record"); + + assert_eq!( + checkpoints[0].edge_tags, + vec![ + "direction:in".to_string(), + "topic:QueueA".to_string(), + "type:sqs".to_string(), + ] + ); + assert_eq!( + checkpoints[0].carrier.get("dd-pathway-ctx-base64"), + Some(&"ctxA".to_string()) + ); + + assert_eq!( + checkpoints[1].edge_tags, + vec![ + "direction:in".to_string(), + "topic:QueueB".to_string(), + "type:sqs".to_string(), + ] + ); + assert_eq!( + checkpoints[1].carrier.get("dd-pathway-ctx-base64"), + Some(&"ctxB".to_string()) + ); + } + #[test] fn test_get_carrier() { let json = 
read_json_file("sqs_event.json"); From 7a6974691606d822b9e46b3fff9e038922819502 Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 08:53:41 +0100 Subject: [PATCH 7/9] chore: clippy fix --- bottlecap/src/bin/bottlecap/main.rs | 16 +++++++++------- bottlecap/src/lifecycle/invocation/processor.rs | 5 +++-- .../invocation/triggers/event_bridge_event.rs | 9 ++------- .../invocation/triggers/kinesis_event.rs | 6 +++++- .../lifecycle/invocation/triggers/sqs_event.rs | 6 +++++- bottlecap/src/traces/data_streams/pathway.rs | 8 +------- .../src/traces/data_streams/propagation_hash.rs | 4 +--- 7 files changed, 26 insertions(+), 28 deletions(-) diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index 0e7168a04..4456cae2b 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -345,13 +345,15 @@ async fn extension_loop_active( .unwrap_or_else(|| "aws.lambda".to_string()) .to_lowercase(); let env = config.env.clone().unwrap_or_default(); - Some(Arc::new(bottlecap::traces::data_streams::DsmProcessor::new( - service, - env, - env!("CARGO_PKG_VERSION").to_string(), - &config.site, - Arc::clone(&proxy_aggregator), - ))) + Some(Arc::new( + bottlecap::traces::data_streams::DsmProcessor::new( + service, + env, + env!("CARGO_PKG_VERSION").to_string(), + &config.site, + Arc::clone(&proxy_aggregator), + ), + )) } else { None }; diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index bc670e495..6d1033188 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -1107,8 +1107,9 @@ impl Processor { // DSM-eligible event sources, continuing any inbound pathway context. 
if let Some(dsm) = self.dsm_processor.as_ref() { debug!("DSM: extraction hook fired for request {request_id}"); - let identified = - crate::lifecycle::invocation::triggers::IdentifiedTrigger::from_value(&payload_value); + let identified = crate::lifecycle::invocation::triggers::IdentifiedTrigger::from_value( + &payload_value, + ); if let Some(trigger) = SpanInferrer::get_trigger_type(identified) { // Batched sources (SQS/SNS/Kinesis) yield one checkpoint per // record so every message's pathway context is captured. diff --git a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs index ecdcbc91d..d12c81ac9 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs @@ -119,10 +119,7 @@ impl Trigger for EventBridgeEvent { // encoded as `:rule//`, so we recover it best-effort and omit // the tag entirely when it cannot be determined (rather than emit a // wrong/empty value that would corrupt the pathway hash). 
- let mut tags = vec![ - "direction:in".to_string(), - "type:eventbridge".to_string(), - ]; + let mut tags = vec!["direction:in".to_string(), "type:eventbridge".to_string()]; if let Some(bus) = self.event_bus_name() { tags.push(format!("exchange:{bus}")); } @@ -312,9 +309,7 @@ mod tests { fn test_get_dsm_edge_tags_recovers_bus_from_rule_arn() { let event = make_event( "OrderPlaced", - vec![ - "arn:aws:events:us-east-1:123456789012:rule/my-bus/my-rule".to_string(), - ], + vec!["arn:aws:events:us-east-1:123456789012:rule/my-bus/my-rule".to_string()], ); assert_eq!( event.get_dsm_edge_tags(), diff --git a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs index b448abdf5..93f228b75 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs @@ -136,7 +136,11 @@ impl Trigger for KinesisRecord { fn get_dsm_edge_tags(&self) -> Option> { // stream name = last `/` segment of the event source ARN. - let stream = self.event_source_arn.split('/').next_back().unwrap_or_default(); + let stream = self + .event_source_arn + .split('/') + .next_back() + .unwrap_or_default(); if stream.is_empty() { return Some(vec!["direction:in".to_string(), "type:kinesis".to_string()]); } diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs index 3152e79be..cbf0eb64b 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -206,7 +206,11 @@ impl Trigger for SqsRecord { fn get_dsm_edge_tags(&self) -> Option> { // queue name = last `:` segment of the event source ARN. 
- let queue = self.event_source_arn.split(':').next_back().unwrap_or_default(); + let queue = self + .event_source_arn + .split(':') + .next_back() + .unwrap_or_default(); Some(vec![ "direction:in".to_string(), format!("topic:{queue}"), diff --git a/bottlecap/src/traces/data_streams/pathway.rs b/bottlecap/src/traces/data_streams/pathway.rs index 66b5cd6a5..233a3a2da 100644 --- a/bottlecap/src/traces/data_streams/pathway.rs +++ b/bottlecap/src/traces/data_streams/pathway.rs @@ -103,13 +103,7 @@ mod tests { #[test] fn manual_checkpoint_tag_is_excluded() { - let without = compute_pathway_hash( - "svc", - "env", - &tags(&["direction:in"]), - [0u8; 8], - None, - ); + let without = compute_pathway_hash("svc", "env", &tags(&["direction:in"]), [0u8; 8], None); let with = compute_pathway_hash( "svc", "env", diff --git a/bottlecap/src/traces/data_streams/propagation_hash.rs b/bottlecap/src/traces/data_streams/propagation_hash.rs index 49e18784f..1c9b085cd 100644 --- a/bottlecap/src/traces/data_streams/propagation_hash.rs +++ b/bottlecap/src/traces/data_streams/propagation_hash.rs @@ -44,9 +44,7 @@ mod tests { fn applies_multiply_before_xor() { // FNV-1 (multiply-then-XOR) differs from FNV-1a (XOR-then-multiply). // Verify the first step explicitly for a single byte. 
- let expected = FNV1_64_OFFSET_BASIS - .wrapping_mul(FNV1_64_PRIME) - ^ u64::from(b'a'); + let expected = FNV1_64_OFFSET_BASIS.wrapping_mul(FNV1_64_PRIME) ^ u64::from(b'a'); assert_eq!(fnv1_64(b"a"), expected); } } From ca5b7da28616438dca8a49c00517159ba712b497 Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 09:02:19 +0100 Subject: [PATCH 8/9] feat: add MSK support for DSM extraction --- bottlecap/src/config/env.rs | 4 + bottlecap/src/config/mod.rs | 4 + bottlecap/src/config/yaml.rs | 3 + .../src/lifecycle/invocation/processor.rs | 74 ++++++++++++++++ .../invocation/triggers/msk_event.rs | 86 ++++++++++++++++++- 5 files changed, 170 insertions(+), 1 deletion(-) diff --git a/bottlecap/src/config/env.rs b/bottlecap/src/config/env.rs index 5ff647227..2cf8b2483 100644 --- a/bottlecap/src/config/env.rs +++ b/bottlecap/src/config/env.rs @@ -265,6 +265,8 @@ pub struct EnvConfig { pub dsm_consume_enabled: Option, /// @env `DD_DSM_EXCHANGE_NAME` pub dsm_exchange_name: Option, + /// @env `DD_DSM_KAFKA_GROUP` + pub dsm_kafka_group: Option, // // Trace Propagation /// @env `DD_TRACE_PROPAGATION_STYLE` @@ -593,6 +595,7 @@ fn merge_config(config: &mut Config, env_config: &EnvConfig) { merge_option_to_value!(config, env_config, trace_aws_service_representation_enabled); merge_option_to_value!(config, env_config, dsm_consume_enabled); merge_option!(config, env_config, dsm_exchange_name); + merge_option!(config, env_config, dsm_kafka_group); // Trace Propagation merge_vec!(config, env_config, trace_propagation_style); @@ -1040,6 +1043,7 @@ mod tests { trace_aws_service_representation_enabled: true, dsm_consume_enabled: false, dsm_exchange_name: None, + dsm_kafka_group: None, metrics_config_compression_level: 3, otlp_config_traces_enabled: false, otlp_config_traces_span_name_as_resource_name: true, diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index 90cded702..8c2a5dce8 100644 --- a/bottlecap/src/config/mod.rs +++ 
b/bottlecap/src/config/mod.rs @@ -308,6 +308,9 @@ pub struct Config { /// checkpoints when it cannot be derived from the event payload /// (`DD_DSM_EXCHANGE_NAME`). pub dsm_exchange_name: Option, + /// Consumer group used for `MSK`/Kafka DSM consume checkpoints, which is not + /// present in the Lambda event payload (`DD_DSM_KAFKA_GROUP`). + pub dsm_kafka_group: Option, // Metrics pub metrics_config_compression_level: i32, @@ -441,6 +444,7 @@ impl Default for Config { trace_aws_service_representation_enabled: true, dsm_consume_enabled: false, dsm_exchange_name: None, + dsm_kafka_group: None, trace_propagation_style: vec![ TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext, diff --git a/bottlecap/src/config/yaml.rs b/bottlecap/src/config/yaml.rs index ef57e15ba..232747425 100644 --- a/bottlecap/src/config/yaml.rs +++ b/bottlecap/src/config/yaml.rs @@ -84,6 +84,7 @@ pub struct YamlConfig { #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] pub dsm_consume_enabled: Option, pub dsm_exchange_name: Option, + pub dsm_kafka_group: Option, // Trace Propagation #[serde(deserialize_with = "deserialize_trace_propagation_style")] pub trace_propagation_style: Vec, @@ -558,6 +559,7 @@ fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { ); merge_option_to_value!(config, yaml_config, dsm_consume_enabled); merge_option!(config, yaml_config, dsm_exchange_name); + merge_option!(config, yaml_config, dsm_kafka_group); // OTLP if let Some(otlp_config) = &yaml_config.otlp_config { @@ -982,6 +984,7 @@ api_security_sample_delay: 60 # Seconds trace_aws_service_representation_enabled: true, dsm_consume_enabled: false, dsm_exchange_name: None, + dsm_kafka_group: None, metrics_config_compression_level: 3, otlp_config_traces_enabled: false, otlp_config_traces_span_name_as_resource_name: true, diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 6d1033188..78b736b7f 100644 --- 
a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -1128,6 +1128,10 @@ impl Processor { &mut checkpoint.edge_tags, self.config.dsm_exchange_name.as_deref(), ); + apply_dsm_kafka_group_fallback( + &mut checkpoint.edge_tags, + self.config.dsm_kafka_group.as_deref(), + ); debug!( "DSM: recording consume checkpoint edge_tags={:?}", checkpoint.edge_tags @@ -1603,6 +1607,19 @@ fn apply_dsm_exchange_fallback(edge_tags: &mut Vec, exchange: Option<&st } } +/// Apply the configured `DD_DSM_KAFKA_GROUP` fallback to DSM consume edge tags. +/// The Kafka/`MSK` consumer group is not present in the Lambda event payload, so +/// it can only be supplied via config. Applies only to `type:kafka` tags that do +/// not already carry a `group:` tag. +fn apply_dsm_kafka_group_fallback(edge_tags: &mut Vec, group: Option<&str>) { + if let Some(group) = group + && edge_tags.iter().any(|t| t == "type:kafka") + && !edge_tags.iter().any(|t| t.starts_with("group:")) + { + edge_tags.push(format!("group:{group}")); + } +} + #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { @@ -1674,6 +1691,63 @@ mod tests { assert_eq!(tags, before); } + #[test] + fn dsm_kafka_group_fallback_injects_for_kafka_without_group() { + let mut tags = vec![ + "direction:in".to_string(), + "topic:my-topic".to_string(), + "type:kafka".to_string(), + ]; + apply_dsm_kafka_group_fallback(&mut tags, Some("my-group")); + assert_eq!( + tags, + vec![ + "direction:in".to_string(), + "topic:my-topic".to_string(), + "type:kafka".to_string(), + "group:my-group".to_string(), + ] + ); + } + + #[test] + fn dsm_kafka_group_fallback_does_not_override_existing_group() { + let mut tags = vec![ + "direction:in".to_string(), + "group:payload-group".to_string(), + "topic:my-topic".to_string(), + "type:kafka".to_string(), + ]; + let before = tags.clone(); + apply_dsm_kafka_group_fallback(&mut tags, Some("my-group")); + assert_eq!(tags, before); + } + + #[test] + fn 
dsm_kafka_group_fallback_ignored_for_non_kafka_sources() { + // SQS consume tags must never receive an injected group. + let mut tags = vec![ + "direction:in".to_string(), + "topic:my-queue".to_string(), + "type:sqs".to_string(), + ]; + let before = tags.clone(); + apply_dsm_kafka_group_fallback(&mut tags, Some("my-group")); + assert_eq!(tags, before); + } + + #[test] + fn dsm_kafka_group_fallback_noop_when_unconfigured() { + let mut tags = vec![ + "direction:in".to_string(), + "topic:my-topic".to_string(), + "type:kafka".to_string(), + ]; + let before = tags.clone(); + apply_dsm_kafka_group_fallback(&mut tags, None); + assert_eq!(tags, before); + } + fn setup() -> Processor { let aws_config = Arc::new(AwsConfig { region: "us-east-1".into(), diff --git a/bottlecap/src/lifecycle/invocation/triggers/msk_event.rs b/bottlecap/src/lifecycle/invocation/triggers/msk_event.rs index c36607d18..f90d4528b 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/msk_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/msk_event.rs @@ -1,6 +1,6 @@ use crate::lifecycle::invocation::processor::MS_TO_NS; use crate::lifecycle::invocation::triggers::{ - FUNCTION_TRIGGER_EVENT_SOURCE_TAG, ServiceNameResolver, Trigger, + DsmCheckpointInput, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, ServiceNameResolver, Trigger, }; use libdd_trace_protobuf::pb::Span; use serde::{Deserialize, Serialize}; @@ -263,6 +263,43 @@ impl Trigger for MSKEvent { fn is_async(&self) -> bool { true } + + fn get_dsm_checkpoints(&self, payload: &Value) -> Vec { + // `new` prunes the records map to a single record, so iterate the full + // unparsed payload to capture every Kafka record in the batch. Edge tags + // follow the dd-trace Kafka consume convention + // (`[direction:in, topic:, type:kafka]`); the `group:` tag is not present in the event and is injected from config + // (`DD_DSM_KAFKA_GROUP`) by the extraction hook. 
+ let Some(records_map) = payload.get("records").and_then(Value::as_object) else { + return Vec::new(); + }; + let mut checkpoints = Vec::new(); + for group in records_map.values() { + let records: Vec<&Value> = match group { + Value::Array(arr) => arr.iter().collect(), + Value::Object(obj) => obj.values().collect(), + _ => Vec::new(), + }; + for record in records { + let Some(topic) = record.get("topic").and_then(Value::as_str) else { + continue; + }; + let carrier = record + .get("headers") + .map_or_else(HashMap::new, headers_to_string_map); + checkpoints.push(DsmCheckpointInput { + edge_tags: vec![ + "direction:in".to_string(), + format!("topic:{topic}"), + "type:kafka".to_string(), + ], + carrier, + }); + } + } + checkpoints + } } impl ServiceNameResolver for MSKEvent { @@ -616,4 +653,51 @@ mod tests { assert_eq!(record.partition, 0); assert!(event.get_carrier().is_empty()); } + + #[test] + fn test_get_dsm_checkpoints_one_per_record() { + // Two topic-partitions, each with a record carrying its own pathway + // context header. `dd-pathway-ctx-base64` bytes: "ctxA"=[99,116,120,65], + // "ctxB"=[99,116,120,66]. + let payload = serde_json::json!({ + "eventSource": "aws:kafka", + "eventSourceArn": "arn:aws:kafka:us-east-1:123456789012:cluster/demo-cluster/751d2973-a626-431c-9d4e-d7975eb44dd7-2", + "records": { + "topicA-0": [{ + "topic": "topicA", "partition": 0, "timestamp": 1000.0, + "headers": [{ "dd-pathway-ctx-base64": [99, 116, 120, 65] }] + }], + "topicB-0": [{ + "topic": "topicB", "partition": 0, "timestamp": 2000.0, + "headers": [{ "dd-pathway-ctx-base64": [99, 116, 120, 66] }] + }] + } + }); + + // `new` prunes to one record; the per-record checkpoints must come from + // the full payload, not the pruned trigger. 
+ let trigger = MSKEvent::new(payload.clone()).expect("Failed to deserialize MSKEvent"); + let checkpoints = trigger.get_dsm_checkpoints(&payload); + + assert_eq!(checkpoints.len(), 2, "expected one checkpoint per record"); + + for (topic, ctx) in [("topicA", "ctxA"), ("topicB", "ctxB")] { + let cp = checkpoints + .iter() + .find(|c| c.edge_tags.contains(&format!("topic:{topic}"))) + .unwrap_or_else(|| panic!("missing checkpoint for {topic}")); + assert_eq!( + cp.edge_tags, + vec![ + "direction:in".to_string(), + format!("topic:{topic}"), + "type:kafka".to_string(), + ] + ); + assert_eq!( + cp.carrier.get("dd-pathway-ctx-base64").map(String::as_str), + Some(ctx) + ); + } + } } From 9b923a55cf2f0278730ec6a6a34a8ad7dcf7c24b Mon Sep 17 00:00:00 2001 From: "james.eastham" Date: Thu, 18 Jun 2026 09:48:31 +0100 Subject: [PATCH 9/9] chore: update event bridge naming logic --- .../src/lifecycle/invocation/processor.rs | 92 ++++++++++++------- .../invocation/triggers/event_bridge_event.rs | 28 +++--- 2 files changed, 73 insertions(+), 47 deletions(-) diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index 78b736b7f..1b34a7af1 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -1124,7 +1124,7 @@ impl Processor { checkpoints.len() ); for mut checkpoint in checkpoints { - apply_dsm_exchange_fallback( + resolve_dsm_eventbridge_exchange( &mut checkpoint.edge_tags, self.config.dsm_exchange_name.as_deref(), ); @@ -1594,17 +1594,26 @@ impl Processor { } } -/// Apply the configured `DD_DSM_EXCHANGE_NAME` fallback to DSM consume edge -/// tags. The fallback only applies to `EventBridge` (`type:eventbridge`) tags -/// that do not already carry a payload-derived `exchange:` tag, so a -/// payload-derived bus always wins and other sources are never affected. 
-fn apply_dsm_exchange_fallback(edge_tags: &mut Vec, exchange: Option<&str>) { - if let Some(exchange) = exchange - && edge_tags.iter().any(|t| t == "type:eventbridge") - && !edge_tags.iter().any(|t| t.starts_with("exchange:")) - { - edge_tags.push(format!("exchange:{exchange}")); - } +/// Resolve the `exchange` (event bus) tag for `EventBridge` (`type:eventbridge`) +/// DSM consume edge tags, with precedence: configured `DD_DSM_EXCHANGE_NAME` > +/// payload-derived bus (rule ARN) > `default`. The resolved tag always replaces +/// any payload-derived `exchange:` tag; other sources are never affected. +fn resolve_dsm_eventbridge_exchange(edge_tags: &mut Vec, configured: Option<&str>) { + if !edge_tags.iter().any(|t| t == "type:eventbridge") { + return; + } + // Precedence: configured `DD_DSM_EXCHANGE_NAME` > payload-derived bus (rule + // ARN) > `default`. EventBridge consume checkpoints always carry an + // `exchange:` tag so the node hashes consistently across invocations. + let payload_exchange = edge_tags + .iter() + .find_map(|t| t.strip_prefix("exchange:").map(ToString::to_string)); + let exchange = configured + .map(ToString::to_string) + .or(payload_exchange) + .unwrap_or_else(|| "default".to_string()); + edge_tags.retain(|t| !t.starts_with("exchange:")); + edge_tags.push(format!("exchange:{exchange}")); } /// Apply the configured `DD_DSM_KAFKA_GROUP` fallback to DSM consume edge tags. @@ -1635,59 +1644,74 @@ mod tests { use serde_json::json; #[test] - fn dsm_exchange_fallback_injects_for_eventbridge_without_exchange() { + fn dsm_exchange_config_takes_priority_over_payload() { + // Priority 1: configured DD_DSM_EXCHANGE_NAME overrides a payload-derived bus. 
let mut tags = vec![ "direction:in".to_string(), "type:eventbridge".to_string(), + "exchange:payload-bus".to_string(), "topic:OrderPlaced".to_string(), ]; - apply_dsm_exchange_fallback(&mut tags, Some("my-bus")); + resolve_dsm_eventbridge_exchange(&mut tags, Some("config-bus")); + assert!(tags.contains(&"exchange:config-bus".to_string())); + assert!(!tags.contains(&"exchange:payload-bus".to_string())); + // Exactly one exchange tag remains. assert_eq!( - tags, - vec![ - "direction:in".to_string(), - "type:eventbridge".to_string(), - "topic:OrderPlaced".to_string(), - "exchange:my-bus".to_string(), - ] + tags.iter().filter(|t| t.starts_with("exchange:")).count(), + 1 ); } #[test] - fn dsm_exchange_fallback_does_not_override_payload_derived_exchange() { + fn dsm_exchange_uses_payload_bus_when_unconfigured() { + // Priority 2: payload-derived bus is kept when no config is set. let mut tags = vec![ "direction:in".to_string(), "type:eventbridge".to_string(), "exchange:payload-bus".to_string(), "topic:OrderPlaced".to_string(), ]; - let before = tags.clone(); - apply_dsm_exchange_fallback(&mut tags, Some("my-bus")); - assert_eq!(tags, before); + resolve_dsm_eventbridge_exchange(&mut tags, None); + assert!(tags.contains(&"exchange:payload-bus".to_string())); + assert_eq!( + tags.iter().filter(|t| t.starts_with("exchange:")).count(), + 1 + ); } #[test] - fn dsm_exchange_fallback_ignored_for_non_eventbridge_sources() { - // SQS consume tags must never receive an injected exchange. 
+ fn dsm_exchange_uses_config_when_no_payload_bus() { let mut tags = vec![ "direction:in".to_string(), - "topic:my-queue".to_string(), - "type:sqs".to_string(), + "type:eventbridge".to_string(), + "topic:OrderPlaced".to_string(), ]; - let before = tags.clone(); - apply_dsm_exchange_fallback(&mut tags, Some("my-bus")); - assert_eq!(tags, before); + resolve_dsm_eventbridge_exchange(&mut tags, Some("config-bus")); + assert!(tags.contains(&"exchange:config-bus".to_string())); } #[test] - fn dsm_exchange_fallback_noop_when_unconfigured() { + fn dsm_exchange_defaults_when_nothing_found() { + // Priority 3: no config and no payload bus => `default` floor. let mut tags = vec![ "direction:in".to_string(), "type:eventbridge".to_string(), "topic:OrderPlaced".to_string(), ]; + resolve_dsm_eventbridge_exchange(&mut tags, None); + assert!(tags.contains(&"exchange:default".to_string())); + } + + #[test] + fn dsm_exchange_ignored_for_non_eventbridge_sources() { + // SQS consume tags must never receive an exchange. + let mut tags = vec![ + "direction:in".to_string(), + "topic:my-queue".to_string(), + "type:sqs".to_string(), + ]; let before = tags.clone(); - apply_dsm_exchange_fallback(&mut tags, None); + resolve_dsm_eventbridge_exchange(&mut tags, Some("config-bus")); assert_eq!(tags, before); } diff --git a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs index d12c81ac9..0fa65a282 100644 --- a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs +++ b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs @@ -114,11 +114,10 @@ impl Trigger for EventBridgeEvent { fn get_dsm_edge_tags(&self) -> Option> { // EventBridge consume edge tags. `topic` is the detail-type. 
`exchange` - // is the event bus name, which is NOT carried in the event delivered to - // Lambda; when a triggering rule ARN is present in `resources` it is - // encoded as `:rule//`, so we recover it best-effort and omit - // the tag entirely when it cannot be determined (rather than emit a - // wrong/empty value that would corrupt the pathway hash). + // (event bus) is not carried in the event; we only emit a payload-derived + // bus here when a `:rule//` ARN is present. The final exchange + // value (with `DD_DSM_EXCHANGE_NAME` taking priority and a `default` + // floor) is resolved downstream in the extraction hook. let mut tags = vec!["direction:in".to_string(), "type:eventbridge".to_string()]; if let Some(bus) = self.event_bus_name() { tags.push(format!("exchange:{bus}")); @@ -129,17 +128,18 @@ impl Trigger for EventBridgeEvent { } impl EventBridgeEvent { - /// Best-effort event bus name recovered from a triggering rule ARN in - /// `resources`. Non-default buses encode the name as `:rule//`; - /// the default bus appears as `:rule/` (no bus segment). Returns - /// `None` when no bus name can be determined from the payload. + /// Payload-derived event bus name from a triggering rule ARN in `resources`. + /// Only non-default buses can be recovered, encoded as `:rule//`; + /// the first segment is the bus. Default-bus rules (`:rule/`, no bus + /// segment) and missing rule ARNs return `None`, leaving the hook to apply + /// the configured override or the `default` floor. fn event_bus_name(&self) -> Option { for arn in &self.resources { if let Some(rest) = arn.split(":rule/").nth(1) { let mut segments = rest.split('/'); let first = segments.next().unwrap_or_default(); - // `rule//` => bus is the first segment. - // `rule/` (default bus) => no second segment, skip. + // `:rule//` => bus is the first segment. + // `:rule/` (default bus) => no second segment, not derivable here. 
if segments.next().is_some() && !first.is_empty() { return Some(first.to_string()); } @@ -323,8 +323,10 @@ mod tests { } #[test] - fn test_get_dsm_edge_tags_default_bus_rule_arn_omits_exchange() { - // Default-bus rule ARNs have no bus segment (`:rule/`). + fn test_get_dsm_edge_tags_default_bus_rule_arn_omits_exchange_at_trigger() { + // Default-bus rule ARNs (`:rule/`, no bus segment) are not + // derivable at the trigger level; the `default` floor is applied later + // by the extraction hook. let event = make_event( "OrderPlaced", vec!["arn:aws:events:us-east-1:123456789012:rule/my-rule".to_string()],