From 6918c2d8cee1551f2fe3388e62cd8602c68b408e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Thu, 11 Jun 2026 12:19:38 +0200 Subject: [PATCH 1/2] Add --otel distributed tracing support Emit a sampled W3C traceparent on Cosmos DB requests so external tracing systems (e.g. the emulator) can correlate shell activity. - New --otel [endpoint] option (optional value, like --mcp): bare --otel enables tracing; an endpoint (or OTEL_EXPORTER_OTLP_ENDPOINT) also exports spans via OTLP - TracingBootstrap: sets the Azure.Experimental.EnableActivitySource switch and registers an AlwaysOn TracerProvider listening to Azure.Cosmos.Operation and CosmosDBShell sources so activities are recorded (traceparent flag -01) - Each command runs inside a CosmosDBShell root activity - CreateClientOptions sets DisableDistributedTracing=false explicitly - Add OpenTelemetry + OTLP exporter packages (1.15.3) - Help text, README, docs/navigation.md, and unit tests --- .../Runtime/TracingBootstrapTests.cs | 43 ++++++++++ .../ShellInterpreter.cs | 6 +- .../TracingBootstrap.cs | 79 +++++++++++++++++++ CosmosDBShell/CosmosDBShell.csproj | 2 + CosmosDBShell/Program.cs | 39 ++++++++- CosmosDBShell/lang/en.ftl | 1 + Directory.Packages.props | 2 + README.md | 2 + docs/navigation.md | 8 ++ 9 files changed, 179 insertions(+), 3 deletions(-) create mode 100644 CosmosDBShell.Tests/Runtime/TracingBootstrapTests.cs create mode 100644 CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs diff --git a/CosmosDBShell.Tests/Runtime/TracingBootstrapTests.cs b/CosmosDBShell.Tests/Runtime/TracingBootstrapTests.cs new file mode 100644 index 0000000..b958b0d --- /dev/null +++ b/CosmosDBShell.Tests/Runtime/TracingBootstrapTests.cs @@ -0,0 +1,43 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// ------------------------------------------------------------ + +namespace CosmosShell.Tests; + +using System.Diagnostics; + +using Azure.Data.Cosmos.Shell.Core; + +using Xunit; + +public class TracingBootstrapTests +{ + [Fact] + public void StartCommandActivity_WithoutProvider_ReturnsNull() + { + using var activity = TracingBootstrap.StartCommandActivity("cosmosdbshell.command"); + + Assert.Null(activity); + } + + [Fact] + public void StartCommandActivity_WhenInitialized_ProducesRecordedActivity() + { + using var tracing = TracingBootstrap.Initialize(otlpEndpoint: null); + + using var activity = TracingBootstrap.StartCommandActivity("cosmosdbshell.command"); + + Assert.NotNull(activity); + Assert.True(activity!.Recorded); + Assert.True(activity.ActivityTraceFlags.HasFlag(ActivityTraceFlags.Recorded)); + } + + [Fact] + public void Initialize_SetsAzureActivitySourceSwitch() + { + using var tracing = TracingBootstrap.Initialize(otlpEndpoint: null); + + Assert.True( + AppContext.TryGetSwitch("Azure.Experimental.EnableActivitySource", out var enabled) && enabled); + } +} diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs index 4dedde9..ff68730 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/ShellInterpreter.cs @@ -265,6 +265,7 @@ public void CancelPrompt() /// A representing the result of the command execution. 
public async Task ExecuteCommandAsync(string command, CancellationToken token) { + using var activity = TracingBootstrap.StartCommandActivity("cosmosdbshell.command"); var state = new CommandState(); state.SetFormat(Environment.GetEnvironmentVariable("COSMOSDB_SHELL_FORMAT")); @@ -1316,7 +1317,10 @@ private static CosmosClientOptions CreateClientOptions(string connectionString, { ApplicationName = "CosmosDBShell", ConnectionMode = requestedMode, - CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions(), + CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions + { + DisableDistributedTracing = false, + }, UseSystemTextJsonSerializerWithOptions = new JsonSerializerOptions() { DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs new file mode 100644 index 0000000..e366b8e --- /dev/null +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs @@ -0,0 +1,79 @@ +// ------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// ------------------------------------------------------------ + +namespace Azure.Data.Cosmos.Shell.Core; + +using System.Diagnostics; +using OpenTelemetry; +using OpenTelemetry.Resources; +using OpenTelemetry.Trace; + +/// +/// Owns the distributed-tracing lifecycle for the shell. When enabled it registers +/// an tracer provider that records every activity, which +/// causes the Azure Cosmos DB SDK to emit a sampled W3C traceparent +/// (the -01 flag) on its outgoing requests. An OTLP exporter is added only +/// when an endpoint is supplied. +/// +public sealed class TracingBootstrap : IDisposable +{ + /// + /// Name of the used for per-command root activities. 
+ /// + public const string ActivitySourceName = "CosmosDBShell"; + + private const string CosmosOperationSourceName = "Azure.Cosmos.Operation"; + + private static readonly ActivitySource SharedSource = new(ActivitySourceName); + + private readonly TracerProvider provider; + + private TracingBootstrap(TracerProvider provider) + { + this.provider = provider; + } + + /// + /// Enables distributed tracing for the current process. Sets the Azure SDK + /// experimental switch required to emit traceparent headers and builds a + /// tracer provider that records all activities. + /// + /// Optional OTLP endpoint to export spans to. When null or empty, no exporter is added and tracing only propagates a sampled traceparent on the wire. + /// A that must be disposed to flush and tear down the provider. + public static TracingBootstrap Initialize(string? otlpEndpoint) + { + // Required so the Azure.Core HTTP pipeline writes a W3C traceparent header. + AppContext.SetSwitch("Azure.Experimental.EnableActivitySource", true); + + var builder = Sdk.CreateTracerProviderBuilder() + .SetResourceBuilder(ResourceBuilder.CreateDefault().AddService(ActivitySourceName)) + .SetSampler(new AlwaysOnSampler()) + .AddSource(CosmosOperationSourceName) + .AddSource(ActivitySourceName); + + if (!string.IsNullOrWhiteSpace(otlpEndpoint)) + { + builder.AddOtlpExporter(options => options.Endpoint = new Uri(otlpEndpoint)); + } + + return new TracingBootstrap(builder.Build()); + } + + /// + /// Starts a root activity for a shell command. Returns null when tracing is not + /// enabled, so callers incur no overhead in the common case. + /// + /// The activity name. + /// The started activity, or null when no tracer is listening. + public static Activity? 
StartCommandActivity(string name) + { + return SharedSource.StartActivity(name, ActivityKind.Client); + } + + /// + public void Dispose() + { + this.provider.Dispose(); + } +} diff --git a/CosmosDBShell/CosmosDBShell.csproj b/CosmosDBShell/CosmosDBShell.csproj index ac48dde..b71b62a 100644 --- a/CosmosDBShell/CosmosDBShell.csproj +++ b/CosmosDBShell/CosmosDBShell.csproj @@ -95,6 +95,8 @@ + + diff --git a/CosmosDBShell/Program.cs b/CosmosDBShell/Program.cs index 5ed9eab..6074214 100644 --- a/CosmosDBShell/Program.cs +++ b/CosmosDBShell/Program.cs @@ -39,6 +39,7 @@ public static async Task Main(string[] args) } IHost? host = null; + TracingBootstrap? tracing = null; try { // --help / --version handled manually so we can render our own @@ -108,6 +109,19 @@ public static async Task Main(string[] args) o.McpPort = mcpValue ?? DefaultMcpPort; } + // --otel supports an optional value: when present without an endpoint, + // tracing is still enabled (emitting a sampled traceparent) and the + // OTLP endpoint, if any, falls back to the standard environment variable. + var otelResult = parseResult.FindResultFor(optionMap.Otel); + if (otelResult is not null) + { + o.EnableTracing = true; + var otelValue = parseResult.GetValueForOption(optionMap.Otel); + o.OtlpEndpoint = string.IsNullOrWhiteSpace(otelValue) + ? Environment.GetEnvironmentVariable("OTEL_EXPORTER_OTLP_ENDPOINT") + : otelValue; + } + if (o.StartLspServer) { // Already handled above, but keep for completeness @@ -149,6 +163,13 @@ public static async Task Main(string[] args) ShellInterpreter.Instance.Options = o; + // Enable distributed tracing before any CosmosClient is created so the + // Azure SDK pipeline emits a sampled W3C traceparent on its requests. 
+ if (o.EnableTracing) + { + tracing = TracingBootstrap.Initialize(o.OtlpEndpoint); + } + if (o.ConnectionString != null) { using var connectTokenSource = ShellInterpreter.UserCancellationTokenSource; @@ -286,6 +307,7 @@ await ShellInterpreter.Instance.ConnectAsync( { ShellInterpreter.Instance.Dispose(); host?.Dispose(); + tracing?.Dispose(); } } @@ -451,6 +473,11 @@ private static (RootCommand Command, OptionMap Map) BuildRootCommand() var verbose = new Option("--verbose", MessageService.GetString("help-Verbose")); var theme = new Option("--theme", MessageService.GetString("help-Theme")); + var otel = new Option("--otel", MessageService.GetString("help-Otel")) + { + Arity = ArgumentArity.ZeroOrOne, + }; + var root = new RootCommand("Cosmos DB Shell") { colorSystem, @@ -471,6 +498,7 @@ private static (RootCommand Command, OptionMap Map) BuildRootCommand() lspStdio, verbose, theme, + otel, }; var map = new OptionMap( @@ -491,7 +519,8 @@ private static (RootCommand Command, OptionMap Map) BuildRootCommand() startLspServer, lspStdio, verbose, - theme); + theme, + otel); return (root, map); } @@ -517,6 +546,7 @@ private static string BuildHelpText() [map.ConnectResourceGroup] = "", [map.McpPort] = "[]", [map.Theme] = "", + [map.Otel] = "[]", }; var rows = new List<(string Label, string? Description)>(); @@ -620,7 +650,8 @@ private sealed record OptionMap( Option StartLspServer, Option LspStdio, Option Verbose, - Option Theme); + Option Theme, + Option Otel); /// /// Maps the most common System.CommandLine parse error messages @@ -698,5 +729,9 @@ public class CosmosShellOptions public bool Verbose { get; set; } public string? Theme { get; set; } + + public bool EnableTracing { get; set; } + + public string? 
OtlpEndpoint { get; set; } } } diff --git a/CosmosDBShell/lang/en.ftl b/CosmosDBShell/lang/en.ftl index f1bc7e5..5f42163 100644 --- a/CosmosDBShell/lang/en.ftl +++ b/CosmosDBShell/lang/en.ftl @@ -622,6 +622,7 @@ help-EnableLspServer = Enable Language Server Protocol (LSP) server for editor i help-McpPort = Enable MCP HTTP server. Optionally specify a port with --mcp ; default is 6128. help-Verbose = Print full exception details instead of only the message. help-Theme = Color theme profile to apply at startup. Falls back to the COSMOSDB_SHELL_THEME environment variable. +help-Otel = Enable distributed tracing so requests carry a sampled W3C traceparent. Optionally specify an OTLP endpoint with --otel ; falls back to the OTEL_EXPORTER_OTLP_ENDPOINT environment variable. mcp-error-invalid-port = Error: --mcp port must be greater than 0. warning-unknown-theme = Unknown theme '{ $name }'. Available themes: { $themes }. Falling back to default. diff --git a/Directory.Packages.props b/Directory.Packages.props index ed5327b..fad9b4c 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -19,6 +19,8 @@ + + diff --git a/README.md b/README.md index 0937509..7336866 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Lightweight CLI for Azure Cosmos DB. 
- Pipelines and scripting with variables, loops, functions - Multi-line input at the prompt — automatic continuation for unclosed blocks/strings, plus explicit `\` line continuation ([docs](docs/navigation.md#multi-line-input)) - MCP server for AI/tool integration +- Distributed tracing via OpenTelemetry (`--otel`): emits a sampled W3C `traceparent` on Cosmos requests, with optional OTLP export ## Quick Start @@ -130,6 +131,7 @@ Packaging runs produce preview versions in the form `1.0.-preview.` | `--connect-subscription ` | Azure subscription ID for ARM database and container operations | | `--connect-resource-group ` | Azure resource group name for ARM database and container operations | | `--mcp [port]` | Enable MCP server on the given port, or `6128` by default | +| `--otel [endpoint]` | Enable distributed tracing (sampled W3C `traceparent`); optional OTLP `endpoint`, else `OTEL_EXPORTER_OTLP_ENDPOINT` | | `--verbose` | Print full exception details | | `--color-system ` | Colors: 0=off, 1=standard, 2=truecolor (alias: `--cs`) | | `--theme ` | Color theme profile to apply at startup (`default`, `light`, `dark`, `monochrome`). Falls back to `COSMOSDB_SHELL_THEME`. | diff --git a/docs/navigation.md b/docs/navigation.md index de33b3d..68fa7f8 100644 --- a/docs/navigation.md +++ b/docs/navigation.md @@ -269,6 +269,7 @@ Start the shell with options to customize behavior: | `--connect-subscription ` | Azure subscription ID for ARM database and container operations at startup | | `--connect-resource-group ` | Azure resource group name for ARM database and container operations at startup | | `--mcp [port]` | Enable MCP (Model Context Protocol) server on the given port, or `6128` by default | +| `--otel [endpoint]` | Enable distributed tracing so requests carry a sampled W3C `traceparent`. 
Optionally export spans to an OTLP `endpoint`; falls back to the `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable | | `--color-system ` | Color scheme: 0=off, 1=standard, 2=truecolor (alias: `--cs`) | | `--clear-history` | Clear command history on start | | `--help` | Show usage information | @@ -282,6 +283,7 @@ Start the shell with options to customize behavior: | `COSMOSDB_SHELL_ACCOUNT_KEY` | Account key for authentication | | `COSMOSDB_SHELL_FORMAT` | Default output format | | `COSMOSDB_SHELL_CSVSEP` | CSV column separator | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Default OTLP endpoint used by `--otel` when no endpoint is supplied | **Examples:** @@ -300,4 +302,10 @@ cosmosdbshell --mcp # Start with MCP server enabled on a custom port cosmosdbshell --mcp 5050 + +# Enable distributed tracing (emits a sampled traceparent on Cosmos requests) +cosmosdbshell --otel + +# Enable distributed tracing and export spans to an OTLP collector +cosmosdbshell --otel http://localhost:4317 ``` From 65df554d12ce361db8d397615f170135cff3b647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Thu, 11 Jun 2026 14:47:22 +0200 Subject: [PATCH 2/2] Address PR #126 review: validate --otel endpoint, fix XML cref --- .../Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs | 4 ++-- CosmosDBShell/Program.cs | 12 ++++++++++++ CosmosDBShell/lang/en.ftl | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs index e366b8e..f8016c7 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Core/TracingBootstrap.cs @@ -11,8 +11,8 @@ namespace Azure.Data.Cosmos.Shell.Core; /// /// Owns the distributed-tracing lifecycle for the shell. 
When enabled it registers -/// an tracer provider that records every activity, which -/// causes the Azure Cosmos DB SDK to emit a sampled W3C traceparent +/// a that records every activity, which causes the +/// Azure Cosmos DB SDK to emit a sampled W3C traceparent /// (the -01 flag) on its outgoing requests. An OTLP exporter is added only /// when an endpoint is supplied. /// diff --git a/CosmosDBShell/Program.cs b/CosmosDBShell/Program.cs index 6074214..dc07b00 100644 --- a/CosmosDBShell/Program.cs +++ b/CosmosDBShell/Program.cs @@ -120,6 +120,18 @@ public static async Task Main(string[] args) o.OtlpEndpoint = string.IsNullOrWhiteSpace(otelValue) ? Environment.GetEnvironmentVariable("OTEL_EXPORTER_OTLP_ENDPOINT") : otelValue; + + // Validate up front so a malformed --otel value (or OTEL_EXPORTER_OTLP_ENDPOINT) + // yields a clean error instead of failing inside the exporter. Absolute alone is not + // enough: on Unix "/tmp/x" parses as a file URI, and OTLP only speaks http/https. + if (!string.IsNullOrWhiteSpace(o.OtlpEndpoint) + && !(Uri.TryCreate(o.OtlpEndpoint, UriKind.Absolute, out var otlpUri) && (otlpUri.Scheme == Uri.UriSchemeHttp || otlpUri.Scheme == Uri.UriSchemeHttps))) + { + Environment.ExitCode = 1; + ShellInterpreter.WriteLine(MessageService.GetArgsString( + "otel-error-invalid-endpoint", "endpoint", o.OtlpEndpoint)); + return; + } } if (o.StartLspServer) diff --git a/CosmosDBShell/lang/en.ftl b/CosmosDBShell/lang/en.ftl index 5f42163..640734f 100644 --- a/CosmosDBShell/lang/en.ftl +++ b/CosmosDBShell/lang/en.ftl @@ -624,6 +624,7 @@ help-Verbose = Print full exception details instead of only the message. help-Theme = Color theme profile to apply at startup. Falls back to the COSMOSDB_SHELL_THEME environment variable. help-Otel = Enable distributed tracing so requests carry a sampled W3C traceparent. Optionally specify an OTLP endpoint with --otel ; falls back to the OTEL_EXPORTER_OTLP_ENDPOINT environment variable. mcp-error-invalid-port = Error: --mcp port must be greater than 0. +otel-error-invalid-endpoint = Error: --otel endpoint '{ $endpoint }' is not a valid absolute http or https URI. 
warning-unknown-theme = Unknown theme '{ $name }'. Available themes: { $themes }. Falling back to default.