diff --git a/CosmosDBShell.Tests/CommandTests/QueryCommandTests.cs b/CosmosDBShell.Tests/CommandTests/QueryCommandTests.cs index b0baf9d..933dd70 100644 --- a/CosmosDBShell.Tests/CommandTests/QueryCommandTests.cs +++ b/CosmosDBShell.Tests/CommandTests/QueryCommandTests.cs @@ -314,4 +314,94 @@ public void BuildMetrics_CoversAllServerSideMetricsProperties() Assert.Contains(propertyToMetric[prop], metricNames); } } + + [Fact] + public void EvaluatePlan_NoUtilizedIndexes_ReportsFullScan() + { + var evaluation = QueryCommand.EvaluatePlan( + utilizedIndexes: [], + potentialIndexes: [], + indexHitRatio: 0, + retrievedDocumentCount: 1000, + outputDocumentCount: 1); + + Assert.True(evaluation.FullScan); + Assert.False(evaluation.IndexSeek); + Assert.Empty(evaluation.UtilizedIndexes); + } + + [Fact] + public void EvaluatePlan_WithUtilizedIndexes_ReportsIndexSeek() + { + var evaluation = QueryCommand.EvaluatePlan( + utilizedIndexes: ["/city/?"], + potentialIndexes: [], + indexHitRatio: 1, + retrievedDocumentCount: 1, + outputDocumentCount: 1); + + Assert.False(evaluation.FullScan); + Assert.True(evaluation.IndexSeek); + Assert.Equal(1, evaluation.IndexHitRatio); + Assert.Collection(evaluation.UtilizedIndexes, spec => Assert.Equal("/city/?", spec)); + } + + [Fact] + public void EvaluatePlan_PreservesPotentialIndexRecommendations() + { + var evaluation = QueryCommand.EvaluatePlan( + utilizedIndexes: ["/city/?"], + potentialIndexes: ["/age/?"], + indexHitRatio: 0.5, + retrievedDocumentCount: 200, + outputDocumentCount: 100); + + Assert.True(evaluation.IndexSeek); + Assert.Collection(evaluation.PotentialIndexes, spec => Assert.Equal("/age/?", spec)); + Assert.Equal(200, evaluation.RetrievedDocumentCount); + Assert.Equal(100, evaluation.OutputDocumentCount); + } + + [Fact] + public void ParseIndexPlan_ExtractsSingleAndCompositeIndexSpecs() + { + const string indexMetrics = """ + { + "UtilizedIndexes": { + "SingleIndexes": [ { "IndexSpec": "/city/?" 
} ], + "CompositeIndexes": [ { "IndexSpecs": [ "/age ASC", "/name ASC" ] } ] + }, + "PotentialIndexes": { + "SingleIndexes": [ { "IndexSpec": "/status/?" } ], + "CompositeIndexes": [] + } + } + """; + + var (utilized, potential) = QueryCommand.ParseIndexPlan(indexMetrics); + + Assert.Equal(["/city/?", "/age ASC, /name ASC"], utilized); + Assert.Equal(["/status/?"], potential); + } + + [Theory] + [InlineData(null)] + [InlineData("")] + [InlineData(" ")] + public void ParseIndexPlan_NullOrEmpty_ReturnsEmptyLists(string? indexMetrics) + { + var (utilized, potential) = QueryCommand.ParseIndexPlan(indexMetrics); + + Assert.Empty(utilized); + Assert.Empty(potential); + } + + [Fact] + public void ParseIndexPlan_MalformedJson_ReturnsEmptyLists() + { + var (utilized, potential) = QueryCommand.ParseIndexPlan("{ not valid json"); + + Assert.Empty(utilized); + Assert.Empty(potential); + } } \ No newline at end of file diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Commands/PlanEvaluation.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Commands/PlanEvaluation.cs new file mode 100644 index 0000000..6e472a3 --- /dev/null +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Commands/PlanEvaluation.cs @@ -0,0 +1,27 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Azure.Data.Cosmos.Shell.Commands; + +using System.Collections.Generic; + +/// +/// Structured evaluation of a query execution plan derived from index metrics and +/// server-side query metrics. Pure data so it can be produced and asserted in tests. +/// +/// True when no index contributed to the query. +/// True when at least one index was utilized. +/// The index hit ratio in the range [0,1], when available. +/// Documents loaded by the engine, when available. +/// Documents returned by the query, when available. +/// Index specifications that contributed to the query. 
+/// Index specifications that could improve the query. +internal sealed record PlanEvaluation( + bool FullScan, + bool IndexSeek, + double? IndexHitRatio, + long? RetrievedDocumentCount, + long? OutputDocumentCount, + IReadOnlyList UtilizedIndexes, + IReadOnlyList PotentialIndexes); diff --git a/CosmosDBShell/Azure.Data.Cosmos.Shell.Commands/QueryCommand.cs b/CosmosDBShell/Azure.Data.Cosmos.Shell.Commands/QueryCommand.cs index 72a6d8e..ca913dd 100644 --- a/CosmosDBShell/Azure.Data.Cosmos.Shell.Commands/QueryCommand.cs +++ b/CosmosDBShell/Azure.Data.Cosmos.Shell.Commands/QueryCommand.cs @@ -6,6 +6,7 @@ namespace Azure.Data.Cosmos.Shell.Commands; using System.Collections; using System.Collections.Generic; +using System.Globalization; using System.Text; using System.Text.Json; using Azure.Data.Cosmos.Shell.Mcp; @@ -27,13 +28,14 @@ internal enum MetricTarget [CosmosExample("query \"SELECT c.id, c.name FROM c\" -max=10", Description = "Query specific fields with result limit")] [CosmosExample("query \"SELECT * FROM c\" -max=0", Description = "Query all matching documents without a limit")] [CosmosExample("query \"SELECT * FROM c\" -metrics=Display", Description = "Query with performance metrics displayed")] +[CosmosExample("query \"SELECT * FROM c WHERE c.city = 'Seattle'\" --explain", Description = "Show the query execution plan and index usage without returning documents")] [CosmosExample("query \"SELECT * FROM c\" --database=MyDB --container=Products", Description = "Query specific database and container")] [McpAnnotation( Title = "Run Query", ReadOnly = true, Idempotent = true, OpenWorld = true, - Description = "Executes a Cosmos DB NoSQL query against the current container and returns matching documents. Use the cosmos://docs/nosql-query-language resource for query syntax reference.")] + Description = "Executes a Cosmos DB NoSQL query against the current container and returns matching documents. 
Pass explain=true to return the query execution plan (utilized/potential indexes and a plain-language evaluation) instead of documents. Use the cosmos://docs/nosql-query-language resource for query syntax reference.")] internal class QueryCommand : CosmosCommand { [CosmosParameter("query")] @@ -57,6 +59,9 @@ internal class QueryCommand : CosmosCommand [CosmosOption("container", "con")] public string? Container { get; init; } + [CosmosOption("explain")] + public bool? Explain { get; init; } + public async override Task ExecuteAsync(ShellInterpreter shell, CommandState commandState, string commandText, CancellationToken token) { if (this.Bucket.HasValue && !BucketCommand.CheckBucket(this.Bucket.Value)) @@ -79,6 +84,11 @@ public async override Task ExecuteAsync(ShellInterpreter shell, Co "query", token); + if (this.Explain == true) + { + return await this.ExecuteExplainAsync(container, shell, token); + } + return await this.ExecuteQueryAsync(container, shell, token); } @@ -279,6 +289,342 @@ private static void GeneratePlainResultDocument(CommandState returnState, IEnume returnState.Result = new ShellJson(JsonSerializer.SerializeToElement(new { items = documents.ToList() })); } + // Parses the raw IndexMetrics JSON returned by Cosmos (PopulateIndexMetrics = true) + // into flat lists of utilized and potential index specifications. The metrics group + // single and composite indexes separately; both are flattened here because the + // evaluation only cares about whether an index contributed, not its arity. + internal static (List Utilized, List Potential) ParseIndexPlan(string? 
indexMetricsJson) + { + var utilized = new List(); + var potential = new List(); + + if (string.IsNullOrWhiteSpace(indexMetricsJson)) + { + return (utilized, potential); + } + + try + { + using var doc = JsonDocument.Parse(indexMetricsJson); + var root = doc.RootElement; + if (root.ValueKind != JsonValueKind.Object) + { + return (utilized, potential); + } + + if (root.TryGetProperty("UtilizedIndexes", out var utilizedGroup)) + { + AddIndexSpecs(utilizedGroup, utilized); + } + + if (root.TryGetProperty("PotentialIndexes", out var potentialGroup)) + { + AddIndexSpecs(potentialGroup, potential); + } + } + catch (JsonException) + { + // The index metrics payload was not the expected JSON shape; treat as + // "no plan details available" rather than failing the explain. + } + + return (utilized, potential); + } + + // Builds a structured evaluation of an index plan. Pure and side-effect free so it + // can be unit tested without a live Cosmos response. A query is reported as a full + // scan when no index contributed; otherwise it is an index seek. + internal static PlanEvaluation EvaluatePlan( + IReadOnlyList utilizedIndexes, + IReadOnlyList potentialIndexes, + double? indexHitRatio, + long? retrievedDocumentCount, + long? 
outputDocumentCount) + { + bool indexSeek = utilizedIndexes.Count > 0; + bool fullScan = !indexSeek; + return new PlanEvaluation( + fullScan, + indexSeek, + indexHitRatio, + retrievedDocumentCount, + outputDocumentCount, + utilizedIndexes, + potentialIndexes); + } + + private static void AddIndexSpecs(JsonElement group, List target) + { + if (group.ValueKind != JsonValueKind.Object) + { + return; + } + + foreach (var kind in new[] { "SingleIndexes", "CompositeIndexes" }) + { + if (group.TryGetProperty(kind, out var array) && array.ValueKind == JsonValueKind.Array) + { + foreach (var element in array.EnumerateArray()) + { + var spec = ExtractIndexSpec(element); + if (!string.IsNullOrEmpty(spec)) + { + target.Add(spec); + } + } + } + } + } + + private static string? ExtractIndexSpec(JsonElement element) + { + if (element.ValueKind == JsonValueKind.String) + { + return element.GetString(); + } + + if (element.ValueKind == JsonValueKind.Object) + { + if (element.TryGetProperty("IndexSpec", out var spec) && spec.ValueKind == JsonValueKind.String) + { + return spec.GetString(); + } + + if (element.TryGetProperty("IndexSpecs", out var specs) && specs.ValueKind == JsonValueKind.Array) + { + var paths = new List(); + foreach (var path in specs.EnumerateArray()) + { + if (path.ValueKind == JsonValueKind.String) + { + var value = path.GetString(); + if (!string.IsNullOrEmpty(value)) + { + paths.Add(value); + } + } + } + + return string.Join(", ", paths); + } + } + + return null; + } + + private static List BuildPlanMessages(PlanEvaluation evaluation) + { + var messages = new List(); + + if (evaluation.FullScan) + { + messages.Add(MessageService.GetString("command-query-explain-full_scan")); + } + else + { + messages.Add(MessageService.GetArgsString( + "command-query-explain-index_seek", + "indexes", + string.Join(", ", evaluation.UtilizedIndexes))); + } + + if (evaluation.PotentialIndexes.Count > 0) + { + messages.Add(MessageService.GetArgsString( + 
"command-query-explain-recommend_index", + "indexes", + string.Join(", ", evaluation.PotentialIndexes))); + } + + if (evaluation.IndexHitRatio.HasValue) + { + messages.Add(MessageService.GetArgsString( + "command-query-explain-hit_ratio", + "ratio", + evaluation.IndexHitRatio.Value)); + } + + return messages; + } + + private static ShellJson BuildExplainJson(string? query, PlanEvaluation evaluation, double requestCharge, IReadOnlyList messages) + { + var element = JsonSerializer.SerializeToElement(new + { + query, + estimate = true, + plan = new + { + utilizedIndexes = evaluation.UtilizedIndexes, + potentialIndexes = evaluation.PotentialIndexes, + indexHitRatio = evaluation.IndexHitRatio, + retrievedDocumentCount = evaluation.RetrievedDocumentCount, + outputDocumentCount = evaluation.OutputDocumentCount, + requestCharge, + }, + evaluation = new + { + fullScan = evaluation.FullScan, + indexSeek = evaluation.IndexSeek, + messages, + }, + }); + + return new ShellJson(element); + } + + private static void RenderExplain(PlanEvaluation evaluation, double requestCharge, IReadOnlyList messages) + { + AnsiConsole.MarkupLine(MessageService.GetString("command-query-explain-header")); + + foreach (var message in messages) + { + AnsiConsole.MarkupLine(Markup.Escape(message)); + } + + var table = new Table(); + table.AddColumns(string.Empty, string.Empty); + table.HideHeaders(); + table.AddRow( + Theme.FormatHelpName(Markup.Escape(MessageService.GetString("command-query-explain-utilized"))), + Theme.FormatTableValue(Markup.Escape(evaluation.UtilizedIndexes.Count > 0 ? string.Join(", ", evaluation.UtilizedIndexes) : "-"))); + table.AddRow( + Theme.FormatHelpName(Markup.Escape(MessageService.GetString("command-query-explain-potential"))), + Theme.FormatTableValue(Markup.Escape(evaluation.PotentialIndexes.Count > 0 ? 
string.Join(", ", evaluation.PotentialIndexes) : "-"))); + table.AddRow( + Theme.FormatHelpName(Markup.Escape(MessageService.GetString("command-query-index_hit_ratio"))), + Theme.FormatTableValue(Markup.Escape(evaluation.IndexHitRatio?.ToString(CultureInfo.InvariantCulture) ?? "N/A"))); + table.AddRow( + Theme.FormatHelpName(Markup.Escape(MessageService.GetString("command-query-retrieved"))), + Theme.FormatTableValue(Markup.Escape(evaluation.RetrievedDocumentCount?.ToString(CultureInfo.InvariantCulture) ?? "N/A"))); + table.AddRow( + Theme.FormatHelpName(Markup.Escape(MessageService.GetString("command-query-output"))), + Theme.FormatTableValue(Markup.Escape(evaluation.OutputDocumentCount?.ToString(CultureInfo.InvariantCulture) ?? "N/A"))); + table.AddRow( + Theme.FormatHelpName(Markup.Escape(MessageService.GetString("command-query-explain-charge"))), + Theme.FormatTableValue(Markup.Escape(requestCharge.ToString(CultureInfo.InvariantCulture)))); + AnsiConsole.Write(table); + + AnsiConsole.MarkupLine(MessageService.GetString("command-query-explain-estimate_note")); + } + + private async Task ThrowIfRequestFailedAsync(ResponseMessage response, ShellInterpreter shell) + { + if (response.IsSuccessStatusCode) + { + return; + } + + var errorContent = string.Empty; + if (response.Content != null) + { + using var errorStreamReader = new StreamReader(response.Content); + errorContent = await errorStreamReader.ReadToEndAsync(); + } + + var message = string.IsNullOrWhiteSpace(response.ErrorMessage) ? errorContent : response.ErrorMessage; + if (string.IsNullOrWhiteSpace(message)) + { + message = MessageService.GetString("command-query-error-request_failed", new Dictionary + { + { "statusCode", (int)response.StatusCode }, + { "status", response.StatusCode }, + }); + } + else if (response.StatusCode == System.Net.HttpStatusCode.BadRequest + && shell.TryReportQueryError(this.Query ?? 
string.Empty, message)) + { + // The shell has already emitted a compiler-style diagnostic with + // line/column/caret; throw a marker exception so ReportExecutionError + // stays silent. + throw new CommandReportedException("query", new InvalidOperationException(message)); + } + + throw CommandException.FromResponseStatus("query", response.StatusCode, message); + } + + private async Task ExecuteExplainAsync(Container container, ShellInterpreter shell, CancellationToken token) + { + if (string.IsNullOrWhiteSpace(this.Query)) + { + throw new CommandException("query", MessageService.GetString("command-query-error-empty_query")); + } + + var returnState = new CommandState(); + returnState.SetFormat(this.OutputFormat ?? Environment.GetEnvironmentVariable("COSMOSDB_SHELL_FORMAT")); + + try + { + // The query must execute to obtain index metrics; Cosmos has no zero-cost + // EXPLAIN. Reading only the first page keeps the RU cost low while still + // reflecting the plan and index usage chosen by the query engine. + var options = new QueryRequestOptions + { + PopulateIndexMetrics = true, + MaxItemCount = 1, + }; + + if (this.Bucket.HasValue) + { + options.ThroughputBucket = this.Bucket.Value; + } + + using var feedIterator = container.GetItemQueryStreamIterator(this.Query, null, options); + + using ResponseMessage? response = feedIterator.HasMoreResults + ? await feedIterator.ReadNextAsync(token) + : null; + if (response is not null) + { + await this.ThrowIfRequestFailedAsync(response, shell); + } + + var cumulative = response?.Diagnostics.GetQueryMetrics()?.CumulativeMetrics; + double requestCharge = response?.Diagnostics.GetQueryMetrics()?.TotalRequestCharge ?? 
0; + + var (utilized, potential) = ParseIndexPlan(response?.IndexMetrics); + var evaluation = EvaluatePlan( + utilized, + potential, + cumulative?.IndexHitRatio, + cumulative?.RetrievedDocumentCount, + cumulative?.OutputDocumentCount); + var messages = BuildPlanMessages(evaluation); + + // Emit JSON only for machine consumers (MCP, output redirection) or when + // the user explicitly asked for JSON. Interactive sessions get the + // human-readable table even though JSON is the default enum value. + var explicitJson = string.Equals(this.OutputFormat, "json", StringComparison.OrdinalIgnoreCase) + || string.Equals(this.OutputFormat, "js", StringComparison.OrdinalIgnoreCase); + + if (shell.McpPort.HasValue || shell.StdOutRedirect != null || explicitJson) + { + returnState.Result = BuildExplainJson(this.Query, evaluation, requestCharge, messages); + return returnState; + } + + RenderExplain(evaluation, requestCharge, messages); + returnState.IsPrinted = true; + return returnState; + } + catch (OperationCanceledException) when (token.IsCancellationRequested) + { + throw; + } + catch (OperationCanceledException e) + { + throw new CommandException("query", e); + } + catch (CommandReportedException) + { + throw; + } + catch (Exception e) + { + throw new CommandException("query", e); + } + } + private async Task ExecuteQueryAsync(Container container, ShellInterpreter shell, CancellationToken token) { var returnState = new CommandState(); @@ -318,37 +664,9 @@ private async Task ExecuteQueryAsync(Container container, ShellInt break; } - var response = await feedIterator.ReadNextAsync(token); + using var response = await feedIterator.ReadNextAsync(token); - if (!response.IsSuccessStatusCode) - { - var errorContent = string.Empty; - if (response.Content != null) - { - using var errorStreamReader = new StreamReader(response.Content); - errorContent = await errorStreamReader.ReadToEndAsync(); - } - - var message = string.IsNullOrWhiteSpace(response.ErrorMessage) ? 
errorContent : response.ErrorMessage; - if (string.IsNullOrWhiteSpace(message)) - { - message = MessageService.GetString("command-query-error-request_failed", new Dictionary - { - { "statusCode", (int)response.StatusCode }, - { "status", response.StatusCode }, - }); - } - else if (response.StatusCode == System.Net.HttpStatusCode.BadRequest - && shell.TryReportQueryError(this.Query ?? string.Empty, message)) - { - // The shell has already emitted a compiler-style - // diagnostic with line/column/caret; throw a marker - // exception so ReportExecutionError stays silent. - throw new CommandReportedException("query", new InvalidOperationException(message)); - } - - throw CommandException.FromResponseStatus("query", response.StatusCode, message); - } + await this.ThrowIfRequestFailedAsync(response, shell); if (response.Content == null) { diff --git a/CosmosDBShell/lang/en.ftl b/CosmosDBShell/lang/en.ftl index 640734f..e33cf7f 100644 --- a/CosmosDBShell/lang/en.ftl +++ b/CosmosDBShell/lang/en.ftl @@ -129,6 +129,7 @@ command-query-description-bucket = The throughput bucket to use for the query command-query-description-format = Output format (json, table, csv) command-query-description-database = The database to query against command-query-description-container = The container to query against +command-query-description-explain = Show the query execution plan (index usage and a plain-language evaluation) instead of returning documents command-query-fetched = Fetched { $count } documents. 
command-query-request_charge = Request Charge: { $charge } RUs command-query-document_header = Document @@ -152,6 +153,15 @@ command-query-index_metric-utilized_single = Utilized Single Indexes command-query-index_metric-potential_single = Potential Single Indexes command-query-index_metric-utilized_composite = Utilized Composite Indexes command-query-index_metric-potential_composite = Potential Composite Indexes +command-query-explain-header = Query execution plan +command-query-explain-full_scan = Full scan: the query did not use any index. Every document was examined, which is expensive on large containers. +command-query-explain-index_seek = Index seek: the query used index(es): { $indexes }. +command-query-explain-recommend_index = Consider adding index(es) on: { $indexes }. +command-query-explain-hit_ratio = Index hit ratio: { $ratio } (1 = every retrieved document matched the filter). +command-query-explain-utilized = Utilized indexes +command-query-explain-potential = Potential indexes +command-query-explain-charge = Request Charge +command-query-explain-estimate_note = Note: metrics are an estimate based on the first page of results. command-query-error-empty_query = Query text cannot be empty. Example: query "SELECT * FROM c". command-query-error-request_failed = Query request failed with status code { $statusCode } ({ $status }). command-query-error-no_content_stream = Query response did not contain a content stream. 
diff --git a/README.md b/README.md index cdcc74e..aac1f9a 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ A terminal-native shell for Azure Cosmos DB — navigate databases like a filesy - Navigate with `ls` and `cd` (Account -> Databases -> Containers -> Items) - Inspect the current location with `pwd` - Create, query, replace, patch, delete: `mkdb`, `mkcon`, `mkitem`, `query`, `replace`, `patch`, `rm` +- Inspect a query's execution plan and index usage with `query "<query>" --explain` - Bulk roundtrip with `import` / `export` for JSON Lines and JSON array files, plus CSV import/export (CSV import coerces values to strings; `--partition-key` nests a CSV column under a nested partition key path) - Manage container indexing policies with `index` (`show`, `add`, `remove`, `set`) - Tail the change feed of a container with `watch` (alias `tail`) diff --git a/docs/commands.md b/docs/commands.md index 1ea5dca..0931979 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -152,17 +152,30 @@ theme reload Execute SQL query. ```text -Usage: query [-m <max>] query +Usage: query [-m <max>] [--explain] query Arguments: query The query to execute Options: -max, -m Maximum number of items returned. Use 0 or a negative value for no limit + --explain Show the query execution plan (index usage and a plain-language + evaluation) instead of returning documents ``` `query` does not apply a default item limit. Use `--max <n>` to cap returned items when needed, or `--max 0` to disable the limit explicitly. +#### Explain a query + +`query "<query>" --explain` reports how the query engine resolved the query rather than returning documents. It shows whether the query performed a full scan or an index seek, lists the utilized and potential indexes, the index hit ratio, and the request charge. A plain-language summary highlights full scans and recommends indexes to add. 
+ +```text +query "SELECT * FROM c WHERE c.city = 'Seattle'" --explain +``` + +To keep the cost low, `--explain` executes only the first page of the query (`MaxItemCount = 1`), so the reported metrics are an estimate based on that page. `--max` is ignored when `--explain` is supplied. + + ### print Get item by id and partition key.